diff --git a/.github/workflows/artifact.yml b/.github/workflows/artifact.yml
index 5f09f3c7f..b2dc0cc73 100644
--- a/.github/workflows/artifact.yml
+++ b/.github/workflows/artifact.yml
@@ -12,7 +12,7 @@ env:
   REGION: europe-west1
   GAR_LOCATION: europe-west1-docker.pkg.dev/open-targets-genetics-dev
   REPOSITORY: gentropy-app
-  PYTHON_VERSION_DEFAULT: "3.12.7"
+  PYTHON_VERSION_DEFAULT: "3.11.11"
 
 jobs:
   build-push-artifact:
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 54b1b954f..abf464785 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -4,7 +4,7 @@ name: Checks
   pull_request:
 
 env:
-  PYTHON_VERSION_DEFAULT: "3.12.7"
+  PYTHON_VERSION_DEFAULT: "3.11.11"
 
 jobs:
   test:
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index ddc39caf8..961f0bef5 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -2,14 +2,14 @@ name: Release
 
 "on":
   push:
-    branches: ["main", "release/**"]
+    branches: ["main", "release/**", "dev"]
 
 concurrency:
   group: deploy
   cancel-in-progress: false # prevent hickups with semantic-release
 
 env:
-  PYTHON_VERSION_DEFAULT: "3.12.7"
+  PYTHON_VERSION_DEFAULT: "3.11.11"
 
 jobs:
   release:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bcf248b85..41b914d05 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
 default_language_version:
-  python: python3.12.7
+  python: python3.11.11
 ci:
   autoupdate_commit_msg: "chore: pre-commit autoupdate"
   autofix_commit_msg: "chore: pre-commit auto fixes [...]"
diff --git a/Makefile b/Makefile
index f39c6dabe..867007d2c 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ else
 endif
 
 CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]')
-BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF}
+BUCKET_NAME=gs://genetics_etl_python_playground/initialisation
 
 .PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST))
 
@@ -43,24 +43,31 @@ build-documentation: ## Create local server with documentation
 	@echo "Building Documentation..."
 	@uv run mkdocs serve
 
-create-dev-cluster: build ## Spin up a simple dataproc cluster with all dependencies for development purposes
+sync-cluster-init-script: ## Synchronise the cluster initialisation actions script to google cloud
+	@echo "Synching install_dependencies_on_cluster.sh to $(BUCKET_NAME)"
+	@gcloud storage cp utils/install_dependencies_on_cluster.sh $(BUCKET_NAME)/install_dependencies_on_cluster.sh
+
+create-dev-cluster: sync-cluster-init-script ## Spin up a simple dataproc cluster with all dependencies for development purposes
+	@echo "Making sure the branch is in sync with remote, so cluster can install gentropy dev version..."
+	@./utils/clean_status.sh || (echo "ERROR: Commit and push or stash local changes, to have up to date cluster"; exit 1)
 	@echo "Creating Dataproc Dev Cluster"
-	@gcloud config set project ${PROJECT_ID}
-	@gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
+	gcloud config set project ${PROJECT_ID}
+	gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
 		--image-version 2.2 \
 		--region ${REGION} \
-		--master-machine-type n1-standard-16 \
+		--master-machine-type n1-standard-2 \
 		--initialization-actions=$(BUCKET_NAME)/install_dependencies_on_cluster.sh \
-		--metadata="PACKAGE=$(BUCKET_NAME)/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl" \
+		--metadata="GENTROPY_REF=${REF}" \
 		--secondary-worker-type spot \
 		--worker-machine-type n1-standard-4 \
+		--public-ip-address \
 		--worker-boot-disk-size 500 \
 		--autoscaling-policy="projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/otg-etl" \
 		--optional-components=JUPYTER \
 		--enable-component-gateway \
 		--max-idle=60m
 
-make update-dev-cluster: build ## Reinstalls the package on the dev-cluster
+update-dev-cluster: build ## Reinstalls the package on the dev-cluster
 	@echo "Updating Dataproc Dev Cluster"
 	@gcloud config set project ${PROJECT_ID}
 	gcloud dataproc jobs submit pig --cluster="ot-genetics-dev-${CLEAN_PACKAGE_VERSION}" \
@@ -69,10 +76,4 @@ make update-dev-cluster: build ## Reinstalls the package on the dev-cluster
 		-e='sh chmod 750 $${PWD}/install_dependencies_on_cluster.sh; sh $${PWD}/install_dependencies_on_cluster.sh'
 
 build: clean ## Build Python package with dependencies
-	@gcloud config set project ${PROJECT_ID}
-	@echo "Packaging Code and Dependencies for ${APP_NAME}-${PACKAGE_VERSION}"
 	@uv build
-	@echo "Uploading to ${BUCKET_NAME}"
-	@gsutil cp src/${APP_NAME}/cli.py ${BUCKET_NAME}/
-	@gsutil cp ./dist/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl ${BUCKET_NAME}/
-	@gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}/
diff --git a/pyproject.toml b/pyproject.toml
index af7d0511a..4f2fafaee 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,20 +13,20 @@ requires-python = ">=3.10, <3.13"
 dependencies = [
   "pyspark (>=3.5.0, <3.6)",
   "hail (>=0.2.133, <0.3.0)",
-  "scipy (>=1.11.4)",
-  "hydra-core (>=1.3.2)",
-  "pyliftover (>=0.4.1)",
-  "numpy (>=1.26.4)",
-  "wandb (>=0.19.4)",
-  "omegaconf (>=2.3.0)",
-  "typing-extensions (>=4.12.2)",
-  "scikit-learn (>=1.6.1)",
-  "pandas[gcp,parquet] (>=2.2.3)",
-  "skops (>=0.11.0)",
-  "shap (>=0.46)",
-  "matplotlib (>=3.10.0)",
-  "google-cloud-secret-manager (>=2.12.6)",
-  "google-cloud-storage (>=2.14.0)"
+  "scipy (>=1.11.4, <1.12.0)",
+  "hydra-core (>=1.3.2, <1.4.0)",
+  "pyliftover (>=0.4.1, <0.5.0)",
+  "numpy (>=1.26.4, <1.27.0)",
+  "wandb (>=0.19.4, <0.20.0)",
+  "omegaconf (>=2.3.0, <2.4.0)",
+  "typing-extensions (>=4.12.2, <4.13.0)",
+  "scikit-learn (>=1.6.1, <1.7.0)",
+  "pandas[gcp,parquet] (>=2.2.3, <2.3.0)",
+  "skops (>=0.11.0, <0.12.0)",
+  "shap (>=0.46, <0.47)",
+  "matplotlib (>=3.10.0, <3.11.0)",
+  "google-cloud-secret-manager (>=2.12.6, <2.13.0)",
+  "google-cloud-storage (>=2.14.0, <2.15.0)"
 ]
 classifiers = [
   "Programming Language :: Python :: 3.10",
@@ -113,6 +113,11 @@ match = "(build|chore|ci|docs|feat|fix|perf|style|refactor|test)"
 prerelease = true
 prerelease_token = "alpha"
 
+[tool.semantic_release.branches."dev"]
+match = "dev"
+prerelease = true
+prerelease_token = "dev"
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
diff --git a/utils/clean_status.sh b/utils/clean_status.sh
new file mode 100755
index 000000000..80ce570e9
--- /dev/null
+++ b/utils/clean_status.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+echo "Fetching version changes..."
+git fetch
+# Exit 0 only when `git status --porcelain` prints nothing (clean working tree).
+# NOTE(review): unpushed commits are not detected by this check - confirm whether that is intended.
+if output=$(git status --porcelain) && [ -z "$output" ]; then
+    exit 0
+else
+    exit 1
+fi
diff --git a/utils/install_dependencies.sh b/utils/install_dependencies.sh
index f4140cfe0..4ccd491ba 100644
--- a/utils/install_dependencies.sh
+++ b/utils/install_dependencies.sh
@@ -1,5 +1,5 @@
 export SHELL_RC=$(echo "$HOME/.${SHELL##*/}rc")
-readonly PYTHON_VERSION=$(cat .python-version >&/dev/null || echo "3.12.7")
+readonly PYTHON_VERSION=$(cat .python-version 2>/dev/null || echo "3.11.11")
 
 if ! command -v uv &>/dev/null; then
     echo "uv was not found, installing uv..."
diff --git a/utils/install_dependencies_on_cluster.sh b/utils/install_dependencies_on_cluster.sh
index 849dee0c3..e5dd331d6 100644
--- a/utils/install_dependencies_on_cluster.sh
+++ b/utils/install_dependencies_on_cluster.sh
@@ -2,8 +2,8 @@
 
 set -exo pipefail
 
-readonly PACKAGE=$(/usr/share/google/get_metadata_value attributes/PACKAGE || true)
-
+readonly GENTROPY_REF=$(/usr/share/google/get_metadata_value attributes/GENTROPY_REF || true)
+readonly REPO_URI="https://github.com/opentargets/gentropy"
 function err() {
     echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $*" >&2
     exit 1
@@ -11,7 +11,7 @@ function err() {
 
 function run_with_retry() {
     local -r cmd=("$@")
-    for ((i = 0; i < 10; i++)); do
+    for ((i = 0; i < 3; i++)); do
         if "${cmd[@]}"; then
             return 0
         fi
@@ -37,33 +37,31 @@ function install_pip() {
     run_with_retry apt install python-pip -y
 }
 
+
+# Install uv with pip unless it is already available on PATH.
+function install_uv() {
+    if ! command -v uv >/dev/null; then
+        echo "Installing UV"
+        pip install uv
+    fi
+    return 0
+}
+
 function main() {
-    # Define a specific directory to download the files
-    local work_dir="/"
-    cd "${work_dir}" || err "Failed to change to working directory"
-    echo "Working directory: $(pwd)"
-
     # more meaningful errors from hydra
     echo "export HYDRA_FULL_ERROR=1" | tee --append /etc/profile
     source /etc/profile
 
-    if [[ -z "${PACKAGE}" ]]; then
-        echo "ERROR: Must specify PACKAGE metadata key"
+    if [[ -z "${GENTROPY_REF}" ]]; then
+        echo "ERROR: Must specify GENTROPY_REF metadata key"
         exit 1
     fi
     install_pip
+    install_uv
 
-    echo "Downloading package..."
-    gsutil cp ${PACKAGE} . || err "Failed to download PACKAGE"
-    PACKAGENAME=$(basename ${PACKAGE})
-
-    echo "Uninstalling previous version if it exists"
     pip uninstall -y gentropy
     echo "Install package..."
-    # NOTE: ensure the gentropy is reinstalled each time without version cache
-    # see https://pip.pypa.io/en/stable/cli/pip_install/#cmdoption-force-reinstall
-    run_with_retry pip install --force-reinstall --ignore-installed ${PACKAGENAME}
-
+    run_with_retry uv pip install --no-break-system-packages --system "gentropy @ git+${REPO_URI}.git@${GENTROPY_REF}"
 }
 
 main
diff --git a/uv.lock b/uv.lock
index 026aa34c8..3bf044251 100644
--- a/uv.lock
+++ b/uv.lock
@@ -778,11 +778,11 @@ wheels = [
 
 [[package]]
 name = "executing"
-version = "2.1.0"
+version = "2.2.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 }
+sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 },
+    { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
 ]
 
 [[package]]
@@ -985,22 +985,22 @@ test = [
 
 [package.metadata]
 requires-dist = [
-    { name = "google-cloud-secret-manager", specifier = ">=2.12.6" },
-    { name = "google-cloud-storage", specifier = ">=2.14.0" },
+    { name = "google-cloud-secret-manager", specifier = ">=2.12.6,<2.13.0" },
+    { name = "google-cloud-storage", specifier = ">=2.14.0,<2.15.0" },
     { name = "hail",
specifier = ">=0.2.133,<0.3.0" }, - { name = "hydra-core", specifier = ">=1.3.2" }, - { name = "matplotlib", specifier = ">=3.10.0" }, - { name = "numpy", specifier = ">=1.26.4" }, - { name = "omegaconf", specifier = ">=2.3.0" }, - { name = "pandas", extras = ["gcp", "parquet"], specifier = ">=2.2.3" }, - { name = "pyliftover", specifier = ">=0.4.1" }, + { name = "hydra-core", specifier = ">=1.3.2,<1.4.0" }, + { name = "matplotlib", specifier = ">=3.10.0,<3.11.0" }, + { name = "numpy", specifier = ">=1.26.4,<1.27.0" }, + { name = "omegaconf", specifier = ">=2.3.0,<2.4.0" }, + { name = "pandas", extras = ["gcp", "parquet"], specifier = ">=2.2.3,<2.3.0" }, + { name = "pyliftover", specifier = ">=0.4.1,<0.5.0" }, { name = "pyspark", specifier = ">=3.5.0,<3.6" }, - { name = "scikit-learn", specifier = ">=1.6.1" }, - { name = "scipy", specifier = ">=1.11.4" }, - { name = "shap", specifier = ">=0.46" }, - { name = "skops", specifier = ">=0.11.0" }, - { name = "typing-extensions", specifier = ">=4.12.2" }, - { name = "wandb", specifier = ">=0.19.4" }, + { name = "scikit-learn", specifier = ">=1.6.1,<1.7.0" }, + { name = "scipy", specifier = ">=1.11.4,<1.12.0" }, + { name = "shap", specifier = ">=0.46,<0.47" }, + { name = "skops", specifier = ">=0.11.0,<0.12.0" }, + { name = "typing-extensions", specifier = ">=4.12.2,<4.13.0" }, + { name = "wandb", specifier = ">=0.19.4,<0.20.0" }, ] [package.metadata.requires-dev] @@ -1081,11 +1081,17 @@ wheels = [ name = "google-api-core" version = "2.10.2" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'linux'", + "python_full_version >= '3.12' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", +] dependencies = [ - { name = "google-auth" }, - { name = "googleapis-common-protos" }, - { name = "protobuf" }, - { name = "requests" }, + { name = 
"google-auth", marker = "python_full_version >= '3.11'" }, + { name = "googleapis-common-protos", marker = "python_full_version >= '3.11'" }, + { name = "protobuf", marker = "python_full_version >= '3.11'" }, + { name = "requests", marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fe/d6/5dd223fc5cb476ab62966849f3e6466056799dbfc005d74da800eb097f7d/google-api-core-2.10.2.tar.gz", hash = "sha256:10c06f7739fe57781f87523375e8e1a3a4674bf6392cd6131a3222182b971320", size = 123691 } wheels = [ @@ -1094,8 +1100,34 @@ wheels = [ [package.optional-dependencies] grpc = [ - { name = "grpcio" }, - { name = "grpcio-status" }, + { name = "grpcio", marker = "python_full_version >= '3.11'" }, + { name = "grpcio-status", marker = "python_full_version >= '3.11'" }, +] + +[[package]] +name = "google-api-core" +version = "2.24.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11' and sys_platform == 'linux'", + "python_full_version < '3.11' and sys_platform != 'linux'", +] +dependencies = [ + { name = "google-auth", marker = "python_full_version < '3.11'" }, + { name = "googleapis-common-protos", marker = "python_full_version < '3.11'" }, + { name = "proto-plus", marker = "python_full_version < '3.11'" }, + { name = "protobuf", marker = "python_full_version < '3.11'" }, + { name = "requests", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/56/d70d66ed1b5ab5f6c27bf80ec889585ad8f865ff32acbafd3b2ef0bfb5d0/google_api_core-2.24.0.tar.gz", hash = "sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf", size = 162647 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/76/65b8b94e74bf1b6d1cc38d916089670c4da5029d25762441d8c5c19e51dd/google_api_core-2.24.0-py3-none-any.whl", hash = "sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9", size = 158576 }, +] + 
+[package.optional-dependencies] +grpc = [ + { name = "grpcio", marker = "python_full_version < '3.11'" }, + { name = "grpcio-status", marker = "python_full_version < '3.11'" }, ] [[package]] @@ -1130,7 +1162,8 @@ name = "google-cloud-bigquery" version = "3.18.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "google-api-core" }, + { name = "google-api-core", version = "2.10.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "google-api-core", version = "2.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "google-cloud-core" }, { name = "google-resumable-media" }, { name = "packaging" }, @@ -1147,7 +1180,8 @@ name = "google-cloud-core" version = "2.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "google-api-core" }, + { name = "google-api-core", version = "2.10.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "google-api-core", version = "2.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "google-auth" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b8/1f/9d1e0ba6919668608570418a9a51e47070ac15aeff64261fb092d8be94c0/google-cloud-core-2.4.1.tar.gz", hash = "sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073", size = 35587 } @@ -1160,7 +1194,8 @@ name = "google-cloud-secret-manager" version = "2.12.6" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "google-api-core", extra = ["grpc"] }, + { name = "google-api-core", version = "2.10.2", source = { registry = "https://pypi.org/simple" }, extra = ["grpc"], marker = "python_full_version >= '3.11'" }, + { name = "google-api-core", version = "2.24.0", source = { registry = "https://pypi.org/simple" }, extra = ["grpc"], marker = "python_full_version < '3.11'" 
}, { name = "grpc-google-iam-v1" }, { name = "proto-plus" }, { name = "protobuf" }, @@ -1175,7 +1210,8 @@ name = "google-cloud-storage" version = "2.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "google-api-core" }, + { name = "google-api-core", version = "2.10.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "google-api-core", version = "2.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "google-auth" }, { name = "google-cloud-core" }, { name = "google-crc32c" }, @@ -2410,7 +2446,8 @@ version = "0.26.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "db-dtypes" }, - { name = "google-api-core" }, + { name = "google-api-core", version = "2.10.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "google-api-core", version = "2.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "google-auth" }, { name = "google-auth-oauthlib" }, { name = "google-cloud-bigquery" },