Skip to content

Commit

Permalink
build: new setup
Browse files Browse the repository at this point in the history
  • Loading branch information
Szymon Szyszkowski committed Jan 23, 2025
1 parent f1ff1f9 commit 98d464d
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/artifact.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ env:
REGION: europe-west1
GAR_LOCATION: europe-west1-docker.pkg.dev/open-targets-genetics-dev
REPOSITORY: gentropy-app
PYTHON_VERSION_DEFAULT: "3.12.7"
PYTHON_VERSION_DEFAULT: "3.11.11"

jobs:
build-push-artifact:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ name: Checks
pull_request:

env:
PYTHON_VERSION_DEFAULT: "3.12.7"
PYTHON_VERSION_DEFAULT: "3.11.11"

jobs:
test:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ name: Release

"on":
push:
branches: ["main", "release/**"]
branches: ["main", "release/**", "dev"]

concurrency:
group: deploy
cancel-in-progress: false # never cancel a run that is already in progress; prevents hiccups with semantic-release

env:
PYTHON_VERSION_DEFAULT: "3.12.7"
PYTHON_VERSION_DEFAULT: "3.11.11"

jobs:
release:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
default_language_version:
python: python3.12.7
python: python3.11.11
ci:
autoupdate_commit_msg: "chore: pre-commit autoupdate"
autofix_commit_msg: "chore: pre-commit auto fixes [...]"
Expand Down
28 changes: 14 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ else
endif

CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]')
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF}
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation

.PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST))

Expand Down Expand Up @@ -43,24 +43,30 @@ build-documentation: ## Create local server with documentation
@echo "Building Documentation..."
@uv run mkdocs serve

create-dev-cluster: build ## Spin up a simple dataproc cluster with all dependencies for development purposes
sync-cluster-init-script: ## Synchronise the cluster initialisation actions script to Google Cloud
@echo "Synching install_dependencies_on_cluster.sh to $(BUCKET_NAME)"
@gcloud storage cp utils/install_dependencies_on_cluster.sh $(BUCKET_NAME)/install_dependencies_on_cluster.sh

create-dev-cluster: sync-cluster-init-script## Spin up a simple dataproc cluster with all dependencies for development purposes
@echo "Making sure the branch is in sync with remote, so cluster can install gentropy dev version..."
@./utils/clean_status.sh || (echo "ERROR: Commit and push or stash local changes, to have up to date cluster"; exit 1)
@echo "Creating Dataproc Dev Cluster"
@gcloud config set project ${PROJECT_ID}
@gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
gcloud config set project ${PROJECT_ID}
gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
--image-version 2.2 \
--region ${REGION} \
--master-machine-type n1-standard-16 \
--initialization-actions=$(BUCKET_NAME)/install_dependencies_on_cluster.sh \
--metadata="PACKAGE=$(BUCKET_NAME)/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl" \
--master-machine-type n1-standard-2 \
--metadata="GENTROPY_REF=${REF}" \
--secondary-worker-type spot \
--worker-machine-type n1-standard-4 \
--public-ip-address \
--worker-boot-disk-size 500 \
--autoscaling-policy="projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/otg-etl" \
--optional-components=JUPYTER \
--enable-component-gateway \
--max-idle=60m

make update-dev-cluster: build ## Reinstalls the package on the dev-cluster
update-dev-cluster: build ## Reinstalls the package on the dev-cluster
@echo "Updating Dataproc Dev Cluster"
@gcloud config set project ${PROJECT_ID}
gcloud dataproc jobs submit pig --cluster="ot-genetics-dev-${CLEAN_PACKAGE_VERSION}" \
Expand All @@ -69,10 +75,4 @@ make update-dev-cluster: build ## Reinstalls the package on the dev-cluster
-e='sh chmod 750 $${PWD}/install_dependencies_on_cluster.sh; sh $${PWD}/install_dependencies_on_cluster.sh'

build: clean ## Build Python package with dependencies
@gcloud config set project ${PROJECT_ID}
@echo "Packaging Code and Dependencies for ${APP_NAME}-${PACKAGE_VERSION}"
@uv build
@echo "Uploading to ${BUCKET_NAME}"
@gsutil cp src/${APP_NAME}/cli.py ${BUCKET_NAME}/
@gsutil cp ./dist/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl ${BUCKET_NAME}/
@gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}/
33 changes: 19 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,20 @@ requires-python = ">=3.10, <3.13"
dependencies = [
"pyspark (>=3.5.0, <3.6)",
"hail (>=0.2.133, <0.3.0)",
"scipy (>=1.11.4)",
"hydra-core (>=1.3.2)",
"pyliftover (>=0.4.1)",
"numpy (>=1.26.4)",
"wandb (>=0.19.4)",
"omegaconf (>=2.3.0)",
"typing-extensions (>=4.12.2)",
"scikit-learn (>=1.6.1)",
"pandas[gcp,parquet] (>=2.2.3)",
"skops (>=0.11.0)",
"shap (>=0.46)",
"matplotlib (>=3.10.0)",
"google-cloud-secret-manager (>=2.12.6)",
"google-cloud-storage (>=2.14.0)"
"scipy (>=1.11.4, <1.12.0)",
"hydra-core (>=1.3.2, <1.4.0)",
"pyliftover (>=0.4.1, <0.5.0)",
"numpy (>=1.26.4, <1.27.0)",
"wandb (>=0.19.4, <0.20.0)",
"omegaconf (>=2.3.0, <2.4.0)",
"typing-extensions (>=4.12.2, <4.13.0)",
"scikit-learn (>=1.6.1, <1.7.0)",
"pandas[gcp,parquet] (>=2.2.3, <2.3.0)",
"skops (>=0.11.0, <0.12.0)",
"shap (>=0.46, <0.47)",
"matplotlib (>=3.10.0, <3.11.0)",
"google-cloud-secret-manager (>=2.12.6, <2.13.0)",
"google-cloud-storage (>=2.14.0, <2.15.0)"
]
classifiers = [
"Programming Language :: Python :: 3.10",
Expand Down Expand Up @@ -113,6 +113,11 @@ match = "(build|chore|ci|docs|feat|fix|perf|style|refactor|test)"
prerelease = true
prerelease_token = "alpha"

# Release rule for the "dev" branch: versions are marked as pre-releases and use
# "dev" as the pre-release token.
[tool.semantic_release.branches."dev"]
match = "dev"
prerelease = true
prerelease_token = "dev"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
Expand Down
9 changes: 9 additions & 0 deletions utils/clean_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
#
# Verify that the local checkout matches what is available on the remote.
#
# Exit status:
#   0 - the working tree is clean and, when the current branch tracks an
#       upstream, HEAD has no commits that are missing from that upstream.
#   1 - there are uncommitted/untracked changes, unpushed commits, or a git
#       command needed for the check failed.

echo "Fetching version changes..."
# Refresh remote-tracking refs so the unpushed-commit check below compares
# against the current state of the remote.
git fetch

# `git status --porcelain` prints one line per modified or untracked path;
# any output (or a failing git call) means the tree is not clean.
if ! pending=$(git status --porcelain) || [ -n "$pending" ]; then
    exit 1
fi

# A clean tree is not enough: commits that exist only locally are not part of
# the remote ref. Only enforce this when an upstream is configured, so
# checkouts without one (e.g. detached HEAD) keep the previous behaviour.
if upstream=$(git rev-parse --abbrev-ref --symbolic-full-name '@{u}' 2>/dev/null); then
    unpushed=$(git rev-list --count "${upstream}..HEAD") || exit 1
    if [ "$unpushed" -ne 0 ]; then
        echo "Found ${unpushed} local commit(s) not pushed to ${upstream}" >&2
        exit 1
    fi
fi

exit 0
2 changes: 1 addition & 1 deletion utils/install_dependencies.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export SHELL_RC=$(echo "$HOME/.${SHELL##*/}rc")
readonly PYTHON_VERSION=$(cat .python-version >&/dev/null || echo "3.12.7")
readonly PYTHON_VERSION=$(cat .python-version >&/dev/null || echo "3.11.11")

if ! command -v uv &>/dev/null; then
echo "uv was not found, installing uv..."
Expand Down
35 changes: 16 additions & 19 deletions utils/install_dependencies_on_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

set -exo pipefail

readonly PACKAGE=$(/usr/share/google/get_metadata_value attributes/PACKAGE || true)

readonly GENTROPY_REF=$(/usr/share/google/get_metadata_value attributes/GENTROPY_REF || true)
readonly REPO_URI="https://github.com/opentargets/gentropy"
function err() {
echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $*" >&2
exit 1
}

function run_with_retry() {
local -r cmd=("$@")
for ((i = 0; i < 10; i++)); do
for ((i = 0; i < 3; i++)); do
if "${cmd[@]}"; then
return 0
fi
Expand All @@ -37,33 +37,30 @@ function install_pip() {
run_with_retry apt install python-pip -y
}


# Install uv with pip unless a `uv` executable is already available.
# The function body is a ( ... ) group, so it executes in a subshell.
# Always returns 0 when it reaches the end; a failing `pip install` is still
# subject to the script-level `set -e`.
function install_uv() (
    # The space after `!` is required: bash parses `!command` as a single word,
    # i.e. the name of a command that does not exist, so the condition could
    # never be true and uv was never installed.
    if ! command -v uv >/dev/null; then
        echo "Installing UV"
        pip install uv
    fi
    return 0
)

function main() {
# Define a specific directory to download the files
local work_dir="/"
cd "${work_dir}" || err "Failed to change to working directory"
echo "Working directory: $(pwd)"

# more meaningful errors from hydra
echo "export HYDRA_FULL_ERROR=1" | tee --append /etc/profile
source /etc/profile

if [[ -z "${PACKAGE}" ]]; then
echo "ERROR: Must specify PACKAGE metadata key"
if [[ -z "${GENTROPY_REF}" ]]; then
echo "ERROR: Must specify GENTROPY_REF metadata key"
exit 1
fi
install_pip
install_uv

echo "Downloading package..."
gsutil cp ${PACKAGE} . || err "Failed to download PACKAGE"
PACKAGENAME=$(basename ${PACKAGE})

echo "Uninstalling previous version if it exists"
pip uninstall -y gentropy
echo "Install package..."
# NOTE: ensure the gentropy is reinstalled each time without version cache
# see https://pip.pypa.io/en/stable/cli/pip_install/#cmdoption-force-reinstall
run_with_retry pip install --force-reinstall --ignore-installed ${PACKAGENAME}

# Install gentropy straight from the git repository at the requested ref.
# The repository URL is held in REPO_URI (declared near the top of this
# script); no variable named REPO is defined in the visible script, so using it
# would expand to an empty string and yield the invalid URL "git+.git@<ref>".
run_with_retry uv pip install --no-break-system-packages --system "gentropy @ git+${REPO_URI}.git@${GENTROPY_REF}"
}

main
93 changes: 65 additions & 28 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 98d464d

Please sign in to comment.