-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
36 changed files
with
1,709 additions
and
801 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
--- | ||
name: Build and Push to Artifact Registry | ||
|
||
on: | ||
push: | ||
branches: | ||
- dev | ||
paths: | ||
- images/gentropy/scripts/harmonise-sumstats.sh | ||
- images/gentropy/Dockerfile | ||
|
||
env: | ||
PROJECT_ID: open-targets-genetics-dev | ||
REGION: europe-west1 | ||
GAR_LOCATION: europe-west1-docker.pkg.dev/open-targets-genetics-dev | ||
REPOSITORY: gentropy-app | ||
PYTHON_VERSION_DEFAULT: "3.10.8" | ||
|
||
jobs: | ||
build-push-artifact: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: "Checkout" | ||
uses: "actions/checkout@v3" | ||
|
||
- name: "auth" | ||
uses: "google-github-actions/auth@v2" | ||
with: | ||
credentials_json: "${{ secrets.GC_SERVICE_ACCOUNT_KEY }}" | ||
|
||
- name: "Set up Cloud SDK" | ||
uses: "google-github-actions/setup-gcloud@v2" | ||
|
||
- name: "Use gcloud CLI" | ||
run: "gcloud info" | ||
|
||
- name: Set up QEMU | ||
uses: docker/setup-qemu-action@v3 | ||
|
||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v3 | ||
|
||
- name: "Docker auth" | ||
run: |- | ||
gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev --quiet | ||
- name: Quick Docker build (gentropy image overloaded with orchestration logic, AMD64 only, with layer cache) | ||
uses: docker/build-push-action@v6 | ||
with: | ||
platforms: linux/amd64 | ||
push: true | ||
tags: "${{ env.GAR_LOCATION }}/${{ env.REPOSITORY }}/ot_gentropy:${{ github.ref_name }}" | ||
context: . | ||
cache-from: type=gha | ||
cache-to: type=gha,mode=max | ||
file: images/gentropy/Dockerfile |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,9 +147,6 @@ dmypy.json | |
# ruff | ||
.ruff_cache | ||
|
||
# vscode | ||
.vscode | ||
|
||
# npm | ||
node_modules | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"python.analysis.typeCheckingMode": "basic", | ||
"files.associations": { | ||
"*.conf": "hocon", | ||
}, | ||
"yaml.schemas": { | ||
"https://raw.githubusercontent.com/opentargets/pis/refs/heads/sane-paths/.vscode/config-schema.json": "pis.yaml" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Extends the gentropy image with the Google Cloud CLI and the harmonisation script. | ||
# NOTE(review): google-cloud-cli is presumably what supplies the gsutil command the script calls - confirm. | ||
FROM europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/gentropy:v2.0.1 | ||
# curl -fsSL fails on HTTP errors instead of piping an error page into gpg; | ||
# the apt package lists are removed in the same layer so they do not bloat the image. | ||
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \ | ||
| tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \ | ||
&& curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \ | ||
| gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \ | ||
&& apt-get update -y && apt-get install google-cloud-cli -y \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
COPY --chmod=0755 images/gentropy/scripts/harmonise-sumstats.sh harmonise-sumstats.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/bin/bash | ||
# Script for running harmonisation and qc steps by the google batch job | ||
# Requirements: | ||
# 1. Gentropy & poetry | ||
# 2. gsutil | ||
# 3. gzip | ||
|
||
# set -x | ||
|
||
# Positional arguments: | ||
#   $1 RAW_FILE        - location of the raw summary statistics file; fetched with gsutil cp and then gunzipped | ||
#   $2 HARMONISED_FILE - output path of the harmonisation step (also the input of the QC step) | ||
#   $3 QC_FILE         - output path of the QC step | ||
#   $4 QC_THRESHOLD    - value passed to the QC step as step.pval_threshold | ||
readonly RAW_FILE=$1 | ||
readonly HARMONISED_FILE=$2 | ||
readonly QC_FILE=$3 | ||
readonly QC_THRESHOLD=$4 | ||
# NOTE(review): presumably makes Hydra print complete stack traces on failure - confirm against the Hydra docs | ||
export HYDRA_FULL_ERROR=1 | ||
|
||
# Print a timestamped message to stdout and append it to ${LOCAL_LOG_FILE}. | ||
# Arguments: the message words; they are joined into a single line. | ||
logging() { | ||
    local log_prompt | ||
    log_prompt="[$(date "+%Y.%m.%d %H:%M")]" | ||
    # "$*" joins all arguments into one word; "$@" inside a longer string would split it. | ||
    echo "${log_prompt} $*" | tee -a "${LOCAL_LOG_FILE}" | ||
} | ||
|
||
# NOTE: Harmonised path contains ${output_path}/harmonised_sumstats/${study_id} | ||
# Expansions are quoted so paths containing spaces or glob characters survive intact. | ||
HARMONISATION_DIR=$(dirname "${HARMONISED_FILE}") | ||
OUTPUT_PATH=$(dirname "${HARMONISATION_DIR}") | ||
STUDY_ID=$(basename "${HARMONISED_FILE}") | ||
LOCAL_LOG_FILE="harmonisation.log" | ||
LOCAL_SUMMARY_FILE="harmonisation.csv" | ||
RAW_LOCAL_FILE=$(basename "${RAW_FILE}") | ||
# Drop the last extension; assumes the raw file carries a single compression suffix such as .gz | ||
UNZIPPED_RAW_LOCAL_FILE="${RAW_LOCAL_FILE%.*}" | ||
|
||
# Make sure we start with clean setup | ||
if [ -f ${LOCAL_SUMMARY_FILE} ]; then | ||
rm -rf ${LOCAL_SUMMARY_FILE} | ||
fi | ||
echo "study,harmonisationExitCode,qcExitCode,rawSumstatFile,rawSumstatFileSize,rawUnzippedSumstatFileSize" >$LOCAL_SUMMARY_FILE | ||
|
||
if [ -f ${LOCAL_LOG_FILE} ]; then | ||
rm -rf ${LOCAL_LOG_FILE} | ||
fi | ||
|
||
# Fetch the raw summary statistics, record the compressed size, gunzip in place and | ||
# record the uncompressed size. Both sizes end up in the summary CSV. | ||
# Paths are quoted so values containing spaces or glob characters are passed as one argument. | ||
logging "Copying raw summary statistics from ${RAW_FILE} to ${RAW_LOCAL_FILE}" | ||
gsutil cp "${RAW_FILE}" "${RAW_LOCAL_FILE}" | ||
|
||
RAW_FILE_SIZE=$(du -sh "${RAW_LOCAL_FILE}" | cut -f1) | ||
logging "Raw file size ${RAW_FILE_SIZE}" | ||
|
||
logging "Unzipping ${RAW_LOCAL_FILE} to ${UNZIPPED_RAW_LOCAL_FILE}" | ||
gzip -d "${RAW_LOCAL_FILE}" | ||
|
||
UNZIPPED_FILE_SIZE=$(du -sh "${UNZIPPED_RAW_LOCAL_FILE}" | cut -f1) | ||
logging "Unzipped file size ${UNZIPPED_FILE_SIZE}" | ||
|
||
# Run the gentropy gwas_catalog_sumstat_preprocess step on the unzipped raw file, | ||
# writing the result to ${HARMONISED_FILE} in overwrite mode. | ||
# stdout and stderr of the step are appended to ${LOCAL_LOG_FILE}; its exit code is | ||
# captured so it can be written to the summary CSV and used as the script's exit status. | ||
logging "Running harmonisation on ${UNZIPPED_RAW_LOCAL_FILE} file" | ||
poetry run gentropy step=gwas_catalog_sumstat_preprocess \ | ||
step.raw_sumstats_path=$UNZIPPED_RAW_LOCAL_FILE \ | ||
step.out_sumstats_path=$HARMONISED_FILE \ | ||
step.session.write_mode=overwrite \ | ||
+step.session.extended_spark_conf="{spark.jars:https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar}" \ | ||
+step.session.extended_spark_conf="{spark.dynamicAllocation.enabled:false}" \ | ||
+step.session.extended_spark_conf="{spark.driver.memory:16g}" \ | ||
+step.session.extended_spark_conf="{spark.kryoserializer.buffer.max:500m}" \ | ||
+step.session.extended_spark_conf="{spark.driver.maxResultSize:5g}" >>${LOCAL_LOG_FILE} 2>&1 | ||
# NOTE: the output cannot be piped through tee here; $? would then report tee's status (always 0) instead of gentropy's | ||
HARMONISATION_EXIT_CODE=$? | ||
logging "Harmonisation exit code: ${HARMONISATION_EXIT_CODE}" | ||
|
||
# Run the gentropy summary_statistics_qc step on the harmonised output, writing the | ||
# QC result to ${QC_FILE}. It runs regardless of the harmonisation exit code. | ||
# stdout and stderr are appended to ${LOCAL_LOG_FILE}; the exit code is captured from $? | ||
# immediately after the command, before any other command can overwrite it. | ||
logging "Running qc on ${HARMONISED_FILE} file" | ||
poetry run gentropy step=summary_statistics_qc \ | ||
step.gwas_path=$HARMONISED_FILE \ | ||
step.output_path=$QC_FILE \ | ||
step.pval_threshold=$QC_THRESHOLD \ | ||
step.session.write_mode=overwrite \ | ||
+step.session.extended_spark_conf="{spark.jars:https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar}" \ | ||
+step.session.extended_spark_conf="{spark.dynamicAllocation.enabled:false}" \ | ||
+step.session.extended_spark_conf="{spark.driver.memory:16g}" \ | ||
+step.session.extended_spark_conf="{spark.kryoserializer.buffer.max:500m}" \ | ||
+step.session.extended_spark_conf="{spark.driver.maxResultSize:5g}" >>${LOCAL_LOG_FILE} 2>&1 | ||
QC_EXIT_CODE=$? | ||
logging "QC exit code: ${QC_EXIT_CODE}" | ||
|
||
# Append the single data row for this study; column order matches the CSV header written earlier. | ||
echo "$STUDY_ID,$HARMONISATION_EXIT_CODE,$QC_EXIT_CODE,$RAW_FILE,$RAW_FILE_SIZE,$UNZIPPED_FILE_SIZE" >>$LOCAL_SUMMARY_FILE | ||
|
||
# Upload the local log and the summary CSV below ${OUTPUT_PATH}/harmonisation_summary/${STUDY_ID}. | ||
# Each file is copied twice: into a timestamped directory (kept as history) and into | ||
# "latest" (overwritten on every run). Installed as the EXIT trap of this script. | ||
clean_up() { | ||
    local date_stamp summary_root | ||
    date_stamp=$(date "+%Y%m%d%H%M") | ||
    summary_root="${OUTPUT_PATH}/harmonisation_summary/${STUDY_ID}" | ||
|
||
    # Every path is quoted so values containing spaces or glob characters are passed intact. | ||
    gsutil cp "${LOCAL_LOG_FILE}" "${summary_root}/${date_stamp}/harmonisation.log" | ||
    gsutil cp "${LOCAL_LOG_FILE}" "${summary_root}/latest/harmonisation.log" | ||
|
||
    gsutil cp "${LOCAL_SUMMARY_FILE}" "${summary_root}/${date_stamp}/harmonisation.csv" | ||
    gsutil cp "${LOCAL_SUMMARY_FILE}" "${summary_root}/latest/harmonisation.csv" | ||
} | ||
|
||
# Upload logs and summary whenever the script exits, whatever the exit status. | ||
trap clean_up EXIT | ||
|
||
# Exit with the first non-zero exit code (harmonisation, then QC); otherwise 0. | ||
# Previously only the harmonisation code was returned, so a QC-only failure exited 0. | ||
if [ "${HARMONISATION_EXIT_CODE}" -ne 0 ]; then | ||
    exit "${HARMONISATION_EXIT_CODE}" | ||
fi | ||
exit "${QC_EXIT_CODE}" |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.