Skip to content

Commit ed66f2a

Browse files
author
Szymon Szyszkowski
committed
chore: typos
1 parent ed4f81e commit ed66f2a

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

images/gentropy/Dockerfile

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
FROM europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/gentropy:dev
2-
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
3-
| tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
4-
&& curl https://packages.cloud.google.com/apt/doc/apt-key.gpg\
5-
| gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
6-
&& apt-get update -y && apt-get install google-cloud-cli -y
1+
FROM europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/gentropy:2.0.1
2+
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" |
3+
tee -a /etc/apt/sources.list.d/google-cloud-sdk.list &&
4+
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg &&
5+
apt-get update -y && apt-get install google-cloud-cli -y
76
COPY --chmod=0755 images/gentropy/scripts/harmonise-sumstats.sh harmonise-sumstats.sh

images/gentropy/scripts/harmonise-sumstats.sh

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#!/bin/bash
22
# Script run by the Google Batch job to perform the harmonisation and QC steps
3+
# Requirements:
4+
# 1. Gentropy & poetry
5+
# 2. gsutil
6+
# 3. gzip
37

48
# set -x
59

@@ -9,28 +13,29 @@ readonly QC_FILE=$3
913
readonly QC_THRESHOLD=$4
1014
export HYDRA_FULL_ERROR=1
1115

16+
logging() {
17+
log_prompt="[$(date "+%Y.%m.%d %H:%M")]"
18+
echo "${log_prompt} $@" | tee -a ${LOCAL_LOG_FILE}
19+
}
20+
1221
# NOTE: Harmonised path contains ${output_path}/harmonised_sumstats/${study_id}
13-
HARMONISATION_DIR=`dirname $HARMONISED_FILE`
14-
OUTPUT_PATH=`dirname $HARMONISATION_DIR`
15-
STUDY_ID=`basename $HARMONISED_FILE`
22+
HARMONISATION_DIR=$(dirname $HARMONISED_FILE)
23+
OUTPUT_PATH=$(dirname $HARMONISATION_DIR)
24+
STUDY_ID=$(basename $HARMONISED_FILE)
1625
LOCAL_LOG_FILE="harmonisation.log"
1726
LOCAL_SUMMARY_FILE=harmonisation.csv
18-
RAW_LOCAL_FILE=`basename $RAW_FILE`
27+
RAW_LOCAL_FILE=$(basename $RAW_FILE)
1928
UNZIPPED_RAW_LOCAL_FILE="${RAW_LOCAL_FILE%.*}"
2029

30+
# Make sure we start with a clean setup
2131
if [ -f ${LOCAL_SUMMARY_FILE} ]; then
2232
rm -rf ${LOCAL_SUMMARY_FILE}
2333
fi
24-
echo "study,harmonisationExitCode,qcExitCode,rawSumstatFile,rawSumstatFileSize,rawUnzippedSumstatFileSize" > $LOCAL_SUMMARY_FILE
34+
echo "study,harmonisationExitCode,qcExitCode,rawSumstatFile,rawSumstatFileSize,rawUnzippedSumstatFileSize" >$LOCAL_SUMMARY_FILE
2535

2636
if [ -f ${LOCAL_LOG_FILE} ]; then
2737
rm -rf ${LOCAL_LOG_FILE}
2838
fi
29-
logging(){
30-
log_prompt="[$(date "+%Y.%m.%d %H:%M")]"
31-
echo "${log_prompt} $@" | tee -a ${LOCAL_LOG_FILE}
32-
}
33-
3439

3540
logging "Copying raw summary statistics from ${RAW_FILE} to ${RAW_LOCAL_FILE}"
3641
gsutil cp $RAW_FILE $RAW_LOCAL_FILE
@@ -53,12 +58,11 @@ poetry run gentropy step=gwas_catalog_sumstat_preprocess \
5358
+step.session.extended_spark_conf="{spark.dynamicAllocation.enabled:false}" \
5459
+step.session.extended_spark_conf="{spark.driver.memory:16g}" \
5560
+step.session.extended_spark_conf="{spark.kryoserializer.buffer.max:500m}" \
56-
+step.session.extended_spark_conf="{spark.driver.maxResultSize:5g}" >> ${LOCAL_LOG_FILE} 2>&1
61+
+step.session.extended_spark_conf="{spark.driver.maxResultSize:5g}" >>${LOCAL_LOG_FILE} 2>&1
5762
# NOTE: cannot pipe the output through tee here, otherwise $? would be the exit code of tee (always 0) instead of the step's exit code
5863
HARMONISATION_EXIT_CODE=$?
5964
logging "Harmonisation exit code: ${HARMONISATION_EXIT_CODE}"
6065

61-
6266
logging "Running qc on ${HARMONISED_FILE} file"
6367
poetry run gentropy step=summary_statistics_qc \
6468
step.gwas_path=$HARMONISED_FILE \
@@ -69,15 +73,14 @@ poetry run gentropy step=summary_statistics_qc \
6973
+step.session.extended_spark_conf="{spark.dynamicAllocation.enabled:false}" \
7074
+step.session.extended_spark_conf="{spark.driver.memory:16g}" \
7175
+step.session.extended_spark_conf="{spark.kryoserializer.buffer.max:500m}" \
72-
+step.session.extended_spark_conf="{spark.driver.maxResultSize:5g}" >> ${LOCAL_LOG_FILE} 2>&1
76+
+step.session.extended_spark_conf="{spark.driver.maxResultSize:5g}" >>${LOCAL_LOG_FILE} 2>&1
7377
QC_EXIT_CODE=$?
7478
logging "QC exit code: ${QC_EXIT_CODE}"
7579

76-
77-
echo "$STUDY_ID,$HARMONISATION_EXIT_CODE,$QC_EXIT_CODE,$RAW_FILE,$RAW_FILE_SIZE,$UNZIPPED_FILE_SIZE" >> $LOCAL_SUMMARY_FILE
80+
echo "$STUDY_ID,$HARMONISATION_EXIT_CODE,$QC_EXIT_CODE,$RAW_FILE,$RAW_FILE_SIZE,$UNZIPPED_FILE_SIZE" >>$LOCAL_SUMMARY_FILE
7881

7982
clean_up() {
80-
# ensure the logs from the job and summary of harmonisation & qc are outputed and preserved (latest are overwrtitten and dated are maintained)
83+
# ensure the logs from the job and the summary of harmonisation & qc are preserved (the latest copy is overwritten, dated copies are kept)
8184
DATE=$(date "+%Y%m%d%H%M")
8285
REMOTE_LOG_FILE="${OUTPUT_PATH}/harmonisation_summary/${STUDY_ID}/${DATE}/harmonisation.log"
8386
LATEST_REMOTE_LOG_FILE="${OUTPUT_PATH}/harmonisation_summary/${STUDY_ID}/latest/harmonisation.log"

0 commit comments

Comments
 (0)