1
1
#! /bin/bash
2
2
# Script run by the Google Batch job to perform the harmonisation and QC steps
3
+ # Requirements:
4
+ # 1. Gentropy & poetry
5
+ # 2. gsutil
6
+ # 3. gzip
3
7
4
8
# set -x
5
9
@@ -9,28 +13,29 @@ readonly QC_FILE=$3
9
13
readonly QC_THRESHOLD=$4
10
14
export HYDRA_FULL_ERROR=1
11
15
16
#######################################
# Print a timestamped message and append the same line to the local log file.
# Globals:   LOCAL_LOG_FILE (read) - log file appended to; looked up when the
#            function is called, so it may be assigned after this definition.
# Arguments: $@ - message words, joined with the first character of IFS
#            (a space by default)
# Outputs:   "[YYYY.MM.DD HH:MM] <message>" on stdout and in LOCAL_LOG_FILE
# Returns:   exit status of tee (last stage of the pipeline)
#######################################
logging() {
  local log_prompt
  # The format operand must start with '+', otherwise date treats it as a
  # date string to parse and fails.
  log_prompt="[$(date "+%Y.%m.%d %H:%M")]"
  # "$*" keeps the whole message as one word; "$@" inside a larger string
  # would split it into several arguments.
  printf '%s %s\n' "${log_prompt}" "$*" | tee -a "${LOCAL_LOG_FILE}"
}
20
+
12
21
# NOTE: Harmonised path contains ${output_path}/harmonised_sumstats/${study_id}
13
- HARMONISATION_DIR=` dirname $HARMONISED_FILE `
14
- OUTPUT_PATH=` dirname $HARMONISATION_DIR `
15
- STUDY_ID=` basename $HARMONISED_FILE `
22
+ HARMONISATION_DIR=$( dirname $HARMONISED_FILE )
23
+ OUTPUT_PATH=$( dirname $HARMONISATION_DIR )
24
+ STUDY_ID=$( basename $HARMONISED_FILE )
16
25
LOCAL_LOG_FILE=" harmonisation.log"
17
26
LOCAL_SUMMARY_FILE=harmonisation.csv
18
- RAW_LOCAL_FILE=` basename $RAW_FILE `
27
+ RAW_LOCAL_FILE=$( basename $RAW_FILE )
19
28
UNZIPPED_RAW_LOCAL_FILE=" ${RAW_LOCAL_FILE% .* } "
20
29
30
+ # Make sure we start with clean setup
21
31
# Remove a summary file left over from a previous run.
# The expansion is quoted so that an empty or unset name cannot collapse the
# test into `[ -f ]`, which is always true.
if [[ -f "${LOCAL_SUMMARY_FILE}" ]]; then
  # -f already guarantees a regular file, so no recursive delete is needed
  rm -f -- "${LOCAL_SUMMARY_FILE}"
fi
24
- echo " study,harmonisationExitCode,qcExitCode,rawSumstatFile,rawSumstatFileSize,rawUnzippedSumstatFileSize" > $LOCAL_SUMMARY_FILE
34
+ echo " study,harmonisationExitCode,qcExitCode,rawSumstatFile,rawSumstatFileSize,rawUnzippedSumstatFileSize" > $LOCAL_SUMMARY_FILE
25
35
26
36
# Remove a log file left over from a previous run; later writes to it only
# append (tee -a, >>), so stale content would otherwise be carried over.
# The expansion is quoted so that an empty or unset name cannot collapse the
# test into `[ -f ]`, which is always true.
if [[ -f "${LOCAL_LOG_FILE}" ]]; then
  # -f already guarantees a regular file, so no recursive delete is needed
  rm -f -- "${LOCAL_LOG_FILE}"
fi
29
- logging (){
30
- log_prompt=" [$( date " +%Y.%m.%d %H:%M" ) ]"
31
- echo " ${log_prompt} $@ " | tee -a ${LOCAL_LOG_FILE}
32
- }
33
-
34
39
35
40
logging " Copying raw summary statistics from ${RAW_FILE} to ${RAW_LOCAL_FILE} "
36
41
gsutil cp $RAW_FILE $RAW_LOCAL_FILE
@@ -53,12 +58,11 @@ poetry run gentropy step=gwas_catalog_sumstat_preprocess \
53
58
+step.session.extended_spark_conf=" {spark.dynamicAllocation.enabled:false}" \
54
59
+step.session.extended_spark_conf=" {spark.driver.memory:16g}" \
55
60
+step.session.extended_spark_conf=" {spark.kryoserializer.buffer.max:500m}" \
56
- +step.session.extended_spark_conf=" {spark.driver.maxResultSize:5g}" >> ${LOCAL_LOG_FILE} 2>&1
61
+ +step.session.extended_spark_conf=" {spark.driver.maxResultSize:5g}" >> ${LOCAL_LOG_FILE} 2>&1
57
62
# NOTE: do not pipe the command above through tee; without pipefail, $? would be tee's exit code (always 0) instead of the step's
58
63
HARMONISATION_EXIT_CODE=$?
59
64
logging " Harmonisation exit code: ${HARMONISATION_EXIT_CODE} "
60
65
61
-
62
66
logging " Running qc on ${HARMONISED_FILE} file"
63
67
poetry run gentropy step=summary_statistics_qc \
64
68
step.gwas_path=$HARMONISED_FILE \
@@ -69,15 +73,14 @@ poetry run gentropy step=summary_statistics_qc \
69
73
+step.session.extended_spark_conf=" {spark.dynamicAllocation.enabled:false}" \
70
74
+step.session.extended_spark_conf=" {spark.driver.memory:16g}" \
71
75
+step.session.extended_spark_conf=" {spark.kryoserializer.buffer.max:500m}" \
72
- +step.session.extended_spark_conf=" {spark.driver.maxResultSize:5g}" >> ${LOCAL_LOG_FILE} 2>&1
76
+ +step.session.extended_spark_conf=" {spark.driver.maxResultSize:5g}" >> ${LOCAL_LOG_FILE} 2>&1
73
77
QC_EXIT_CODE=$?
74
78
logging " QC exit code: ${QC_EXIT_CODE} "
75
79
76
-
77
- echo " $STUDY_ID ,$HARMONISATION_EXIT_CODE ,$QC_EXIT_CODE ,$RAW_FILE ,$RAW_FILE_SIZE ,$UNZIPPED_FILE_SIZE " >> $LOCAL_SUMMARY_FILE
80
+ echo " $STUDY_ID ,$HARMONISATION_EXIT_CODE ,$QC_EXIT_CODE ,$RAW_FILE ,$RAW_FILE_SIZE ,$UNZIPPED_FILE_SIZE " >> $LOCAL_SUMMARY_FILE
78
81
79
82
clean_up () {
80
- # ensure the logs from the job and summary of harmonisation & qc are outputed and preserved (latest are overwrtitten and dated are maintained)
83
+ # ensure the logs from the job and summary of harmonisation & qc are preserved (latest are overwritten and dated are maintained)
81
84
DATE=$( date " +%Y%m%d%H%M" )
82
85
REMOTE_LOG_FILE=" ${OUTPUT_PATH} /harmonisation_summary/${STUDY_ID} /${DATE} /harmonisation.log"
83
86
LATEST_REMOTE_LOG_FILE=" ${OUTPUT_PATH} /harmonisation_summary/${STUDY_ID} /latest/harmonisation.log"
0 commit comments