Skip to content

Commit 48ba90d

Browse files
authored
feat: add gnomAD SV/CNV 4.0 for GRCh38 (#55)
1 parent b9fed91 commit 48ba90d

File tree

1 file changed

+22
-17
lines changed

1 file changed

+22
-17
lines changed

download-data.sh

Lines changed: 22 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ export DIR_PREFIX=${DIR_PREFIX-.dev}
2323
# Overall static data directory.
2424
export DATA_DIR=${DATA_DIR-$DIR_PREFIX/volumes/$STATIC_INFIX/data}
2525
# S3 endpoint URL.
26-
export S3_ENDPOINT_URL=https://ceph-s3-public.cubi.varfish-org.org
26+
export S3_ENDPOINT_URL=https://ceph-s3-public.cubi.bihealth.org
2727

2828
# -- Versions -----------------------------------------------------------------
2929

@@ -242,7 +242,7 @@ EOF
242242
mkdir -p $DATA_DIR/download
243243
# Download each entry from download list. Note that we support commenting
244244
# out lines with a leading "#".
245-
grep -v ^# /tmp/download-list.txt | grep -v grch37 >/tmp/download-list.nocomment.txt
245+
grep -v ^# /tmp/download-list.txt >/tmp/download-list.nocomment.txt
246246
while read -r line; do
247247
# Create the download directory.
248248
run mkdir -p $DATA_DIR/download/$line
@@ -251,7 +251,7 @@ while read -r line; do
251251
run s5cmd \
252252
--endpoint-url=$S3_ENDPOINT_URL \
253253
--no-sign-request \
254-
--no-verify-ssl \
254+
--no-verify-ssl \
255255
sync \
256256
"s3://varfish-public/$(prefix_for $line)/$line/*" \
257257
$DATA_DIR/download/$line \
@@ -403,12 +403,6 @@ mkdir -p $DATA_DIR/worker/grch3{7,8}/strucvars/bgdbs
403403

404404
log_info " - strucvars/bgdbs"
405405
rm -f $DATA_DIR/worker/grch3{7,8}/strucvars/bgdbs/{exac,g1k,gnomad,dbvar,dgv,dgv-gs}.bin
406-
ln -sr $(echo $DATA_DIR/download/worker/bgdb-exac-grch37-*/bgdb-exac.bin | tr ' ' '\n' | tail -n 1) \
407-
$DATA_DIR/worker/grch37/strucvars/bgdbs/exac.bin
408-
ln -sr $(echo $DATA_DIR/download/worker/bgdb-g1k-grch37-$V_G1K+$V_WORKER/bgdb-g1k.bin | tr ' ' '\n' | tail -n 1) \
409-
$DATA_DIR/worker/grch37/strucvars/bgdbs/g1k.bin
410-
ln -sr $(echo $DATA_DIR/download/worker/bgdb-gnomad-grch37-*/bgdb-gnomad.bin | tr ' ' '\n' | tail -n 1) \
411-
$DATA_DIR/worker/grch37/strucvars/bgdbs/gnomad.bin
412406
ln -sr $(echo $DATA_DIR/download/worker/bgdb-dbvar-grch37-*/bgdb-dbvar.bin | tr ' ' '\n' | tail -n 1) \
413407
$DATA_DIR/worker/grch37/strucvars/bgdbs/dbvar.bin
414408
ln -sr $(echo $DATA_DIR/download/worker/bgdb-dbvar-grch38-*/bgdb-dbvar.bin | tr ' ' '\n' | tail -n 1) \
@@ -418,9 +412,20 @@ ln -sr $(echo $DATA_DIR/download/worker/bgdb-dgv-grch37-*/bgdb-dgv.bin | tr ' '
418412
ln -sr $(echo $DATA_DIR/download/worker/bgdb-dgv-grch38-*/bgdb-dgv.bin | tr ' ' '\n' | tail -n 1) \
419413
$DATA_DIR/worker/grch38/strucvars/bgdbs/dgv.bin
420414
ln -sr $(echo $DATA_DIR/download/worker/bgdb-dgv-gs-grch37-*/bgdb-dgv-gs.bin | tr ' ' '\n' | tail -n 1) \
421-
$DATA_DIR/worker/grch37/strucvars/bgdbs/dgv-gs.bin
415+
$DATA_DIR/worker/grch37/strucvars/bgdbs/dgv_gs.bin
422416
ln -sr $(echo $DATA_DIR/download/worker/bgdb-dgv-gs-grch38-*/bgdb-dgv-gs.bin | tr ' ' '\n' | tail -n 1) \
423-
$DATA_DIR/worker/grch38/strucvars/bgdbs/dgv-gs.bin
417+
$DATA_DIR/worker/grch38/strucvars/bgdbs/dgv_gs.bin
418+
ln -sr $(echo $DATA_DIR/download/worker/bgdb-g1k-grch37-$V_G1K+$V_WORKER/bgdb-g1k.bin | tr ' ' '\n' | tail -n 1) \
419+
$DATA_DIR/worker/grch37/strucvars/bgdbs/g1k.bin
420+
# NB: there is no Thousand Genomes (g1k) background database for GRCh38.
421+
ln -sr $(echo $DATA_DIR/download/worker/bgdb-exac-grch37-*/bgdb-exac.bin | tr ' ' '\n' | tail -n 1) \
422+
$DATA_DIR/worker/grch37/strucvars/bgdbs/gnomad_exomes.bin
423+
ln -sr $(echo $DATA_DIR/download/worker/bgdb-gnomad-exomes-cnv-grch38-*/bgdb-gnomad-exomes-cnv-grch38.bin | tr ' ' '\n' | tail -n 1) \
424+
$DATA_DIR/worker/grch38/strucvars/bgdbs/gnomad_exomes.bin
425+
ln -sr $(echo $DATA_DIR/download/worker/bgdb-gnomad-grch37-*/bgdb-gnomad.bin | tr ' ' '\n' | tail -n 1) \
426+
$DATA_DIR/worker/grch37/strucvars/bgdbs/gnomad_genomes.bin
427+
ln -sr $(echo $DATA_DIR/download/worker/bgdb-gnomad-genomes-sv-grch38-*/bgdb-gnomad-genomes-sv-grch38.bin | tr ' ' '\n' | tail -n 1) \
428+
$DATA_DIR/worker/grch38/strucvars/bgdbs/gnomad_genomes.bin
424429

425430
log_info " - strucvars/clinvar"
426431
rm -f $DATA_DIR/worker/grch3{7,8}/strucvars/clinvar.bin
@@ -430,13 +435,13 @@ ln -sr $(ls $DATA_DIR/download/worker/clinvar-strucvars-grch37-*/clinvar-strucva
430435
ln -sr $(ls $DATA_DIR/download/worker/clinvar-strucvars-grch38-*/clinvar-strucvars.bin | tr ' ' '\n' | tail -n 1) \
431436
$DATA_DIR/worker/grch38/strucvars/clinvar.bin
432437

433-
log_info " - strucvars/patho-mms"
434-
rm -f $DATA_DIR/worker/grch3?/strucvars/patho-mms.bed
438+
log_info " - strucvars/patho_mms"
439+
rm -f $DATA_DIR/worker/grch3?/strucvars/patho_mms.bed
435440

436441
ln -sr $(ls $DATA_DIR/download/worker/patho-mms-grch37-*/patho-mms.bed | tr ' ' '\n' | tail -n 1) \
437-
$DATA_DIR/worker/grch37/strucvars/patho-mms.bed
442+
$DATA_DIR/worker/grch37/strucvars/patho_mms.bed
438443
ln -sr $(ls $DATA_DIR/download/worker/patho-mms-grch38-*/patho-mms.bed | tr ' ' '\n' | tail -n 1) \
439-
$DATA_DIR/worker/grch38/strucvars/patho-mms.bed
444+
$DATA_DIR/worker/grch38/strucvars/patho_mms.bed
440445

441446
log_info " - strucvars/tads"
442447
mkdir -p $DATA_DIR/worker/grch3{7,8}/tads
@@ -449,14 +454,14 @@ ln -sr $(ls $DATA_DIR/download/worker/tads-grch38-dixon2015/hesc.bed | tr ' ' '\
449454

450455
log_info " - noref/genes"
451456
mkdir -p $DATA_DIR/worker/noref/genes
452-
rm -f $DATA_DIR/worker/noref/genes/{xlink.bin,acmg.tsv,mime2gene.tsv}
457+
rm -f $DATA_DIR/worker/noref/genes/{xlink.bin,acmg.tsv,mim2gene.tsv}
453458

454459
ln -sr $(ls $DATA_DIR/download/worker/genes-xlink-*/genes-xlink.bin | tr ' ' '\n' | tail -n 1) \
455460
$DATA_DIR/worker/noref/genes/xlink.bin
456461
ln -sr $(ls $DATA_DIR/download/worker/acmg-sf-*/acmg_sf.tsv | tr ' ' '\n' | tail -n 1) \
457462
$DATA_DIR/worker/noref/genes/acmg.tsv
458463
ln -sr $(ls $DATA_DIR/download/worker/mim2gene-*/mim2gene.tsv | tr ' ' '\n' | tail -n 1) \
459-
$DATA_DIR/worker/noref/genes/mime2gene.tsv
464+
$DATA_DIR/worker/noref/genes/mim2gene.tsv
460465

461466
log_info " - grch3{7,8}/regions"
462467
mkdir -p $DATA_DIR/worker/grch3{7,8}/genes

0 commit comments

Comments
 (0)