Skip to content

Commit ada2590

Browse files
committed
feat: adding unpublished ancestries as well
1 parent 50b4458 commit ada2590

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

config/datasets/gcp.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ catalog_studies:
1616
# Currently ingesting GWAS Catalog studies from two different files:
1717
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-studies-r2023-11-24.tsv
1818
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-unpublished-studies-r2023-11-24.tsv
19-
catalog_ancestries: ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-ancestries-r2023-11-24.tsv
19+
catalog_ancestries:
20+
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-ancestries-r2023-11-24.tsv
21+
- ${datasets.inputs}/v2d/gwas-catalog-v1.0.3-unpublished-ancestries-r2023-11-24.tsv
2022
catalog_sumstats_lut: ${datasets.inputs}/v2d/harmonised_list-r2023-11-24a.txt
2123
ukbiobank_manifest: gs://genetics-portal-input/ukb_phenotypes/neale2_saige_study_manifest.190430.tsv
2224
l2g_gold_standard_curation: ${datasets.inputs}/l2g/gold_standard/curation.json

src/otg/gwas_catalog.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ class GWASCatalogStep:
2323
2424
Attributes:
2525
session (Session): Session object.
26-
catalog_study_files (list[str]): Raw GWAS catalog studies file.
27-
catalog_ancestry_file (str): Ancestry annotations file from GWAS Catalog.
26+
catalog_study_files (list[str]): List of raw GWAS Catalog studies files.
27+
catalog_ancestry_files (list[str]): List of raw ancestry annotations files from GWAS Catalog.
2828
catalog_sumstats_lut (str): GWAS Catalog summary statistics lookup table.
2929
catalog_associations_file (str): Raw GWAS catalog associations file.
3030
variant_annotation_path (str): Input variant annotation path.
@@ -36,7 +36,7 @@ class GWASCatalogStep:
3636

3737
session: Session = MISSING
3838
catalog_study_files: list[str] = MISSING
39-
catalog_ancestry_file: str = MISSING
39+
catalog_ancestry_files: list[str] = MISSING
4040
catalog_sumstats_lut: str = MISSING
4141
catalog_associations_file: str = MISSING
4242
variant_annotation_path: str = MISSING
@@ -53,7 +53,7 @@ def __post_init__(self: GWASCatalogStep) -> None:
5353
self.catalog_study_files, sep="\t", header=True
5454
)
5555
ancestry_lut = self.session.spark.read.csv(
56-
self.catalog_ancestry_file, sep="\t", header=True
56+
self.catalog_ancestry_files, sep="\t", header=True
5757
)
5858
sumstats_lut = self.session.spark.read.csv(
5959
self.catalog_sumstats_lut, sep="\t", header=False

utils/update_GWAS_Catalog_data.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ wget -q ${RELEASE_URL}/gwas-catalog-download-ancestries-v1.0.3.txt \
6363
-O gwas-catalog-v1.0.3-ancestries-r${YEAR}-${MONTH}-${DAY}.tsv
6464
logging "File gwas-catalog-v1.0.3-ancestries-r${YEAR}-${MONTH}-${DAY}.tsv saved."
6565

66+
wget -q ${RELEASE_URL}/gwas-catalog-unpublished-ancestries-v1.0.3.tsv \
67+
-O gwas-catalog-v1.0.3-unpublished-ancestries-r${YEAR}-${MONTH}-${DAY}.tsv
68+
logging "File gwas-catalog-v1.0.3-unpublished-ancestries-r${YEAR}-${MONTH}-${DAY}.tsv saved."
69+
70+
6671
wget -q ${BASE_URL}/summary_statistics/harmonised_list.txt -O harmonised_list-r${YEAR}-${MONTH}-${DAY}.txt
6772
logging "File harmonised_list-r${YEAR}-${MONTH}-${DAY}.txt saved."
6873

@@ -72,5 +77,6 @@ gsutil -mq cp file://$(pwd)/gwas-catalog-v1.0.3-studies-r${YEAR}-${MONTH}-${DAY}
7277
gsutil -mq cp file://$(pwd)/gwas-catalog-v1.0.3-ancestries-r${YEAR}-${MONTH}-${DAY}.tsv ${GCP_TARGET}/
7378
gsutil -mq cp file://$(pwd)/harmonised_list-r${YEAR}-${MONTH}-${DAY}.txt ${GCP_TARGET}/
7479
gsutil -mq cp file://$(pwd)/gwas-catalog-v1.0.3-unpublished-studies-r${YEAR}-${MONTH}-${DAY}.tsv ${GCP_TARGET}/
80+
gsutil -mq cp file://$(pwd)/gwas-catalog-v1.0.3-unpublished-ancestries-r${YEAR}-${MONTH}-${DAY}.tsv ${GCP_TARGET}/
7581

7682
logging "Done."

0 commit comments

Comments
 (0)