Skip to content

Commit e613bcb

Browse files
feat: added base constants
1 parent 2e535a4 commit e613bcb

File tree

2 files changed

+16
-12
lines changed

2 files changed

+16
-12
lines changed

data_collection/constants.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,19 @@
11
"""Module for constants used in data collection."""
22

33
# files
4-
LOVD_URL = "https://databases.lovd.nl/shared/genes/EYS"
5-
LOVD_FILE_URL = "https://databases.lovd.nl/shared/download/all/gene/EYS"
4+
LOVD_URL = "https://databases.lovd.nl/shared/genes"
5+
LOVD_URL_EYS = "https://databases.lovd.nl/shared/genes/EYS"
6+
LOVD_FILE_URL = "https://databases.lovd.nl/shared/download/all/gene"
7+
LOVD_FILE_URL_EYS = "https://databases.lovd.nl/shared/download/all/gene/EYS"
68

7-
GNOMAD_URL = "https://gnomad.broadinstitute.org/gene/ENSG00000188107?dataset=gnomad_r4"
8-
GNOMAD_FILE_URL = ("https://drive.usercontent.google.com/u/0/uc?id=1crkDCVcC0PSnv0JPGj3FpemBs28"
9+
GNOMAD_URL = "https://gnomad.broadinstitute.org/gene"
10+
GNOMAD_URL_EYS = "https://gnomad.broadinstitute.org/gene/ENSG00000188107?dataset=gnomad_r4"
11+
GNOMAD_FILE_URL_EYS = ("https://drive.usercontent.google.com/u/0/uc?id=1crkDCVcC0PSnv0JPGj3FpemBs28"
912
"-T_3y&export=download")
1013

11-
CLINVAR_URL = "https://www.ncbi.nlm.nih.gov/clinvar/?term=eys%5Bgene%5D&redir=gene"
12-
CLINVAR_FILE_URL = ("https://drive.usercontent.google.com/u/0/uc?id=1RK5XBK3k5h0K6f-qfwJSQj7tlF"
14+
CLINVAR_URL = "https://www.ncbi.nlm.nih.gov/clinvar"
15+
CLINVAR_URL_EYS = "https://www.ncbi.nlm.nih.gov/clinvar/?term=eys%5Bgene%5D&redir=gene"
16+
CLINVAR_FILE_URL_EYS = ("https://drive.usercontent.google.com/u/0/uc?id=1RK5XBK3k5h0K6f-qfwJSQj7tlF"
1317
"-H2U6u&export=download")
1418

1519
# paths

data_collection/pipeline.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22
import pandas as pd
33

44
from tools import get_file_from_url, from_lovd_to_pandas, from_clinvar_name_to_dna
5-
from constants import (LOVD_FILE_URL,
6-
GNOMAD_FILE_URL,
7-
CLINVAR_FILE_URL,
5+
from constants import (LOVD_FILE_URL_EYS,
6+
GNOMAD_FILE_URL_EYS,
7+
CLINVAR_FILE_URL_EYS,
88
DATA_PATH,
99
LOVD_PATH,
1010
GNOMAD_PATH,
@@ -47,9 +47,9 @@ def calculate_max_frequency(row):
4747

4848
# MAIN
4949
# Download all data
50-
get_file_from_url(LOVD_FILE_URL, LOVD_PATH + "/lovd_data.txt", override=True)
51-
get_file_from_url(GNOMAD_FILE_URL, GNOMAD_PATH + "/gnomad_data.csv", override=True)
52-
get_file_from_url(CLINVAR_FILE_URL, CLINVAR_PATH + "/clinvar_data.txt", override=True)
50+
get_file_from_url(LOVD_FILE_URL_EYS, LOVD_PATH + "/lovd_data.txt", override=True)
51+
get_file_from_url(GNOMAD_FILE_URL_EYS, GNOMAD_PATH + "/gnomad_data.csv", override=True)
52+
get_file_from_url(CLINVAR_FILE_URL_EYS, CLINVAR_PATH + "/clinvar_data.txt", override=True)
5353

5454
# Read and convert data
5555
lovd_data = from_lovd_to_pandas(LOVD_PATH + "/lovd_data.txt")

0 commit comments

Comments
 (0)