Skip to content

Commit

Permalink
remodeled download selected database function;
Browse files Browse the repository at this point in the history
added path option to lovd function;
  • Loading branch information
KajusC committed Sep 26, 2024
1 parent 88389a0 commit b1b5595
Show file tree
Hide file tree
Showing 5 changed files with 4,119 additions and 17 deletions.
2 changes: 1 addition & 1 deletion api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
download_database_for_eys_gene,

# Functions for storing databases
store_database_for_eys_gene
download_selected_database_for_eys_gene
)

# DATA REFACTORING IMPORT
Expand Down
4 changes: 2 additions & 2 deletions api/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
)

# DATA COLLECTION IMPORT
from .collection import (
from .downloading import (
# Custom exceptions
BadResponseException,
DownloadError,
Expand All @@ -49,7 +49,7 @@
download_data_from_gnomad_eys,

# Functions for storing databases
store_database_for_eys_gene
download_selected_database_for_eys_gene

)

Expand Down
1 change: 1 addition & 0 deletions api/data/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
LOVD_FILE_URL = "https://databases.lovd.nl/shared/download/all/gene/"
LOVD_FILE_URL_EYS = LOVD_FILE_URL + "EYS"
STORE_AS_LOVD = "../data/lovd/lovd_data.txt"
STORE_AS_GNOMAD = "../data/gnomad/gnomad_data.csv"

GNOMAD_URL = "https://gnomad.broadinstitute.org/gene"
GNOMAD_URL_EYS = "https://gnomad.broadinstitute.org/gene/ENSG00000188107?dataset=gnomad_r4"
Expand Down
40 changes: 26 additions & 14 deletions api/data/downloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
LOVD_PATH,
DATABASES_DOWNLOAD_PATHS,
LOVD_FILE_URL_EYS,
STORE_AS_LOVD)
STORE_AS_LOVD,
STORE_AS_GNOMAD)


# EXCEPTIONS
Expand Down Expand Up @@ -176,21 +177,36 @@ def download_database_for_eys_gene(database_name, override=False):
os.rename(latest_file, os_path)


def store_database_for_eys_gene(database_name, override=False):
def download_selected_database_for_eys_gene(database_name, save_path="", override=False):
"""
Calls a function to download a database.
:param database_name: the name of the database that should be downloaded
:param save_path: path to save the data
:param override: whether an already existing file should be overwritten
"""
if not isinstance(database_name, str):
raise TypeError("Database name should be a string")

database_name = database_name.lower()

# if save_path is not provided, save to default location
if database_name == "lovd" and save_path == "":
save_path = STORE_AS_LOVD
elif database_name == "gnomad" and save_path == "":
save_path = STORE_AS_GNOMAD

# check if database_name is supported
if database_name not in DATABASES_DOWNLOAD_PATHS:
raise IndexError(f"Requested {database_name} database is not supported")
raise IndexError(f"Requested for {database_name} database is not supported")

# download the database
if database_name == "lovd":
download_lovd_database_for_eys_gene(database_name, override)
download_lovd_database_for_eys_gene(save_path, override)
elif database_name == "gnomad":
download_data_from_gnomad_eys(database_name, override)
download_data_from_gnomad_eys(save_path, override)
else:
download_database_for_eys_gene(database_name, override)
raise IndexError(f"Requested for {database_name} is not yet supported")


def prepare_popmax_calculation(df, pop_data, name, pop_ids, index):
Expand All @@ -215,7 +231,7 @@ def prepare_popmax_calculation(df, pop_data, name, pop_ids, index):
df.loc[index, f'{name}_an_{variant_id}'] = pop['an']


def download_data_from_gnomad_eys(path, override=False):
def download_data_from_gnomad_eys(path=STORE_AS_GNOMAD, override=False):
"""
Requests gnomAD API for data about a specific gene containing:
- variant_id
Expand All @@ -226,10 +242,8 @@ def download_data_from_gnomad_eys(path, override=False):
- popmax
- popmax population
:param str gene_name: name of gene
:param bool to_file: if True, saves data to variants.csv
:returns: DataFrame from gnomAD API
:rtype: DataFrame
:param str path: path to save the data (default: STORE_AS_GNOMAD, i.e. '../data/gnomad/gnomad_data.csv')
:param bool override: should an existing file be overridden with a new one
"""

if os.path.exists(path) and not override:
Expand Down Expand Up @@ -347,6 +361,4 @@ def download_data_from_gnomad_eys(path, override=False):
df = df.filter(not_to_drop, axis="columns")

if not os.path.isfile(path) or override:
df.to_csv(path, index=False)

return df
df.to_csv(path, index=False)
Loading

0 comments on commit b1b5595

Please sign in to comment.