diff --git a/api/__init__.py b/api/__init__.py index 459952b..8de60d4 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -32,7 +32,7 @@ ) # DATA COLLECTION IMPORT -from .data.collection import ( +from .data.downloading import ( # Custom exceptions BadResponseException, DownloadError, @@ -46,7 +46,7 @@ download_database_for_eys_gene, # Functions for storing databases - store_database_for_eys_gene + download_selected_database_for_eys_gene ) # DATA REFACTORING IMPORT diff --git a/api/data/__init__.py b/api/data/__init__.py index bd40c79..b2bc9c1 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -34,7 +34,7 @@ ) # DATA COLLECTION IMPORT -from .collection import ( +from .downloading import ( # Custom exceptions BadResponseException, DownloadError, @@ -46,9 +46,11 @@ download_lovd_database_for_eys_gene, download_genes_lovd, download_database_for_eys_gene, + download_data_from_gnomad_eys, # Functions for storing databases - store_database_for_eys_gene + download_selected_database_for_eys_gene + ) # DATA REFACTORING IMPORT @@ -58,7 +60,6 @@ parse_lovd, from_clinvar_name_to_cdna_position, save_lovd_as_vcf, - request_gnomad_api_data, merge_gnomad_lovd, parse_gnomad, set_gnomad_dtypes, diff --git a/api/data/collection.py b/api/data/collection.py deleted file mode 100644 index 8bb8312..0000000 --- a/api/data/collection.py +++ /dev/null @@ -1,191 +0,0 @@ -""" Module providing a functionality to collect data from various sources """ - -import glob -import logging -import os -import time - -import requests -from requests import RequestException - -from selenium import webdriver -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait - -from .constants import (LOVD_FILE_URL, - LOVD_PATH, - DATABASES_DOWNLOAD_PATHS, - LOVD_FILE_URL_EYS, - STORE_AS_LOVD) - - -# EXCEPTIONS -class BadResponseException(Exception): - """Custom exception for bad responses.""" - - -class DownloadError(Exception): - """Custom exception for download errors.""" - - -def get_file_from_url(url, save_to, override=False): - """ - Gets file from url and saves it into provided path. Overrides, if override is True. - - :param str url: link with file - :param str save_to: path to save - :param bool override: needs override - """ - - # check if path is not directory - if os.path.isdir(save_to): - raise IsADirectoryError("Specified path is a directory, specify name of file") - - # check if directory exists, if not - create - directory = os.path.dirname(save_to) - if not os.path.exists(directory): - os.makedirs(directory) - logging.info("Created directory: %s", directory) - - # check if file exist and needs to override - if os.path.exists(save_to) and not override: - raise FileExistsError(f"The file at {save_to} already exists.") - - try: - response = requests.get(url, timeout=10) - except RequestException as e: - raise DownloadError(f"Error while downloading file from {url}") from e - - if response.status_code != 200: - raise BadResponseException(f"Bad response from {url}." - f" Status code: {response.status_code}") - - with open(save_to, "wb") as f: - f.write(response.content) - - -def download_lovd_database_for_eys_gene(override=False): - """ - Gets file from url and saves it into provided path. Overrides, if override is True. - - :param bool override: needs override - """ - - url = LOVD_FILE_URL_EYS - save_to = STORE_AS_LOVD - - # check if directory exists, if not - create - save_to_dir = os.path.dirname(save_to) - if not os.path.exists(save_to_dir): - os.makedirs(save_to_dir) - - # check if file exist and needs to override - if os.path.exists(save_to) and not override: - print(f"The file at {save_to} already exists.") - return - - try: - response = requests.get(url, timeout=10) - except RequestException as e: - raise DownloadError(f"Error while downloading file from {url}") from e - - if response.status_code != 200: - raise BadResponseException(f"Bad response from {url}." - f" Status code: {response.status_code}") - - with open(save_to, "wb") as f: - f.write(response.content) - - -def download_genes_lovd(gene_list: list, folder_path=LOVD_PATH, raise_exception=False): - """ - Downloads data into txt files from gene_list. - - :param list gene_list: list of gene's symbols - :param str folder_path: folder to save the data - :param bool raise_exception: raise exception if True, otherwise log - """ - - for gene in gene_list: - file_path = os.path.join(folder_path, gene + ".txt") - url = LOVD_FILE_URL + gene - try: - response = requests.get(url, timeout=10) - except RequestException as e: - raise DownloadError(f"Error while downloading file from {url}") from e - - if response.status_code != 200: - raise BadResponseException(f"Bad response from {url}." - f" Status code: {response.status_code}") - # If gene does not exist, the first word of the file will be Error - valid = 'Error' not in response.text[:6] - if valid: - get_file_from_url(url, file_path) - elif raise_exception: - raise ValueError(f"Symbol: {gene} does not exist in the LOVD database") - else: - logging.error("Symbol: %s does not exist in the LOVD database", gene) - - -def download_database_for_eys_gene(database_name, override=False): - """ - downloads chosen database - and handles where it should be saved, - renames the downloaded (latest) file to appropriate name - :param database_name: the name of the database - :param override: should an existing file be overriden with a new one - """ - - save_as = DATABASES_DOWNLOAD_PATHS[database_name]["store_as"] - os_path = os.path.join(os.getcwd(), "..", "data", database_name, save_as) - - if os.path.exists(os_path) and override: - os.remove(os_path) - elif os.path.exists(os_path) and not override: - return - - url = DATABASES_DOWNLOAD_PATHS[database_name]["url"] - button_location = DATABASES_DOWNLOAD_PATHS[database_name]["button"] - clickable = DATABASES_DOWNLOAD_PATHS[database_name]["clickable"] - - firefox_options = webdriver.FirefoxOptions() - firefox_options.headless = True - firefox_options.add_argument('--headless') - firefox_options.set_preference("browser.download.folderList", 2) - firefox_options.set_preference("browser.download.manager.showWhenStarting", False) - firefox_options.set_preference("browser.download.dir", - os.path.join(os.getcwd(), - "..", - "data", - database_name)) - firefox_options.set_preference("browser.helperApps.neverAsk.saveToDisk", - "application/octet-stream") - - driver = webdriver.Firefox(options=firefox_options) - driver.get(url) - WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, clickable))) - driver.execute_script(button_location) - - time.sleep(30) - driver.quit() - - list_of_files = glob.glob(os.path.join(os.getcwd(), "..", "data", database_name, '*')) - latest_file = max(list_of_files, key=os.path.getctime) - os.rename(latest_file, os_path) - - -def store_database_for_eys_gene(database_name, override=False): - """ - Calls a function to download a database. - :param database_name: the name of the database that should be downloaded - :param override: should be already existing file be overwritten - """ - database_name = database_name.lower() - if database_name not in DATABASES_DOWNLOAD_PATHS: - raise IndexError(f"Requested {database_name} database is not supported") - if database_name == "lovd": - download_lovd_database_for_eys_gene(override) - else: - download_database_for_eys_gene(database_name, override) - diff --git a/api/data/constants.py b/api/data/constants.py index 757074c..69e76d1 100644 --- a/api/data/constants.py +++ b/api/data/constants.py @@ -8,6 +8,7 @@ LOVD_FILE_URL = "https://databases.lovd.nl/shared/download/all/gene/" LOVD_FILE_URL_EYS = LOVD_FILE_URL + "EYS" STORE_AS_LOVD = "../data/lovd/lovd_data.txt" +STORE_AS_GNOMAD = "../data/gnomad/gnomad_data.csv" GNOMAD_URL = "https://gnomad.broadinstitute.org/gene" GNOMAD_URL_EYS = "https://gnomad.broadinstitute.org/gene/ENSG00000188107?dataset=gnomad_r4" @@ -25,6 +26,7 @@ LOVD_PATH = os.path.join(DATA_PATH, "lovd/") GNOMAD_PATH = os.path.join(DATA_PATH, "gnomad/") CLINVAR_PATH = os.path.join(DATA_PATH, "clinvar/") +DEFAULT_SAVE_PATH = os.path.join(DATA_PATH, "merged_data/") # variable data types LOVD_TABLES_DATA_TYPES = { diff --git a/api/data/downloading.py b/api/data/downloading.py new file mode 100644 index 0000000..c253f14 --- /dev/null +++ b/api/data/downloading.py @@ -0,0 +1,364 @@ +""" Module providing a functionality to collect data from various sources """ + +import glob +import logging +import os +import time + +import requests +import pandas as pd +from requests import RequestException + +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + +from .constants import (LOVD_FILE_URL, + LOVD_PATH, + DATABASES_DOWNLOAD_PATHS, + LOVD_FILE_URL_EYS, + STORE_AS_LOVD, + STORE_AS_GNOMAD) + + +# EXCEPTIONS +class BadResponseException(Exception): + """Custom exception for bad responses.""" + + +class DownloadError(Exception): + """Custom exception for download errors.""" + + +def get_file_from_url(url, save_to, override=False): + """ + Gets file from url and saves it into provided path. Overrides, if override is True. + + :param str url: link with file + :param str save_to: path to save + :param bool override: needs override + """ + + # check if path is not directory + if os.path.isdir(save_to): + raise IsADirectoryError("Specified path is a directory, specify name of file") + + # check if directory exists, if not - create + directory = os.path.dirname(save_to) + if not os.path.exists(directory): + os.makedirs(directory) + logging.info("Created directory: %s", directory) + + # check if file exist and needs to override + if os.path.exists(save_to) and not override: + raise FileExistsError(f"The file at {save_to} already exists.") + + try: + response = requests.get(url, timeout=10) + except RequestException as e: + raise DownloadError(f"Error while downloading file from {url}") from e + + if response.status_code != 200: + raise BadResponseException(f"Bad response from {url}." + f" Status code: {response.status_code}") + + with open(save_to, "wb") as f: + f.write(response.content) + + +def download_lovd_database_for_eys_gene(save_to=STORE_AS_LOVD, override=False): + """ + Gets file from url and saves it into provided path. Overrides, if override is True. + + :param str save_to: path to save (default: 'data/lovd/lovd_eys.txt') + :param bool override: needs override + """ + + url = LOVD_FILE_URL_EYS + + # check if directory exists, if not - create + save_to_dir = os.path.dirname(save_to) + if not os.path.exists(save_to_dir): + os.makedirs(save_to_dir) + + # check if file exist and needs to override + if os.path.exists(save_to) and not override: + print(f"The file at {save_to} already exists.") + return + + try: + response = requests.get(url, timeout=10) + except RequestException as e: + raise DownloadError(f"Error while downloading file from {url}") from e + + if response.status_code != 200: + raise BadResponseException(f"Bad response from {url}." + f" Status code: {response.status_code}") + + with open(save_to, "wb") as f: + f.write(response.content) + + +def download_genes_lovd(gene_list: list, folder_path=LOVD_PATH, raise_exception=False): + """ + Downloads data into txt files from gene_list. + + :param list gene_list: list of gene's symbols + :param str folder_path: folder to save the data + :param bool raise_exception: raise exception if True, otherwise log + """ + + for gene in gene_list: + file_path = os.path.join(folder_path, gene + ".txt") + url = LOVD_FILE_URL + gene + try: + response = requests.get(url, timeout=10) + except RequestException as e: + raise DownloadError(f"Error while downloading file from {url}") from e + + if response.status_code != 200: + raise BadResponseException(f"Bad response from {url}." + f" Status code: {response.status_code}") + # If gene does not exist, the first word of the file will be Error + valid = 'Error' not in response.text[:6] + if valid: + get_file_from_url(url, file_path) + elif raise_exception: + raise ValueError(f"Symbol: {gene} does not exist in the LOVD database") + else: + logging.error("Symbol: %s does not exist in the LOVD database", gene) + + +def download_database_for_eys_gene(database_name, override=False): + """ + downloads chosen database + and handles where it should be saved, + renames the downloaded (latest) file to appropriate name + :param database_name: the name of the database + :param override: should an existing file be overriden with a new one + """ + + save_as = DATABASES_DOWNLOAD_PATHS[database_name]["store_as"] + os_path = os.path.join(os.getcwd(), "..", "data", database_name, save_as) + + if os.path.exists(os_path) and override: + os.remove(os_path) + elif os.path.exists(os_path) and not override: + return + + url = DATABASES_DOWNLOAD_PATHS[database_name]["url"] + button_location = DATABASES_DOWNLOAD_PATHS[database_name]["button"] + clickable = DATABASES_DOWNLOAD_PATHS[database_name]["clickable"] + + firefox_options = webdriver.FirefoxOptions() + firefox_options.headless = True + firefox_options.add_argument('--headless') + firefox_options.set_preference("browser.download.folderList", 2) + firefox_options.set_preference("browser.download.manager.showWhenStarting", False) + firefox_options.set_preference("browser.download.dir", + os.path.join(os.getcwd(), + "..", + "data", + database_name)) + firefox_options.set_preference("browser.helperApps.neverAsk.saveToDisk", + "application/octet-stream") + + driver = webdriver.Firefox(options=firefox_options) + driver.get(url) + WebDriverWait(driver, 30).until(EC.element_to_be_clickable((By.XPATH, clickable))) + driver.execute_script(button_location) + + time.sleep(30) + driver.quit() + + list_of_files = glob.glob(os.path.join(os.getcwd(), "..", "data", database_name, '*')) + latest_file = max(list_of_files, key=os.path.getctime) + os.rename(latest_file, os_path) + + +def download_selected_database_for_eys_gene(database_name, save_path="", override=False): + """ + Calls a function to download a database. + + :param database_name: the name of the database that should be downloaded + :param save_path: path to save the data + :param override: should be already existing file be overwritten + """ + if not isinstance(database_name, str): + raise TypeError("Database name should be a string") + + database_name = database_name.lower() + + # if save_path is not provided, save to default location + if database_name == "lovd" and save_path == "": + save_path = STORE_AS_LOVD + elif database_name == "gnomad" and save_path == "": + save_path = STORE_AS_GNOMAD + + # check if database_name is supported + if database_name not in DATABASES_DOWNLOAD_PATHS: + raise IndexError(f"Requested for {database_name} database is not supported") + + # download the database + if database_name == "lovd": + download_lovd_database_for_eys_gene(save_path, override) + elif database_name == "gnomad": + download_data_from_gnomad_eys(save_path, override) + else: + raise IndexError(f"Requested for {database_name} is not yet supported") + + +def prepare_popmax_calculation(df, pop_data, name, pop_ids, index): + """ + prepares the calculation of popmax and popmax population for a variant. + genome and exome data of ac and an. + + :param DataFrame df: DataFrame containing gnomAD data + :param dict pop_data: dictionary containing population data + :param str name: name of the population + :param list[str] pop_ids: list of population ids + :param int index: index of the variant + """ + + for pop_id in pop_ids: + df.loc[index, f'{name}_ac_{pop_id}'] = 0 + df.loc[index, f'{name}_an_{pop_id}'] = 0 + if isinstance(pop_data, list): + for pop in pop_data: + variant_id = pop['id'] + df.loc[index, f'{name}_ac_{variant_id}'] = pop['ac'] + df.loc[index, f'{name}_an_{variant_id}'] = pop['an'] + + +def download_data_from_gnomad_eys(path=STORE_AS_GNOMAD, override=False): + """ + Requests gnomAD API for data about a specific gene containing: + - variant_id + - cDNA change + - protein change + - allele frequency + - homozygote count + - popmax + - popmax population + + :param str path: path to save the data (default: 'data/gnomad/gnomad_eys.csv') + :param bool override: should an existing file be overriden with a new one + """ + + if os.path.exists(path) and not override: + print(f"The file at {path} already exists.") + logging.info("The file at %s already exists.", path) + return + + url = 'https://gnomad.broadinstitute.org/api' + query = f""" + query{{ + gene(gene_symbol: "EYS", reference_genome: GRCh38) {{ + variants(dataset: gnomad_r4) + {{ + variant_id + chrom + pos + ref + hgvsc + hgvsp + alt + exome {{ + ac + an + ac_hom + populations + {{ + id + ac + an + }} + }} + genome + {{ + ac + an + ac_hom + populations + {{ + id + ac + an + }} + }} + }} + }} + }} + """ + + response = requests.post(url, json={'query': query}, timeout=300) # timeout set to 5 minutes + + if response.status_code != 200: + if not os.path.isfile(path): + f = open('logs.txt', 'x') + f.write(response.text) + logging.error("Error while downloading data from gnomAD API. Check logs.txt for more information.") + else: + f = open('logs.txt', 'a') + f.write(response.text) + logging.error("Error while downloading data from gnomAD API. Check logs.txt for more information.") + + data = response.json()['data']['gene']['variants'] + + df = pd.json_normalize(data) + + df.loc[:, 'total_ac'] = df.loc[:, 'exome.ac'].fillna(0) + df.loc[:, 'genome.ac'].fillna(0) + df.loc[:, 'total_an'] = df.loc[:, 'exome.an'].fillna(0) + df.loc[:, 'genome.an'].fillna(0) + + df.loc[:, 'HGVS Consequence'] = df.loc[:, 'hgvsc'].fillna(0) # cDNA change + df.loc[:, 'Protein Consequence'] = df.loc[:, 'hgvsp'].fillna(0) # Protein change + + df.loc[:, 'Allele Frequency'] = df.loc[:, 'total_ac'] / df.loc[:, 'total_an'] + df.loc[:, 'Homozygote Count'] = df.loc[:, 'exome.ac_hom'].fillna(0) + df.loc[:, 'genome.ac_hom'].fillna(0) + exome_populations = df.loc[:, 'exome.populations'] + genome_populations = df.loc[:, 'genome.populations'] + population_ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining'] + + for i in range(len(exome_populations)): + exome_pop = exome_populations[i] + prepare_popmax_calculation(df, exome_pop, 'exome', population_ids, i) + genome_pop = genome_populations[i] + prepare_popmax_calculation(df, genome_pop, 'genome', population_ids, i) + + for population_id in population_ids: + df.loc[:, f'Allele_Frequency_{population_id}'] = ( + (df.loc[:, f'exome_ac_{population_id}'].fillna(0) + df.loc[:, f'genome_ac_{population_id}'].fillna(0)) / + (df.loc[:, f'exome_an_{population_id}'].fillna(0) + df.loc[:, f'genome_an_{population_id}'].fillna(0))) + population_mapping = { + 'afr': 'African/African American', + 'eas': 'East Asian', + 'asj': 'Ashkenazi Jew', + 'sas': 'South Asian', + 'nfe': 'European (non-Finnish)', + 'fin': 'European (Finnish)', + 'mid': 'Middle Eastern', + 'amr': 'Admixed American', + 'ami': "Amish", + 'remaining': 'Remaining', + '': '' + } + + for i in range(df.shape[0]): + max_pop = 0 + max_id = '' + for population_id in population_ids: + if df.loc[i, f'Allele_Frequency_{population_id}'] > max_pop: + max_pop = df.loc[i, f'Allele_Frequency_{population_id}'] + max_id = population_id + df.loc[i, 'Popmax'] = max_pop + df.loc[i, 'Popmax population'] = population_mapping[max_id] + not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency', + 'variant_id', 'cDNA change', 'Protein change', 'gnomAD ID'] + + df.rename(columns={'variant_id': 'gnomAD ID'}) + + df = df.filter(not_to_drop, axis="columns") + + if not os.path.isfile(path) or override: + df.to_csv(path, index=False) \ No newline at end of file diff --git a/api/data/refactoring.py b/api/data/refactoring.py index 369c643..44d600c 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -4,15 +4,14 @@ import logging import re -import requests import pandas as pd from pandas import DataFrame from pyliftover import LiftOver -from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH - +from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH, \ + DEFAULT_SAVE_PATH def set_lovd_dtypes(df_dict): @@ -197,8 +196,9 @@ def lovd_fill_hg38(lovd: pd.DataFrame): return lovd.loc[:,'hg38_gnomad_format'] = lovd.loc[:,'VariantOnGenome/DNA/hg38'].replace('', pd.NA) missing_hg38_mask = lovd.loc[:,'hg38_gnomad_format'].isna() - lovd.loc[missing_hg38_mask, 'hg38_gnomad_format'] = lovd.loc[missing_hg38_mask, 'VariantOnGenome/DNA'].apply( - convert_hg19_if_missing) + lovd.loc[missing_hg38_mask, 'hg38_gnomad_format'] = (lovd.loc[missing_hg38_mask, + 'VariantOnGenome/DNA']. + apply(convert_hg19_if_missing)) lovd.loc[:,'hg38_gnomad_format'] = lovd.loc[:,'hg38_gnomad_format'].apply(convert_to_gnomad_gen) @@ -316,117 +316,12 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"): f.write("\n") -def prepare_popmax_calculation(df, pop_data, name, pop_ids, index): - """ - prepares the calculation of popmax and popmax population for a variant. - genome and exome data of ac and an. - - :param DataFrame df: DataFrame containing gnomAD data - :param dict pop_data: dictionary containing population data - :param str name: name of the population - :param list[str] pop_ids: list of population ids - :param int index: index of the variant - """ - - for pop_id in pop_ids: - df.loc[index, f'{name}_ac_{pop_id}'] = 0 - df.loc[index, f'{name}_an_{pop_id}'] = 0 - if isinstance(pop_data, list): - for pop in pop_data: - variant_id = pop['id'] - df.loc[index, f'{name}_ac_{variant_id}'] = pop['ac'] - df.loc[index, f'{name}_an_{variant_id}'] = pop['an'] - - -def request_gnomad_api_data(gene_name): - """ - Requests gnomAD API for data about a specific gene containing: - - variant_id - - cDNA change - - protein change - - allele frequency - - homozygote count - - popmax - - popmax population - - :param str gene_name: name of gene - :param bool to_file: if True, saves data to variants.csv - :returns: DataFrame from gnomAD API - :rtype: DataFrame +def find_popmax_in_gnomad(data): """ - - url = 'https://gnomad.broadinstitute.org/api' - query = f""" - query{{ - gene(gene_symbol: "{gene_name}", reference_genome: GRCh38) {{ - variants(dataset: gnomad_r4) - {{ - variant_id - chrom - pos - ref - hgvsc - hgvsp - alt - exome {{ - ac - an - ac_hom - populations - {{ - id - ac - an - }} - }} - genome - {{ - ac - an - ac_hom - populations - {{ - id - ac - an - }} - }} - }} - }} - }} + Finds popmax in gnomad data + :param DataFrame data: Gnomad data. """ - response = requests.post(url, json={'query': query}, timeout=300) # timeout set to 5 minutes - - if response.status_code != 200: - print('Error:', response.status_code) - - data = response.json()['data']['gene']['variants'] - - df = pd.json_normalize(data) - - df.loc[:, 'total_ac'] = df.loc[:, 'exome.ac'].fillna(0) + df.loc[:, 'genome.ac'].fillna(0) - df.loc[:, 'total_an'] = df.loc[:, 'exome.an'].fillna(0) + df.loc[:, 'genome.an'].fillna(0) - - df.loc[:, 'HGVS Consequence'] = df.loc[:, 'hgvsc'].fillna(0) # cDNA change - df.loc[:, 'Protein Consequence'] = df.loc[:, 'hgvsp'].fillna(0) # Protein change - - df.loc[:, 'Allele Frequency'] = df.loc[:, 'total_ac'] / df.loc[:, 'total_an'] - df.loc[:, 'Homozygote Count'] = df.loc[:, 'exome.ac_hom'].fillna(0) + df.loc[:, 'genome.ac_hom'].fillna(0) - exome_populations = df.loc[:, 'exome.populations'] - genome_populations = df.loc[:, 'genome.populations'] - population_ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining'] - - for i in range(len(exome_populations)): - exome_pop = exome_populations[i] - prepare_popmax_calculation(df, exome_pop, 'exome', population_ids, i) - genome_pop = genome_populations[i] - prepare_popmax_calculation(df, genome_pop, 'genome', population_ids, i) - - for population_id in population_ids: - df.loc[:, f'Allele_Frequency_{population_id}'] = ( - (df.loc[:, f'exome_ac_{population_id}'].fillna(0) + df.loc[:, f'genome_ac_{population_id}'].fillna(0)) / ( - df.loc[:, f'exome_an_{population_id}'].fillna(0) + df.loc[:, f'genome_an_{population_id}'].fillna(0))) population_mapping = { 'afr': 'African/African American', 'eas': 'East Asian', @@ -440,21 +335,14 @@ def request_gnomad_api_data(gene_name): 'remaining': 'Remaining', '': '' } + population_ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining'] - for i in range(df.shape[0]): + for i in range(data.shape[0]): max_pop = 0 max_id = '' for population_id in population_ids: - if df.loc[i, f'Allele_Frequency_{population_id}'] > max_pop: - max_pop = df.loc[i, f'Allele_Frequency_{population_id}'] + if data.loc[i, f'Allele_Frequency_{population_id}'] > max_pop: + max_pop = data.loc[i, f'Allele_Frequency_{population_id}'] max_id = population_id - df.loc[i, 'Popmax'] = max_pop - df.loc[i, 'Popmax population'] = population_mapping[max_id] - not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency', - 'variant_id', 'cDNA change', 'Protein change'] - - df = df.filter(not_to_drop, axis="columns") - - df.rename(columns={'variant_id': 'gnomAD ID'}) - - return df + data.loc[i, 'Popmax'] = max_pop + data.loc[i, 'Popmax population'] = population_mapping[max_id] diff --git a/api/tools/__init__.py b/api/tools/__init__.py index f8d75a8..e69de29 100644 --- a/api/tools/__init__.py +++ b/api/tools/__init__.py @@ -1,7 +0,0 @@ -""" -This module provides access to the tools for fetching scores for genetic variants. -""" - -from .revel.revel import ( - get_revel_scores -) diff --git a/api/tools/revel/revel.py b/api/tools/revel/revel.py index c44e2d0..b9f274b 100644 --- a/api/tools/revel/revel.py +++ b/api/tools/revel/revel.py @@ -1,7 +1,10 @@ -""" Retrieves REVEL scores for specific variants based on chromosome and position from a CSV file. """ +""" +Retrieves REVEL scores for specific variants based on chromosomeand position from a CSV file. +""" + -import pandas as pd import os +import pandas as pd current_script_dir = os.path.dirname(os.path.abspath(__file__)) revel_file = os.path.join(current_script_dir, 'revel_with_transcript_ids') @@ -16,7 +19,7 @@ def get_revel_scores(chromosome, position): """ variants = [] revel_data = pd.read_csv(revel_file) - + variants = revel_data[ (revel_data['chr'] == chromosome) & (revel_data['hg19_pos'] == position) diff --git a/app/back-end/__init__.py b/app/back-end/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/back-end/src/events/workspace_export_event.py b/app/back-end/src/events/workspace_export_event.py index cd14f3d..73d2836 100644 --- a/app/back-end/src/events/workspace_export_event.py +++ b/app/back-end/src/events/workspace_export_event.py @@ -69,7 +69,8 @@ def handle_workspace_export_feedback(data): if data["status"] == "success": socketio_emit_to_user_session( CONSOLE_FEEDBACK_EVENT, - {"type": "succ", "message": f"File '{data['filePath']}' export was completed successfully."}, + {"type": "succ", + "message": f"File '{data['filePath']}' export was completed successfully."}, data["uuid"], data["sid"], ) diff --git a/app/back-end/src/routes/workspace_merge_route.py b/app/back-end/src/routes/workspace_merge_route.py index 6de3f6c..4769a02 100644 --- a/app/back-end/src/routes/workspace_merge_route.py +++ b/app/back-end/src/routes/workspace_merge_route.py @@ -8,6 +8,8 @@ import os import time # TODO: Remove this import once the merge logic is implemented + +import pandas as pd from flask import Blueprint, request, jsonify from src.setup.extensions import logger @@ -20,6 +22,9 @@ WORKSPACE_UPDATE_FEEDBACK_EVENT, ) +from api import set_lovd_dtypes, parse_lovd +from api.data import merge_gnomad_lovd, set_gnomad_dtypes, parse_gnomad + workspace_merge_route_bp = Blueprint("workspace_merge_route", __name__) @@ -85,8 +90,41 @@ def get_workspace_merge_lovd_gnomad(relative_path): # [destination_path, override, lovd_file, gnomad_file] # - # TODO: Remove this sleep statement once the merge logic is implemented - time.sleep(1) # Simulate a delay for the merge process + if os.path.exists(destination_path) and not override: + return + + if not os.path.exists(destination_path): + os.makedirs(destination_path) + + if not os.path.exists(lovd_file): + raise FileNotFoundError(f"LOVD data file not found at: {lovd_file}") + + if not os.path.exists(gnomad_file): + raise FileNotFoundError(f"gnomAD data file not found at: {gnomad_file}") + + lovd_data = parse_lovd(lovd_file) + gnomad_data = parse_gnomad(gnomad_file) + + set_lovd_dtypes(lovd_data) + set_gnomad_dtypes(gnomad_data) + + # Extract "Variants_On_Genome" and merge it with "Variants_On_Transcripts" + variants_on_genome = lovd_data["Variants_On_Genome"].copy() + gnomad_data = gnomad_data.copy() + + lovd_data = pd.merge( + lovd_data["Variants_On_Transcripts"], + variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38']], + on='id', + how='left' + ) + + final_data = merge_gnomad_lovd(lovd_data, gnomad_data) + + try: + final_data.to_csv(destination_path) + except OSError as e: + raise RuntimeError(f"Error saving file: {e}") # Emit a feedback to the user's console socketio_emit_to_user_session( diff --git a/app/front-end/README.md b/app/front-end/README.md index 8349c4a..2eb749e 100644 --- a/app/front-end/README.md +++ b/app/front-end/README.md @@ -1 +1,53 @@ -# Coming Soon! +# Development Server Setup Guide + +This guide provides instructions on setting up and running a Vite-based development server using React and TypeScript. + +## Prerequisites + +1. **Node.js** +2. **Visual Studio Code (VS Code)** + +## Step 1: Install Node.js and npm + +1. **Check Node.js version:**: + ```bash + node -v + ``` + + Ensure you have Node.js installed. You can download it from [here](https://nodejs.org/). + +2. **Check npm version:**: + ```bash + npm -v + ``` + + If npm is not installed, it will be installed automatically with Node.js. + +## Step 2: Set Up Your Development Environment + +1. **Open VS Code:**: + - Install Visual Studio Code if you haven’t already. Download it from [here](https://code.visualstudio.com/). + +2. **Open Your Project Folder:** + - In the VS Code window, open the Command Palette again (`Ctrl+Shift+P`). + - Type `>File: Open Folder...` and navigate to your project folder located on Windows. Open whole github repository (root). + +3. **Navigate to front-end application:** + - Press `` Ctrl+Shift+` `` to open the New Terminal. Then navigate to: + + ```bash + cd app/front-end + ``` + +4. **Install Dependencies:** + + ```bash + npm install + ``` + +## Step 3: Run the Development Server + ```bash + npm run dev + ``` + + This will run the React application with Vite on `http://localhost:5173/`. To shutdown the application press `Ctrl+C` in VS Code terminal. \ No newline at end of file diff --git a/app/front-end/src/features/editor/components/editorView/editorConfirmLeave.tsx b/app/front-end/src/features/editor/components/editorView/editorConfirmLeave.tsx new file mode 100644 index 0000000..a128ef5 --- /dev/null +++ b/app/front-end/src/features/editor/components/editorView/editorConfirmLeave.tsx @@ -0,0 +1,83 @@ +import { FileTreeItemContextMenuStyledDialog } from '@/features/editor/components/fileTreeView/fileTreeItem'; +import { useStatusContext } from '@/hooks'; +import { Close as CloseIcon } from '@mui/icons-material'; +import { + Box, + Button, + DialogActions, + DialogContent, + DialogTitle, + Grid, + IconButton, + Typography, + useTheme, +} from '@mui/material'; +import { useCallback } from 'react'; + +interface EditorConfirmLeaveDialogProps { + onConfirm: () => void; + isOpen: boolean; + onClose: () => void; +} + +export const EditorConfirmLeave: React.FC = ({ onConfirm, isOpen, onClose }) => { + const { unsavedStateUpdate } = useStatusContext(); + const Theme = useTheme(); + + const handleConfirm = useCallback(() => { + unsavedStateUpdate(false); + onConfirm(); + onClose(); + }, [onConfirm, onClose, unsavedStateUpdate]); + + return ( + + + + + Unsaved changes + + + + + + + + + + + + + You have unsaved changes. If you continue, your changes will be lost.
+ Do you wish to continue? +
+
+ + + + +
+ ); +}; diff --git a/app/front-end/src/features/editor/components/editorView/editorToolbar.tsx b/app/front-end/src/features/editor/components/editorView/editorToolbar.tsx index ad2eb32..da684c0 100644 --- a/app/front-end/src/features/editor/components/editorView/editorToolbar.tsx +++ b/app/front-end/src/features/editor/components/editorView/editorToolbar.tsx @@ -2,7 +2,7 @@ import { useStatusContext } from '@/hooks'; import { socket } from '@/lib'; import { Events } from '@/types'; import { Done as DoneIcon, Error as ErrorIcon } from '@mui/icons-material'; -import { Box, Button, CircularProgress, useTheme } from '@mui/material'; +import { alpha, Box, Button, CircularProgress, Typography, useTheme } from '@mui/material'; import { GridToolbarColumnsButton, GridToolbarContainer, @@ -52,7 +52,7 @@ export const EditorToolbar: React.FC = ({ handleSave }) => { const [saveStatus, setSaveStatus] = useState(true); const Theme = useTheme(); - const { blocked } = useStatusContext(); + const { blocked, unsavedStateUpdate, unsaved } = useStatusContext(); useEffect(() => { const handleWorkspaceFileSaveFeedback = (data: { status: 'success' | 'error' }) => { @@ -73,10 +73,17 @@ export const EditorToolbar: React.FC = ({ handleSave }) => { {/* */} + {unsaved && ( + + Changes not saved + + )} +