diff --git a/api/data/constants.py b/api/data/constants.py index fa0060f..69e76d1 100644 --- a/api/data/constants.py +++ b/api/data/constants.py @@ -26,6 +26,7 @@ LOVD_PATH = os.path.join(DATA_PATH, "lovd/") GNOMAD_PATH = os.path.join(DATA_PATH, "gnomad/") CLINVAR_PATH = os.path.join(DATA_PATH, "clinvar/") +DEFAULT_SAVE_PATH = os.path.join(DATA_PATH, "merged_data/") # variable data types LOVD_TABLES_DATA_TYPES = { diff --git a/api/data/refactoring.py b/api/data/refactoring.py index b93ccd7..44d600c 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -4,15 +4,14 @@ import logging import re -import requests import pandas as pd from pandas import DataFrame from pyliftover import LiftOver -from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH - +from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH, \ + DEFAULT_SAVE_PATH def set_lovd_dtypes(df_dict): @@ -347,4 +346,3 @@ def find_popmax_in_gnomad(data): max_id = population_id data.loc[i, 'Popmax'] = max_pop data.loc[i, 'Popmax population'] = population_mapping[max_id] - diff --git a/app/back-end/src/routes/workspace_merge_route.py b/app/back-end/src/routes/workspace_merge_route.py index 6de3f6c..4769a02 100644 --- a/app/back-end/src/routes/workspace_merge_route.py +++ b/app/back-end/src/routes/workspace_merge_route.py @@ -8,6 +8,8 @@ import os import time # TODO: Remove this import once the merge logic is implemented + +import pandas as pd from flask import Blueprint, request, jsonify from src.setup.extensions import logger @@ -20,6 +22,9 @@ WORKSPACE_UPDATE_FEEDBACK_EVENT, ) +from api import set_lovd_dtypes, parse_lovd +from api.data import merge_gnomad_lovd, set_gnomad_dtypes, parse_gnomad + workspace_merge_route_bp = Blueprint("workspace_merge_route", __name__) @@ -85,8 +90,41 @@ def get_workspace_merge_lovd_gnomad(relative_path): # [destination_path, override, lovd_file, gnomad_file] # - # TODO: Remove this sleep 
statement once the merge logic is implemented - time.sleep(1) # Simulate a delay for the merge process + if os.path.exists(destination_path) and not override: + return # NOTE(review): returns None; if this is the Flask view's own return it needs a real response, confirm + + # Create only the parent directory: destination_path itself is the CSV file written by to_csv() below + os.makedirs(os.path.dirname(destination_path) or ".", exist_ok=True) + + if not os.path.exists(lovd_file): + raise FileNotFoundError(f"LOVD data file not found at: {lovd_file}") + + if not os.path.exists(gnomad_file): + raise FileNotFoundError(f"gnomAD data file not found at: {gnomad_file}") + + lovd_data = parse_lovd(lovd_file) + gnomad_data = parse_gnomad(gnomad_file) + + set_lovd_dtypes(lovd_data) + set_gnomad_dtypes(gnomad_data) + + # Extract "Variants_On_Genome" and merge it with "Variants_On_Transcripts" + variants_on_genome = lovd_data["Variants_On_Genome"].copy() + gnomad_data = gnomad_data.copy() + + lovd_data = pd.merge( + lovd_data["Variants_On_Transcripts"], + variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38']], + on='id', + how='left' # keep every transcript row, even one with no matching genome row + ) + + final_data = merge_gnomad_lovd(lovd_data, gnomad_data) + + try: + final_data.to_csv(destination_path) + except OSError as e: + raise RuntimeError(f"Error saving file: {e}") from e # Emit a feedback to the user's console socketio_emit_to_user_session(