Skip to content

Commit

Permalink
Merge pull request #63 from Strexas/VLE/KBE-34/routing_of_merging_data
Browse files (browse the repository at this point in the history)
VLE/KBE-34/routing_of_merging_data
  • Loading branch information
Akaud authored Sep 30, 2024
2 parents 3f818ab + c0c963a commit 925c3df
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 6 deletions.
1 change: 1 addition & 0 deletions api/data/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
LOVD_PATH = os.path.join(DATA_PATH, "lovd/")
GNOMAD_PATH = os.path.join(DATA_PATH, "gnomad/")
CLINVAR_PATH = os.path.join(DATA_PATH, "clinvar/")
DEFAULT_SAVE_PATH = os.path.join(DATA_PATH, "merged_data/")

# variable data types
LOVD_TABLES_DATA_TYPES = {
Expand Down
6 changes: 2 additions & 4 deletions api/data/refactoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
import logging
import re

import requests

import pandas as pd
from pandas import DataFrame

from pyliftover import LiftOver

from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH

from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH, \
DEFAULT_SAVE_PATH


def set_lovd_dtypes(df_dict):
Expand Down Expand Up @@ -347,4 +346,3 @@ def find_popmax_in_gnomad(data):
max_id = population_id
data.loc[i, 'Popmax'] = max_pop
data.loc[i, 'Popmax population'] = population_mapping[max_id]

42 changes: 40 additions & 2 deletions app/back-end/src/routes/workspace_merge_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import os
import time # TODO: Remove this import once the merge logic is implemented

import pandas as pd
from flask import Blueprint, request, jsonify

from src.setup.extensions import logger
Expand All @@ -20,6 +22,9 @@
WORKSPACE_UPDATE_FEEDBACK_EVENT,
)

from api import set_lovd_dtypes, parse_lovd
from api.data import merge_gnomad_lovd, set_gnomad_dtypes, parse_gnomad

workspace_merge_route_bp = Blueprint("workspace_merge_route", __name__)


Expand Down Expand Up @@ -85,8 +90,41 @@ def get_workspace_merge_lovd_gnomad(relative_path):
# [destination_path, override, lovd_file, gnomad_file]
#

# TODO: Remove this sleep statement once the merge logic is implemented
time.sleep(1) # Simulate a delay for the merge process
if os.path.exists(destination_path) and not override:
return

if not os.path.exists(destination_path):
os.makedirs(destination_path)

if not os.path.exists(lovd_file):
raise FileNotFoundError(f"LOVD data file not found at: {lovd_file}")

if not os.path.exists(gnomad_file):
raise FileNotFoundError(f"gnomAD data file not found at: {gnomad_file}")

lovd_data = parse_lovd(lovd_file)
gnomad_data = parse_gnomad(gnomad_file)

set_lovd_dtypes(lovd_data)
set_gnomad_dtypes(gnomad_data)

# Extract "Variants_On_Genome" and merge it with "Variants_On_Transcripts"
variants_on_genome = lovd_data["Variants_On_Genome"].copy()
gnomad_data = gnomad_data.copy()

lovd_data = pd.merge(
lovd_data["Variants_On_Transcripts"],
variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38']],
on='id',
how='left'
)

final_data = merge_gnomad_lovd(lovd_data, gnomad_data)

try:
final_data.to_csv(destination_path)
except OSError as e:
raise RuntimeError(f"Error saving file: {e}")

# Emit a feedback to the user's console
socketio_emit_to_user_session(
Expand Down

0 comments on commit 925c3df

Please sign in to comment.