Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VLE/KBE-34/routing_of_merging_data #63

Merged
merged 9 commits on
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions api/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,6 @@
GNOMAD_PATH,
)

# DATA COLLECTION IMPORT
from .collection import (
# Custom exceptions
BadResponseException,
DownloadError,

# Custom utility functions
get_file_from_url,

# Functions for downloading databases
download_lovd_database_for_eys_gene,
download_genes_lovd,
download_database_for_eys_gene,
download_data_from_gnomad_eys,

# Functions for storing databases
store_database_for_eys_gene

)

# DATA REFACTORING IMPORT
from .refactoring import (
# Functions for refactoring data
Expand Down
1 change: 1 addition & 0 deletions api/data/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
LOVD_PATH = os.path.join(DATA_PATH, "lovd/")
GNOMAD_PATH = os.path.join(DATA_PATH, "gnomad/")
CLINVAR_PATH = os.path.join(DATA_PATH, "clinvar/")
DEFAULT_SAVE_PATH = os.path.join(DATA_PATH, "merged_data/")

# variable data types
LOVD_TABLES_DATA_TYPES = {
Expand Down
6 changes: 2 additions & 4 deletions api/data/refactoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
import logging
import re

import requests

import pandas as pd
from pandas import DataFrame

from pyliftover import LiftOver

from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH

from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH, \
DEFAULT_SAVE_PATH


def set_lovd_dtypes(df_dict):
Expand Down Expand Up @@ -347,4 +346,3 @@ def find_popmax_in_gnomad(data):
max_id = population_id
data.loc[i, 'Popmax'] = max_pop
data.loc[i, 'Popmax population'] = population_mapping[max_id]

42 changes: 40 additions & 2 deletions app/back-end/src/routes/workspace_merge_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import os
import time # TODO: Remove this import once the merge logic is implemented

import pandas as pd
from flask import Blueprint, request, jsonify

from src.setup.extensions import logger
Expand All @@ -20,6 +22,9 @@
WORKSPACE_UPDATE_FEEDBACK_EVENT,
)

from api import set_lovd_dtypes, parse_lovd
from api.data import merge_gnomad_lovd, set_gnomad_dtypes, parse_gnomad

workspace_merge_route_bp = Blueprint("workspace_merge_route", __name__)


Expand Down Expand Up @@ -85,8 +90,41 @@ def get_workspace_merge_lovd_gnomad(relative_path):
# [destination_path, override, lovd_file, gnomad_file]
#

# TODO: Remove this sleep statement once the merge logic is implemented
time.sleep(1) # Simulate a delay for the merge process
if os.path.exists(destination_path) and not override:
return

if not os.path.exists(destination_path):
os.makedirs(destination_path)

if not os.path.exists(lovd_file):
raise FileNotFoundError(f"LOVD data file not found at: {lovd_file}")

if not os.path.exists(gnomad_file):
raise FileNotFoundError(f"gnomAD data file not found at: {gnomad_file}")

lovd_data = parse_lovd(lovd_file)
gnomad_data = parse_gnomad(gnomad_file)

set_lovd_dtypes(lovd_data)
set_gnomad_dtypes(gnomad_data)

# Extract "Variants_On_Genome" and merge it with "Variants_On_Transcripts"
variants_on_genome = lovd_data["Variants_On_Genome"].copy()
gnomad_data = gnomad_data.copy()

lovd_data = pd.merge(
lovd_data["Variants_On_Transcripts"],
variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38']],
on='id',
how='left'
)

final_data = merge_gnomad_lovd(lovd_data, gnomad_data)

try:
final_data.to_csv(destination_path)
except OSError as e:
raise RuntimeError(f"Error saving file: {e}")

# Emit a feedback to the user's console
socketio_emit_to_user_session(
Expand Down
Loading
Loading