Skip to content

Commit

Permalink
move content of routing merge to app/back-end/src/routes/workspace_me…
Browse files Browse the repository at this point in the history
…rge_route
  • Loading branch information
Akaud committed Sep 25, 2024
1 parent 2402ea6 commit 1b6ac7d
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 95 deletions.
53 changes: 0 additions & 53 deletions api/data/refactoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,56 +346,3 @@ def find_popmax_in_gnomad(data):
max_id = population_id
data.loc[i, 'Popmax'] = max_pop
data.loc[i, 'Popmax population'] = population_mapping[max_id]


def routing_merge(lovd_path: str = LOVD_PATH,
                  gnomad_path: str = GNOMAD_PATH,
                  save_path: str = DEFAULT_SAVE_PATH,
                  overwrite: bool = False):
    """
    Merge the LOVD and gnomAD data files and save the result as a CSV file.

    Reads ``lovd_data.txt`` from ``lovd_path`` and ``gnomad_data.csv`` from
    ``gnomad_path``, merges them and writes ``lovd_gnomad.csv`` into
    ``save_path``. Nothing is done when the output file already exists and
    ``overwrite`` is false.

    :param lovd_path: directory containing ``lovd_data.txt``
    :param gnomad_path: directory containing ``gnomad_data.csv``
    :param save_path: directory where ``lovd_gnomad.csv`` is written
    :param overwrite: whether an existing output file should be replaced
    :return: None
    :raises FileNotFoundError: if either input data file does not exist
    """
    save_as = os.path.join(save_path, "lovd_gnomad.csv")

    # Keep an existing result unless the caller explicitly asks to redo it.
    if os.path.exists(save_as) and not overwrite:
        return

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)

    # Build each input path once so the existence check and the parser
    # are guaranteed to look at the same file.
    lovd_file = os.path.join(lovd_path, "lovd_data.txt")
    gnomad_file = os.path.join(gnomad_path, "gnomad_data.csv")

    if not os.path.exists(lovd_file):
        raise FileNotFoundError(f"LOVD data file not found at: {lovd_file}")

    if not os.path.exists(gnomad_file):
        raise FileNotFoundError(f"gnomAD data file not found at: {gnomad_file}")

    lovd_data = parse_lovd(lovd_file)
    gnomad_data = parse_gnomad(gnomad_file)

    set_lovd_dtypes(lovd_data)
    set_gnomad_dtypes(gnomad_data)

    # Extract "Variants_On_Genome" and merge it with "Variants_On_Transcripts"
    variants_on_genome = lovd_data["Variants_On_Genome"].copy()
    gnomad_data = gnomad_data.copy()

    # Left join: every transcript-level row is kept and gains the genomic
    # DNA columns of the variant with the same 'id'.
    lovd_data = pd.merge(
        lovd_data["Variants_On_Transcripts"],
        variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38']],
        on='id',
        how='left'
    )

    final_data = merge_gnomad_lovd(lovd_data, gnomad_data)

    # Saving is best-effort: a write failure is reported, not raised.
    try:
        final_data.to_csv(save_as)
    except OSError as e:
        print(f"Error saving file: {e}")
    else:
        print(f"Merged data saved to {save_as}")
41 changes: 41 additions & 0 deletions app/back-end/src/routes/workspace_merge_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import os
import time # TODO: Remove this import once the merge logic is implemented

import pandas as pd
from flask import Blueprint, request, jsonify

from src.setup.extensions import logger
Expand All @@ -20,6 +22,9 @@
WORKSPACE_UPDATE_FEEDBACK_EVENT,
)

from api import set_lovd_dtypes, parse_lovd
from api.data import merge_gnomad_lovd, set_gnomad_dtypes, parse_gnomad

workspace_merge_route_bp = Blueprint("workspace_merge_route", __name__)


Expand Down Expand Up @@ -85,6 +90,42 @@ def get_workspace_merge_lovd_gnomad(relative_path):
# [destination_path, override, lovd_file, gnomad_file]
#

if os.path.exists(destination_path) and not override:
return

if not os.path.exists(destination_path):
os.makedirs(destination_path)

if not os.path.exists(lovd_file):
raise FileNotFoundError(f"LOVD data file not found at: {lovd_file}")

if not os.path.exists(gnomad_file):
raise FileNotFoundError(f"gnomAD data file not found at: {gnomad_file}")

lovd_data = parse_lovd(lovd_file)
gnomad_data = parse_gnomad(gnomad_file)

set_lovd_dtypes(lovd_data)
set_gnomad_dtypes(gnomad_data)

# Extract "Variants_On_Genome" and merge it with "Variants_On_Transcripts"
variants_on_genome = lovd_data["Variants_On_Genome"].copy()
gnomad_data = gnomad_data.copy()

lovd_data = pd.merge(
lovd_data["Variants_On_Transcripts"],
variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38']],
on='id',
how='left'
)

final_data = merge_gnomad_lovd(lovd_data, gnomad_data)

try:
final_data.to_csv(destination_path)
except OSError as e:
raise RuntimeError(f"Error saving file: {e}")

# TODO: Remove this sleep statement once the merge logic is implemented
time.sleep(1) # Simulate a delay for the merge process

Expand Down
42 changes: 0 additions & 42 deletions tests/pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2181,48 +2181,6 @@
],
"id": "d86fa6b925aea085",
"execution_count": null
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-22T13:05:20.884141Z",
"start_time": "2024-09-22T13:05:19.105877Z"
}
},
"cell_type": "code",
"source": [
"from api.data.refactoring import routing_merge\n",
"routing_merge()"
],
"id": "29ecf5e58e3d53e4",
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-22T13:03:46.634257Z",
"start_time": "2024-09-22T13:02:58.726380Z"
}
},
"cell_type": "code",
"source": [
"from api import store_database_for_eys_gene\n",
"\n",
"store_database_for_eys_gene('lovd', False)\n",
"store_database_for_eys_gene('gnomad', False)"
],
"id": "b5eedffd56faee1d",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The file at ../data/lovd/lovd_data.txt already exists.\n"
]
}
],
"execution_count": 1
}
],
"metadata": {
Expand Down

0 comments on commit 1b6ac7d

Please sign in to comment.