From 929d1376b0f69c50d1e315c83cb2db1d860db409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kajus=20=C4=8Cerniauskas?= Date: Wed, 31 Jul 2024 17:47:02 +0300 Subject: [PATCH 01/72] implemented a method request gene information by its id, forms it to dataframe. --- api/__init__.py | 4 +- api/data/__init__.py | 60 + api/data/refactoring.py | 86 ++ tests/pipeline.ipynb | 3245 ++++++++++++++------------------------- 4 files changed, 1338 insertions(+), 2057 deletions(-) diff --git a/api/__init__.py b/api/__init__.py index 940f6f3..f54edfc 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -54,5 +54,7 @@ # Functions for refactoring data set_lovd_dtypes, parse_lovd, - from_clinvar_name_to_cdna_position + from_clinvar_name_to_cdna_position, + save_lovd_as_vcf, + request_clinvar_api_data ) diff --git a/api/data/__init__.py b/api/data/__init__.py index e69de29..001190d 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -0,0 +1,60 @@ +""" +Package for data collection purposes provides both collection and refactoring functionality. + +Data from LOVD, ClinVar and GnomAd databases can be downloaded using this package. GnomAd and +ClinVar are limited with EYS gene, but it is possible to download data for any gene in LOVD. + +All necessary functionality can be imported directly from data without +specifying the module. + +data collection pipeline example is established for project's specific usage. +""" + +# CONSTANTS IMPORT +from .constants import ( + # URLs for LOVD database + LOVD_URL, LOVD_URL_EYS, LOVD_FILE_URL, LOVD_FILE_URL_EYS, + + # URLs for gnomAD database + GNOMAD_URL, GNOMAD_URL_EYS, GNOMAD_FILE_URL_EYS, + + # URLs for ClinVar database + CLINVAR_URL, CLINVAR_URL_EYS, CLINVAR_FILE_URL_EYS, + + # Paths for data storage + DATA_PATH, LOVD_PATH, GNOMAD_PATH, CLINVAR_PATH, + + # Data types for tables + LOVD_TABLES_DATA_TYPES, + + # Paths for database downloads + DATABASES_DOWNLOAD_PATHS +) + +# DATA COLLECTION IMPORT +from .collection import ( + # Custom exceptions + BadResponseException, + DownloadError, + + # Custom utility functions + get_file_from_url, + + # Functions for downloading databases + download_lovd_database_for_eys_gene, + download_genes_lovd, + download_database_for_eys_gene, + + # Functions for storing databases + store_database_for_eys_gene +) + +# DATA REFACTORING IMPORT +from .refactoring import ( + # Functions for refactoring data + set_lovd_dtypes, + parse_lovd, + from_clinvar_name_to_cdna_position, + save_lovd_as_vcf, + request_clinvar_api_data, +) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index efcf1f1..f7142a3 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -3,11 +3,15 @@ import os import logging +import pandas +import requests + import pandas as pd from pandas import DataFrame from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH + def set_lovd_dtypes(df_dict): """ Convert data from LOVD format table to desired data format based on specified data types. @@ -154,3 +158,85 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"): f.write("\t".join(record)) f.write("\n") + + +def request_clinvar_api_data(gene_id: str): + """ + Requests ClinVar API for data about variant with given id.\n + Converts it to pandas dataframe. 
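+
+    Example (an illustrative sketch only: it assumes network access to the
+    public NCBI E-utilities endpoint used below, and the UID is the one
+    tried in tests/pipeline.ipynb):
+
+        df = request_clinvar_api_data("1519786")
+        print(df.shape)  # one row per requested ClinVar UID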
+ + :param str gene_id: id of variant (may be multiple) + :returns: dataframe from ClinVar API + :rtype: dataframe + """ + + path = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json" + + request = requests.get(path) + + if request.status_code != 200: + raise ValueError(f"Request failed with status code {request.status_code}") + + data = request.json() + + # Extract the 'result' part of the JSON + results = data['result'] + + # Extract the 'uids' part of the JSON + flattened_data = [] + + for uid in results['uids']: + entry = results[uid] + + # Using pd.json_normalize to flatten the JSON data + flattened_entry = pd.json_normalize(entry, sep='_') + + flattened_variation_set = pd.json_normalize(flattened_entry['variation_set'][0], sep='_') + flattened_variation_xrefs = pd.json_normalize(flattened_variation_set['variation_xrefs'][0], sep='_') + flattened_variation_loc0 = pd.json_normalize(flattened_variation_set['variation_loc'][0][0], + sep='_') # 1/2 frames + flattened_variation_loc0 = flattened_variation_loc0.add_prefix('0_') + flattened_variation_loc1 = pd.json_normalize(flattened_variation_set['variation_loc'][0][1], + sep='_') # 2/2 frames + flattened_variation_loc1 = flattened_variation_loc1.add_prefix('1_') + flattened_allele_freq_set = pd.json_normalize(flattened_variation_set['allele_freq_set'][0], sep='_') + + flattened_genes0 = pd.json_normalize(flattened_entry['genes'][0][0], sep='_') # 1/2 frames + flattened_genes0 = flattened_genes0.add_prefix('0_') + flattened_genes1 = pd.json_normalize(flattened_entry['genes'][0][1], sep='_') # 2/2 frames + flattened_genes1 = flattened_genes1.add_prefix('1_') + + flattened_germline_classification_trait_set = pd.json_normalize( + flattened_entry['germline_classification_trait_set'][0], sep='_') + flattened_trait_xrefs = pd.json_normalize(flattened_germline_classification_trait_set['trait_xrefs'][0], + sep='_') + + # dropping extracted nests + flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set'], + axis=1) + flattened_variation_set = flattened_variation_set.drop( + columns=['variation_xrefs', 'variation_loc', 'allele_freq_set'], axis=1) + flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.drop( + columns=['trait_xrefs'], axis=1) + + # adding extracted nests to the frames + flattened_germline_classification_trait_set = pd.concat( + [flattened_germline_classification_trait_set, flattened_trait_xrefs], axis=1) + + flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_xrefs], axis=1) + flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc0], axis=1) # might break + flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc1], axis=1) # might break + flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1) + + flattened_entry = pd.concat([flattened_entry, flattened_variation_set], axis=1) + flattened_entry = pd.concat([flattened_entry, flattened_genes0], axis=1) + flattened_entry = pd.concat([flattened_entry, flattened_genes1], axis=1) + flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1) + + # Append the flattened entry to the list + flattened_data.append(flattened_entry) + + # Concatenate all flattened entries into a single DataFrame + df = pd.concat(flattened_data, ignore_index=True) + + return df diff --git a/tests/pipeline.ipynb 
b/tests/pipeline.ipynb index 67814a7..210a5fe 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -2,44 +2,47 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, "id": "initial_id", "metadata": { - "ExecuteTime": { - "end_time": "2024-05-13T15:38:18.029744Z", - "start_time": "2024-05-13T15:38:17.807980Z" - }, "collapsed": true, "jupyter": { "outputs_hidden": true + }, + "ExecuteTime": { + "end_time": "2024-07-31T14:38:44.492053Z", + "start_time": "2024-07-31T14:38:44.487236Z" } }, - "outputs": [], "source": [ "import pandas as pd\n", "\n", "from api.data import (store_database_for_eys_gene,\n", " parse_lovd,\n", " LOVD_PATH,\n", - " set_lovd_dtypes)\n", + " set_lovd_dtypes,\n", + " )\n", "from api.data import save_lovd_as_vcf\n", "pd.options.display.max_columns = 0" - ] + ], + "outputs": [], + "execution_count": 41 }, { "cell_type": "code", - "execution_count": 2, "id": "f49f7691a27aa7b4", "metadata": { - "ExecuteTime": { - "end_time": "2024-05-13T15:08:35.710520Z", - "start_time": "2024-05-13T15:08:35.708264Z" - }, "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "ExecuteTime": { + "end_time": "2024-07-31T14:38:44.501624Z", + "start_time": "2024-07-31T14:38:44.494128Z" } }, + "source": [ + "store_database_for_eys_gene(\"lovd\", override=False)" + ], "outputs": [ { "name": "stdout", @@ -49,39 +52,41 @@ ] } ], - "source": [ - "store_database_for_eys_gene(\"lovd\", override=False)" - ] + "execution_count": 42 }, { "cell_type": "code", - "execution_count": 2, "id": "cf5c45c0f7b9de0f", "metadata": { - "ExecuteTime": { - "end_time": "2024-05-13T15:38:24.591752Z", - "start_time": "2024-05-13T15:38:19.498594Z" - }, "collapsed": false, "jupyter": { "outputs_hidden": false + }, + "ExecuteTime": { + "end_time": "2024-07-31T14:38:51.917606Z", + "start_time": "2024-07-31T14:38:44.502630Z" } }, - "outputs": [], "source": [ "data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")" - ] + ], + "outputs": [], + "execution_count": 43 }, { "cell_type": "code", - "execution_count": 8, "id": "8a089e29bfc8c119", "metadata": { "ExecuteTime": { - "end_time": "2024-05-13T15:12:07.510712Z", - "start_time": "2024-05-13T15:12:07.366319Z" + "end_time": "2024-07-31T14:38:52.101863Z", + "start_time": "2024-07-31T14:38:51.918626Z" } }, + "source": [ + "for i in data:\n", + " print(i)\n", + " display(data[i])" + ], "outputs": [ { "name": "stdout", @@ -92,6 +97,12 @@ }, { "data": { + "text/plain": [ + " id name ... updated_by updated_date\n", + "0 EYS eyes shut homolog (Drosophila) ... 00006 2024-05-31 11:42:04\n", + "\n", + "[1 rows x 34 columns]" + ], "text/html": [ "
\n", "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chrhg19_posgrch38_posrefaltaarefaaaltREVELEnsembl_transcriptid
2875612766565575864945865TAQL0.188ENST00000503581;ENST00000370621;ENST00000370616
2875612866565575864945865TCQR0.111ENST00000503581;ENST00000370621;ENST00000370616
2875612966565575864945865TGQP0.344ENST00000503581;ENST00000370621;ENST00000370616
\n", - "
" - ], "text/plain": [ - " chr hg19_pos grch38_pos ref alt aaref aaalt REVEL \\\n", - "28756127 6 65655758 64945865 T A Q L 0.188 \n", - "28756128 6 65655758 64945865 T C Q R 0.111 \n", - "28756129 6 65655758 64945865 T G Q P 0.344 \n", - "\n", - " Ensembl_transcriptid \n", - "28756127 ENST00000503581;ENST00000370621;ENST00000370616 \n", - "28756128 ENST00000503581;ENST00000370621;ENST00000370616 \n", - "28756129 ENST00000503581;ENST00000370621;ENST00000370616 " + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Screenings\n", + "\n", + "RangeIndex: 1465 entries, 0 to 1464\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 1465 non-null Int64 \n", + " 1 individualid 1465 non-null Int64 \n", + " 2 variants_found 1465 non-null Int64 \n", + " 3 owned_by 1465 non-null Int64 \n", + " 4 created_by 1465 non-null Int64 \n", + " 5 created_date 1465 non-null datetime64[ns]\n", + " 6 edited_by 15 non-null Int64 \n", + " 7 edited_date 15 non-null datetime64[ns]\n", + " 8 Screening/Technique 1465 non-null string \n", + " 9 Screening/Template 1465 non-null string \n", + " 10 Screening/Tissue 1465 non-null string \n", + " 11 Screening/Remarks 1465 non-null string \n", + "dtypes: Int64(6), datetime64[ns](2), string(4)\n", + "memory usage: 146.1 KB\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Screenings_To_Genes\n", + "\n", + "RangeIndex: 1316 entries, 0 to 1315\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 screeningid 1316 non-null Int64 \n", + " 1 geneid 1316 non-null string\n", + "dtypes: Int64(1), string(1)\n", + "memory usage: 22.0 KB\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variants_On_Genome\n", + "\n", + "RangeIndex: 2560 entries, 0 to 2559\n", + "Data columns (total 26 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 2560 non-null Int64 \n", + " 1 allele 2560 non-null Int64 \n", + " 2 effectid 2560 non-null Int64 \n", + " 3 chromosome 2560 non-null Int64 \n", + " 4 position_g_start 2559 non-null Int64 \n", + " 5 position_g_end 2559 non-null Int64 \n", + " 6 type 2560 non-null string \n", + " 7 average_frequency 2559 non-null float64\n", + " 8 owned_by 2560 non-null Int64 \n", + " 9 VariantOnGenome/DBID 2560 non-null string \n", + " 10 VariantOnGenome/DNA 2560 non-null string \n", + " 11 VariantOnGenome/Frequency 2560 non-null string \n", + " 12 VariantOnGenome/Reference 2560 non-null string \n", + " 13 VariantOnGenome/Restriction_site 2560 non-null string \n", + " 14 VariantOnGenome/Published_as 2560 non-null string \n", + " 15 VariantOnGenome/Remarks 2560 non-null string \n", + " 16 VariantOnGenome/Genetic_origin 2560 non-null string \n", + " 17 VariantOnGenome/Segregation 2560 non-null string \n", + " 18 VariantOnGenome/dbSNP 2560 non-null string \n", + " 19 VariantOnGenome/VIP 2560 non-null string \n", + " 20 VariantOnGenome/Methylation 2560 non-null string \n", + " 21 VariantOnGenome/ISCN 2560 non-null string \n", + " 22 VariantOnGenome/DNA/hg38 2560 non-null string \n", + " 23 VariantOnGenome/ClinVar 2560 non-null 
string \n", + " 24 VariantOnGenome/ClinicalClassification 2560 non-null string \n", + " 25 VariantOnGenome/ClinicalClassification/Method 2560 non-null string \n", + "dtypes: Int64(7), float64(1), string(18)\n", + "memory usage: 537.6 KB\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variants_On_Transcripts\n", + "\n", + "RangeIndex: 2560 entries, 0 to 2559\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 id 2560 non-null Int64 \n", + " 1 transcriptid 2560 non-null Int64 \n", + " 2 effectid 2560 non-null Int64 \n", + " 3 position_c_start 2559 non-null Int64 \n", + " 4 position_c_start_intron 2560 non-null Int64 \n", + " 5 position_c_end 2559 non-null Int64 \n", + " 6 position_c_end_intron 2560 non-null Int64 \n", + " 7 VariantOnTranscript/DNA 2560 non-null string\n", + " 8 VariantOnTranscript/RNA 2560 non-null string\n", + " 9 VariantOnTranscript/Protein 2560 non-null string\n", + " 10 VariantOnTranscript/Exon 2560 non-null string\n", + "dtypes: Int64(7), string(4)\n", + "memory usage: 237.6 KB\n" + ] + }, + { + "data": { + "text/plain": [ + "None" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Screenings_To_Variants\n", + "\n", + "RangeIndex: 2168 entries, 0 to 2167\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 screeningid 2168 non-null Int64\n", + " 1 variantid 2168 non-null Int64\n", + "dtypes: Int64(2)\n", + "memory usage: 38.2 KB\n" + ] + }, + { + "data": { + "text/plain": [ + "None" ] }, "metadata": {}, "output_type": "display_data" } ], + "execution_count": 45 + }, + { + "cell_type": "code", + "id": "c968af1617be40db", + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-31T14:38:52.601560Z", + "start_time": "2024-07-31T14:38:52.346875Z" + } + }, + "source": [ + "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")" + ], + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Variants_On_Genome'", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)", + "File \u001B[1;32m~\\PycharmProjects\\kath\\venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", + "File \u001B[1;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[1;34m()\u001B[0m\n", + "File \u001B[1;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[1;34m()\u001B[0m\n", + "File \u001B[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7081\u001B[0m, in 
\u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[1;34m()\u001B[0m\n", + "File \u001B[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7089\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[1;34m()\u001B[0m\n", + "\u001B[1;31mKeyError\u001B[0m: 'Variants_On_Genome'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[1;32mIn[46], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[43msave_lovd_as_vcf\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m./lovd.vcf\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n", + "File \u001B[1;32m~\\PycharmProjects\\kath\\api\\data\\refactoring.py:140\u001B[0m, in \u001B[0;36msave_lovd_as_vcf\u001B[1;34m(data, save_to)\u001B[0m\n\u001B[0;32m 134\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21msave_lovd_as_vcf\u001B[39m(data, save_to\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m./lovd.vcf\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m 135\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m 136\u001B[0m \u001B[38;5;124;03m Gets hg38 variants from LOVD and saves as VCF file.\u001B[39;00m\n\u001B[0;32m 137\u001B[0m \u001B[38;5;124;03m :param DataFrame data: LOVD DataFrame with data\u001B[39;00m\n\u001B[0;32m 138\u001B[0m \u001B[38;5;124;03m :param str save_to: path where to save VCF file.\u001B[39;00m\n\u001B[0;32m 139\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m--> 140\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\n\u001B[0;32m 141\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mVariantOnGenome/DNA/hg38\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m df\u001B[38;5;241m.\u001B[39mcolumns:\n\u001B[0;32m 142\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mVariantOnGenome/DNA/hg38 is not in the LOVD DataFrame.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", + "File \u001B[1;32m~\\PycharmProjects\\kath\\venv\\Lib\\site-packages\\pandas\\core\\frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[1;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[0;32m 4104\u001B[0m 
indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", + "File \u001B[1;32m~\\PycharmProjects\\kath\\venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[0;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[0;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[0;32m 3810\u001B[0m ):\n\u001B[0;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[1;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[0;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[0;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[0;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[0;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[0;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", + "\u001B[1;31mKeyError\u001B[0m: 'Variants_On_Genome'" + ] + } + ], + "execution_count": 46 + }, + { + "cell_type": "code", + "id": "c7ff16903e0c52bd", + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-31T14:38:52.603569Z", + "start_time": "2024-07-31T14:38:52.602570Z" + } + }, + "source": [ + "from subprocess import Popen\n", + "\n", + "\n", + "process = Popen(\"spliceai -I ./lovd.vcf -O ./lovd_output.vcf -R ../tools/spliceai/hg38.fa -A grch38\".split())\n", + "process.wait()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", + "metadata": { + "ExecuteTime": { + "end_time": "2024-07-31T14:38:52.605570Z", + "start_time": "2024-07-31T14:38:52.604570Z" + } + }, "source": [ - "from tools import get_revel_scores\n", + "from api.tools import get_revel_scores\n", "\n", "chromosome = 6\n", "position = 65655758\n", @@ -3922,7 +3007,55 @@ "results = get_revel_scores(chromosome, position)\n", "\n", "display(results)" - ] + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "from api.data import request_clinvar_api_data\n", + "\n", + "some_id = 1519786\n", + "try:\n", + " frame = request_clinvar_api_data(some_id)\n", + " display(frame)\n", + "except Exception as e:\n", + " print(e)\n" + ], + "id": "576b841842a7ab61", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "import requests\n", + "from api.data import request_clinvar_api_data\n", + "\n", + "gene_id = '1519786'\n", + "# with gene_id = '1519787' error is raised\n", + "\n", + "#TODO inside request_clinvar_api_data\n", + "# 1. dinamically expand genes to dataframe (might be one, might be more)\n", + "# 2. 
dinamically expand variation_loc to dataframe (might be one, might be more)\n", + "frames = request_clinvar_api_data(gene_id)\n", + "\n", + "display(frames)" + ], + "id": "b21c3487476b684f", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": "", + "id": "a97fbf604bd8977b", + "outputs": [], + "execution_count": null } ], "metadata": { From ea192ee7589e0069e5fcc4e1d7a34ed122eb7eeb Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:02:36 +0300 Subject: [PATCH 02/72] dynamically adds variation properties --- api/data/refactoring.py | 67 ++++---- tests/pipeline.ipynb | 364 +++++++++++++++++++++++++++++++++++----- 2 files changed, 363 insertions(+), 68 deletions(-) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index f7142a3..c6e82ea 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -193,50 +193,61 @@ def request_clinvar_api_data(gene_id: str): flattened_variation_set = pd.json_normalize(flattened_entry['variation_set'][0], sep='_') flattened_variation_xrefs = pd.json_normalize(flattened_variation_set['variation_xrefs'][0], sep='_') - flattened_variation_loc0 = pd.json_normalize(flattened_variation_set['variation_loc'][0][0], - sep='_') # 1/2 frames - flattened_variation_loc0 = flattened_variation_loc0.add_prefix('0_') - flattened_variation_loc1 = pd.json_normalize(flattened_variation_set['variation_loc'][0][1], - sep='_') # 2/2 frames - flattened_variation_loc1 = flattened_variation_loc1.add_prefix('1_') - flattened_allele_freq_set = pd.json_normalize(flattened_variation_set['allele_freq_set'][0], sep='_') - - flattened_genes0 = pd.json_normalize(flattened_entry['genes'][0][0], sep='_') # 1/2 frames - flattened_genes0 = flattened_genes0.add_prefix('0_') - flattened_genes1 = pd.json_normalize(flattened_entry['genes'][0][1], sep='_') # 2/2 frames - flattened_genes1 = flattened_genes1.add_prefix('1_') - - flattened_germline_classification_trait_set = pd.json_normalize( - flattened_entry['germline_classification_trait_set'][0], sep='_') - flattened_trait_xrefs = pd.json_normalize(flattened_germline_classification_trait_set['trait_xrefs'][0], - sep='_') + + variation_loc_size = len(flattened_variation_set['variation_loc'][0]) + for i in range(variation_loc_size): + flattened_variation_loc = pd.json_normalize(flattened_variation_set['variation_loc'][0][i], sep='_') + flattened_variation_loc = flattened_variation_loc.add_prefix(f'{i}_') + flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc], axis=1) + + allele_freq_set_size = len(flattened_variation_set['allele_freq_set'][0]) + for i in range(allele_freq_set_size): + flattened_allele_freq_set = pd.json_normalize(flattened_variation_set['allele_freq_set'][0][i], sep='_') + flattened_allele_freq_set = flattened_allele_freq_set.add_prefix(f'{i}_') + flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1) + + gene_size = len(flattened_entry['genes'][0]) + for i in range(gene_size): + flattened_genes = pd.json_normalize(flattened_entry['genes'][0][i], sep='_') + flattened_genes = flattened_genes.add_prefix(f'{i}_') + flattened_entry = pd.concat([flattened_entry, flattened_genes], axis=1) + + gremline_classification_trait_set_size = len(flattened_entry['germline_classification_trait_set'][0]) + for i in range(gremline_classification_trait_set_size): + flattened_germline_classification_trait_set = pd.json_normalize( + 
flattened_entry['germline_classification_trait_set'][0][i], sep='_') + flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.add_prefix( + f'{i}_') + + trait_xrefs_size = len(flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0]) + for j in range(trait_xrefs_size): + flattened_trait_xrefs = pd.json_normalize( + flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0][j], sep='_') + flattened_trait_xrefs = flattened_trait_xrefs.add_prefix(f'{j}_') + + flattened_germline_classification_trait_set = pd.concat( + [flattened_germline_classification_trait_set, flattened_trait_xrefs], axis=1) + + flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.drop( + columns=[f'{i}_trait_xrefs'], axis=1) + flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1) # dropping extracted nests flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set'], axis=1) flattened_variation_set = flattened_variation_set.drop( columns=['variation_xrefs', 'variation_loc', 'allele_freq_set'], axis=1) - flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.drop( - columns=['trait_xrefs'], axis=1) - - # adding extracted nests to the frames - flattened_germline_classification_trait_set = pd.concat( - [flattened_germline_classification_trait_set, flattened_trait_xrefs], axis=1) flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_xrefs], axis=1) - flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc0], axis=1) # might break - flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc1], axis=1) # might break flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1) flattened_entry = pd.concat([flattened_entry, flattened_variation_set], axis=1) - flattened_entry = pd.concat([flattened_entry, flattened_genes0], axis=1) - flattened_entry = pd.concat([flattened_entry, flattened_genes1], axis=1) flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1) # Append the flattened entry to the list flattened_data.append(flattened_entry) - # Concatenate all flattened entries into a single DataFrame + # Concatenate all flattened entries into a single DataFrame df = pd.concat(flattened_data, ignore_index=True) return df diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index 210a5fe..12bb5a4 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -9,8 +9,8 @@ "outputs_hidden": true }, "ExecuteTime": { - "end_time": "2024-07-31T14:38:44.492053Z", - "start_time": "2024-07-31T14:38:44.487236Z" + "end_time": "2024-08-06T14:19:28.871130Z", + "start_time": "2024-08-06T14:19:28.169833Z" } }, "source": [ @@ -25,7 +25,7 @@ "pd.options.display.max_columns = 0" ], "outputs": [], - "execution_count": 41 + "execution_count": 3 }, { "cell_type": "code", @@ -36,23 +36,15 @@ "outputs_hidden": false }, "ExecuteTime": { - "end_time": "2024-07-31T14:38:44.501624Z", - "start_time": "2024-07-31T14:38:44.494128Z" + "end_time": "2024-08-06T14:19:29.382744Z", + "start_time": "2024-08-06T14:19:28.873646Z" } }, "source": [ "store_database_for_eys_gene(\"lovd\", override=False)" ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The file at ../data/lovd/lovd_data.txt already exists.\n" - ] - } - ], - "execution_count": 42 + 
"outputs": [], + "execution_count": 4 }, { "cell_type": "code", @@ -63,23 +55,23 @@ "outputs_hidden": false }, "ExecuteTime": { - "end_time": "2024-07-31T14:38:51.917606Z", - "start_time": "2024-07-31T14:38:44.502630Z" + "end_time": "2024-08-06T14:19:35.787015Z", + "start_time": "2024-08-06T14:19:29.383754Z" } }, "source": [ "data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")" ], "outputs": [], - "execution_count": 43 + "execution_count": 5 }, { "cell_type": "code", "id": "8a089e29bfc8c119", "metadata": { "ExecuteTime": { - "end_time": "2024-07-31T14:38:52.101863Z", - "start_time": "2024-07-31T14:38:51.918626Z" + "end_time": "2024-08-06T14:19:35.968177Z", + "start_time": "2024-08-06T14:19:35.788025Z" } }, "source": [ @@ -2471,7 +2463,7 @@ "output_type": "display_data" } ], - "execution_count": 44 + "execution_count": 6 }, { "cell_type": "code", @@ -2482,8 +2474,8 @@ "outputs_hidden": false }, "ExecuteTime": { - "end_time": "2024-07-31T14:38:52.345798Z", - "start_time": "2024-07-31T14:38:52.102870Z" + "end_time": "2024-08-06T14:19:36.151167Z", + "start_time": "2024-08-06T14:19:35.969184Z" } }, "source": [ @@ -2930,15 +2922,15 @@ "output_type": "display_data" } ], - "execution_count": 45 + "execution_count": 7 }, { "cell_type": "code", "id": "c968af1617be40db", "metadata": { "ExecuteTime": { - "end_time": "2024-07-31T14:38:52.601560Z", - "start_time": "2024-07-31T14:38:52.346875Z" + "end_time": "2024-08-06T14:19:36.920697Z", + "start_time": "2024-08-06T14:19:36.151683Z" } }, "source": [ @@ -2952,7 +2944,7 @@ "traceback": [ "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)", - "File \u001B[1;32m~\\PycharmProjects\\kath\\venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", + "File \u001B[1;32m~\\PycharmProjects\\KathChatGPT\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", "File \u001B[1;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[1;34m()\u001B[0m\n", "File \u001B[1;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[1;34m()\u001B[0m\n", "File \u001B[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7081\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[1;34m()\u001B[0m\n", @@ -2960,23 +2952,23 @@ 
"\u001B[1;31mKeyError\u001B[0m: 'Variants_On_Genome'", "\nThe above exception was the direct cause of the following exception:\n", "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[1;32mIn[46], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[43msave_lovd_as_vcf\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m./lovd.vcf\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n", + "Cell \u001B[1;32mIn[8], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[43msave_lovd_as_vcf\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m./lovd.vcf\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n", "File \u001B[1;32m~\\PycharmProjects\\kath\\api\\data\\refactoring.py:140\u001B[0m, in \u001B[0;36msave_lovd_as_vcf\u001B[1;34m(data, save_to)\u001B[0m\n\u001B[0;32m 134\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21msave_lovd_as_vcf\u001B[39m(data, save_to\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m./lovd.vcf\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m 135\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m 136\u001B[0m \u001B[38;5;124;03m Gets hg38 variants from LOVD and saves as VCF file.\u001B[39;00m\n\u001B[0;32m 137\u001B[0m \u001B[38;5;124;03m :param DataFrame data: LOVD DataFrame with data\u001B[39;00m\n\u001B[0;32m 138\u001B[0m \u001B[38;5;124;03m :param str save_to: path where to save VCF file.\u001B[39;00m\n\u001B[0;32m 139\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m--> 140\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\n\u001B[0;32m 141\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mVariantOnGenome/DNA/hg38\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m df\u001B[38;5;241m.\u001B[39mcolumns:\n\u001B[0;32m 142\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mVariantOnGenome/DNA/hg38 is not in the LOVD DataFrame.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", - "File \u001B[1;32m~\\PycharmProjects\\kath\\venv\\Lib\\site-packages\\pandas\\core\\frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[1;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m 
\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[0;32m 4104\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", - "File \u001B[1;32m~\\PycharmProjects\\kath\\venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[0;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[0;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[0;32m 3810\u001B[0m ):\n\u001B[0;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[1;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[0;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[0;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[0;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[0;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[0;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", + "File \u001B[1;32m~\\PycharmProjects\\KathChatGPT\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[1;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[0;32m 4104\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", + "File \u001B[1;32m~\\PycharmProjects\\KathChatGPT\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[0;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[0;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m 
\u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[0;32m 3810\u001B[0m ):\n\u001B[0;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[1;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[0;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[0;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[0;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[0;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[0;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", "\u001B[1;31mKeyError\u001B[0m: 'Variants_On_Genome'" ] } ], - "execution_count": 46 + "execution_count": 8 }, { "cell_type": "code", "id": "c7ff16903e0c52bd", "metadata": { "ExecuteTime": { - "end_time": "2024-07-31T14:38:52.603569Z", - "start_time": "2024-07-31T14:38:52.602570Z" + "end_time": "2024-08-06T14:19:36.921706Z", + "start_time": "2024-08-06T14:19:36.921706Z" } }, "source": [ @@ -2992,12 +2984,7 @@ { "cell_type": "code", "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", - "metadata": { - "ExecuteTime": { - "end_time": "2024-07-31T14:38:52.605570Z", - "start_time": "2024-07-31T14:38:52.604570Z" - } - }, + "metadata": {}, "source": [ "from api.tools import get_revel_scores\n", "\n", @@ -3049,11 +3036,308 @@ "outputs": [], "execution_count": null }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-06T14:58:14.898227Z", + "start_time": "2024-08-06T14:58:14.228473Z" + } + }, + "cell_type": "code", + "source": [ + "import requests\n", + "\n", + "gene_id = '1519785'\n", + "\n", + "path = f\"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json\"\n", + "\n", + "request = requests.get(path)\n", + "\n", + "if request.status_code != 200:\n", + " raise ValueError(f\"Request failed with status code {request.status_code}\")\n", + "\n", + "data = request.json()\n", + "\n", + " # Extract the 'result' part of the JSON\n", + "results = data['result']\n", + "\n", + "# Extract the 'uids' part of the JSON\n", + "flattened_data = []\n", + "\n", + "for uid in results['uids']:\n", + " entry = results[uid]\n", + "\n", + " # Using pd.json_normalize to flatten the JSON data\n", + " flattened_entry = pd.json_normalize(entry, sep='_')\n", + "\n", + " flattened_variation_set = pd.json_normalize(flattened_entry['variation_set'][0], sep='_')\n", + " flattened_variation_xrefs = pd.json_normalize(flattened_variation_set['variation_xrefs'][0], sep='_')\n", + " \n", + " variation_loc_size = len(flattened_variation_set['variation_loc'][0]) \n", + " for i in range(variation_loc_size):\n", + " flattened_variation_loc = pd.json_normalize(flattened_variation_set['variation_loc'][0][i], sep='_')\n", + " flattened_variation_loc = flattened_variation_loc.add_prefix(f'{i}_')\n", + " flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc], axis=1)\n", + " \n", + " allele_freq_set_size = len(flattened_variation_set['allele_freq_set'][0])\n", + " for i in range(allele_freq_set_size):\n", + " flattened_allele_freq_set = 
pd.json_normalize(flattened_variation_set['allele_freq_set'][0][i], sep='_')\n", + " flattened_allele_freq_set = flattened_allele_freq_set.add_prefix(f'{i}_')\n", + " flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1)\n", + " \n", + " gene_size = len(flattened_entry['genes'][0])\n", + " for i in range(gene_size):\n", + " flattened_genes = pd.json_normalize(flattened_entry['genes'][0][i], sep='_')\n", + " flattened_genes = flattened_genes.add_prefix(f'{i}_')\n", + " flattened_entry = pd.concat([flattened_entry, flattened_genes], axis=1)\n", + "\n", + " gremline_classification_trait_set_size = len(flattened_entry['germline_classification_trait_set'][0])\n", + " for i in range(gremline_classification_trait_set_size):\n", + " flattened_germline_classification_trait_set = pd.json_normalize(flattened_entry['germline_classification_trait_set'][0][i], sep='_')\n", + " flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.add_prefix(f'{i}_')\n", + " \n", + " trait_xrefs_size = len(flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0])\n", + " for j in range(trait_xrefs_size):\n", + " flattened_trait_xrefs = pd.json_normalize(flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0][j], sep='_')\n", + " flattened_trait_xrefs = flattened_trait_xrefs.add_prefix(f'{j}_')\n", + "\n", + " flattened_germline_classification_trait_set = pd.concat([flattened_germline_classification_trait_set, flattened_trait_xrefs], axis=1)\n", + " \n", + " flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.drop(columns=[f'{i}_trait_xrefs'], axis=1)\n", + " flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1)\n", + " \n", + " # dropping extracted nests\n", + " flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set'],axis=1)\n", + " flattened_variation_set = flattened_variation_set.drop(columns=['variation_xrefs', 'variation_loc', 'allele_freq_set'], axis=1)\n", + "\n", + " flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_xrefs], axis=1)\n", + " flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1)\n", + "\n", + " flattened_entry = pd.concat([flattened_entry, flattened_variation_set], axis=1)\n", + " flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1)\n", + "\n", + " # Append the flattened entry to the list\n", + " flattened_data.append(flattened_entry)\n", + "\n", + " # Concatenate all flattened entries into a single DataFrame\n", + "df = pd.concat(flattened_data, ignore_index=True)\n", + "\n", + "display(df)\n" + ], + "id": "7e9ca83a40035c14", + "outputs": [ + { + "data": { + "text/plain": [ + " uid obj_type ... 3_db_source 3_db_id\n", + "0 1519785 single nucleotide variant ... OMIM 614702\n", + "\n", + "[1 rows x 110 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidobj_typeaccessionaccession_versiontitlerecord_statusgene_sortchr_sortlocation_sortvariation_set_namevariation_set_idmolecular_consequence_listprotein_changefda_recognized_databasesupporting_submissions_scvsupporting_submissions_rcvgermline_classification_descriptiongermline_classification_last_evaluatedgermline_classification_review_statusgermline_classification_fda_recognized_databaseclinical_impact_classification_descriptionclinical_impact_classification_last_evaluatedclinical_impact_classification_review_statusclinical_impact_classification_fda_recognized_databaseclinical_impact_classification_trait_setoncogenicity_classification_descriptiononcogenicity_classification_last_evaluatedoncogenicity_classification_review_statusoncogenicity_classification_fda_recognized_databaseoncogenicity_classification_trait_set0_symbol0_geneid0_strand0_source0_trait_name0_db_source0_db_id1_db_source1_db_id2_db_source...1_stop1_inner_start1_inner_stop1_outer_start1_outer_stop1_display_start1_display_stop1_assembly_acc_ver1_annotation_release1_alt1_ref0_source0_value0_minor_allele1_source1_value1_minor_allele2_source2_value2_minor_allele3_source3_value3_minor_allele4_source4_value4_minor_alleledb_sourcedb_id4_source4_value4_minor_allele0_trait_name0_db_source0_db_id1_db_source1_db_id2_db_source2_db_id3_db_source3_db_id
01519785single nucleotide variantVCV001519785VCV001519785.NM_012123.4(MTO1):c.1465+4A>TMTO10600000000000073482248[intron variant][SCV002308196][RCV002024803]Uncertain significance2022/07/06 00:00criteria provided, single submitter1/01/01 00:00[]1/01/01 00:00[]MTO125821+submittedMitochondrial hypertrophic cardiomyopathy with...Orphanet314637MedGenC4749921MONDO...741919717419197174191971GCF_000001405.25Exome Aggregation Consortium (ExAC)0.00002The Genome Aggregation Database (gnomAD)0.00003The Genome Aggregation Database (gnomAD), exomes0.00004Trans-Omics for Precision Medicine (TOPMed)0.000051000 Genomes Project0.00020TdbSNP5550946771000 Genomes Project0.00020TMitochondrial hypertrophic cardiomyopathy with...Orphanet314637MedGenC4749921MONDOMONDO:0013865OMIM614702
\n", + "

1 rows × 110 columns

\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 35 + }, { "metadata": {}, "cell_type": "code", "source": "", - "id": "a97fbf604bd8977b", + "id": "7df7d0cb3b874157", "outputs": [], "execution_count": null } From 6dba6fe6b5398256812ec0c8ca11e297e378a488 Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:03:04 +0300 Subject: [PATCH 03/72] Implemented a function which extracts variation ids from gene name, those ids are formatted to dataframe --- api/__init__.py | 3 +- api/data/__init__.py | 1 + api/data/refactoring.py | 148 +- tests/pipeline.ipynb | 4293 +++++++++++---------------------------- 4 files changed, 1262 insertions(+), 3183 deletions(-) diff --git a/api/__init__.py b/api/__init__.py index f54edfc..fb618dd 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -56,5 +56,6 @@ parse_lovd, from_clinvar_name_to_cdna_position, save_lovd_as_vcf, - request_clinvar_api_data + request_clinvar_api_data, + get_variant_ids_from_clinvar_name_api, ) diff --git a/api/data/__init__.py b/api/data/__init__.py index 001190d..34caf42 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -57,4 +57,5 @@ from_clinvar_name_to_cdna_position, save_lovd_as_vcf, request_clinvar_api_data, + get_variant_ids_from_clinvar_name_api, ) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index c6e82ea..79cbe78 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -160,94 +160,116 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"): f.write("\n") +def get_variant_ids_from_clinvar_name_api(name: str, count: int = 100): + """ + Extracts variant ids from ClinVar `name` variable. /n + key of dictionary is the size of the list of ids. + + :param str name: name of variant + :param int count: number of ids to extract + :returns: ids of variants + :rtype: str + """ + + result = {} + + separator = "," + clinvar_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=clinvar&term={name}&retmode=json&retmax={count}" + + request = requests.get(clinvar_url) + + if request.status_code != 200: + raise ValueError(f"Request failed with status code {request.status_code}") + + data = request.json() + + ids = data['esearchresult']['idlist'] + + result['idlist'] = ids + result['count'] = data['esearchresult']['count'] + + return result + + def request_clinvar_api_data(gene_id: str): """ - Requests ClinVar API for data about variant with given id.\n + Requests ClinVar API for data about variant with given id. Converts it to pandas dataframe. 
:param str gene_id: id of variant (may be multiple) :returns: dataframe from ClinVar API :rtype: dataframe """ + clinvar_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json" - path = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json" - - request = requests.get(path) + request = requests.get(clinvar_url) if request.status_code != 200: raise ValueError(f"Request failed with status code {request.status_code}") data = request.json() - # Extract the 'result' part of the JSON results = data['result'] - # Extract the 'uids' part of the JSON flattened_data = [] for uid in results['uids']: entry = results[uid] - # Using pd.json_normalize to flatten the JSON data flattened_entry = pd.json_normalize(entry, sep='_') - flattened_variation_set = pd.json_normalize(flattened_entry['variation_set'][0], sep='_') - flattened_variation_xrefs = pd.json_normalize(flattened_variation_set['variation_xrefs'][0], sep='_') - - variation_loc_size = len(flattened_variation_set['variation_loc'][0]) - for i in range(variation_loc_size): - flattened_variation_loc = pd.json_normalize(flattened_variation_set['variation_loc'][0][i], sep='_') - flattened_variation_loc = flattened_variation_loc.add_prefix(f'{i}_') - flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc], axis=1) - - allele_freq_set_size = len(flattened_variation_set['allele_freq_set'][0]) - for i in range(allele_freq_set_size): - flattened_allele_freq_set = pd.json_normalize(flattened_variation_set['allele_freq_set'][0][i], sep='_') - flattened_allele_freq_set = flattened_allele_freq_set.add_prefix(f'{i}_') - flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1) - - gene_size = len(flattened_entry['genes'][0]) - for i in range(gene_size): - flattened_genes = pd.json_normalize(flattened_entry['genes'][0][i], sep='_') - flattened_genes = flattened_genes.add_prefix(f'{i}_') - flattened_entry = pd.concat([flattened_entry, flattened_genes], axis=1) - - gremline_classification_trait_set_size = len(flattened_entry['germline_classification_trait_set'][0]) - for i in range(gremline_classification_trait_set_size): - flattened_germline_classification_trait_set = pd.json_normalize( - flattened_entry['germline_classification_trait_set'][0][i], sep='_') - flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.add_prefix( - f'{i}_') - - trait_xrefs_size = len(flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0]) - for j in range(trait_xrefs_size): - flattened_trait_xrefs = pd.json_normalize( - flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0][j], sep='_') - flattened_trait_xrefs = flattened_trait_xrefs.add_prefix(f'{j}_') - - flattened_germline_classification_trait_set = pd.concat( - [flattened_germline_classification_trait_set, flattened_trait_xrefs], axis=1) - - flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.drop( - columns=[f'{i}_trait_xrefs'], axis=1) - flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1) - - # dropping extracted nests - flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set'], - axis=1) - flattened_variation_set = flattened_variation_set.drop( - columns=['variation_xrefs', 'variation_loc', 'allele_freq_set'], axis=1) - - flattened_variation_set = 
pd.concat([flattened_variation_set, flattened_variation_xrefs], axis=1) - flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1) - - flattened_entry = pd.concat([flattened_entry, flattened_variation_set], axis=1) - flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1) - - # Append the flattened entry to the list + variation_set = flattened_entry.at[0, 'variation_set'] + for idx, var_set in enumerate(variation_set): + flat_var_set = pd.json_normalize(var_set, sep='_') + flat_var_set = flat_var_set.add_prefix(f'variation_set_{idx}_') + + variation_loc = var_set.get('variation_loc', []) + for loc_idx, loc in enumerate(variation_loc): + flat_loc = pd.json_normalize(loc, sep='_') + flat_loc = flat_loc.add_prefix(f'variation_set_{idx}_loc_{loc_idx}_') + flat_var_set = flat_var_set.join(flat_loc, rsuffix=f'_{idx}_{loc_idx}_vl') + + var_xrefs = var_set.get('variation_xrefs', []) + for var_xrefs_idx, var_xref in enumerate(var_xrefs): + flat_var_xrefs = pd.json_normalize(var_xref, sep='_') + flat_var_xrefs = flat_var_xrefs.add_prefix(f'variation_set_{idx}_var_xrefs_{var_xrefs_idx}_') + flat_var_set = flat_var_set.join(flat_var_xrefs, rsuffix=f'_{idx}_{var_xrefs_idx}_vx') + + allele_freq = var_set.get('allele_freq_set', []) + for allele_freq_idx, allele in enumerate(allele_freq): + flat_allele = pd.json_normalize(allele, sep='_') + flat_allele = flat_allele.add_prefix(f'variation_set_{idx}_allele_freq_{allele_freq_idx}_') + flat_var_set = flat_var_set.join(flat_allele, rsuffix=f'_{idx}_{allele_freq_idx}_af') + + flat_var_set = flat_var_set.drop( + columns=[f'variation_set_{idx}_variation_loc', f'variation_set_{idx}_variation_xrefs', + f'variation_set_{idx}_allele_freq_set']) + flattened_entry = flattened_entry.join(flat_var_set, rsuffix=f'_{idx}_vs') + + genes = flattened_entry.at[0, 'genes'] + for idx, gene in enumerate(genes): + flat_genes = pd.json_normalize(gene, sep='_') + flat_genes = flat_genes.add_prefix(f'gene_{idx}_') + flattened_entry = flattened_entry.join(flat_genes, rsuffix=f'_{idx}_g') + + germline_classification_trait_set = flattened_entry.at[0, 'germline_classification_trait_set'] + for idx, germline_set in enumerate(germline_classification_trait_set): + flat_germline_set = pd.json_normalize(germline_set, sep='_') + flat_germline_set = flat_germline_set.add_prefix(f'germline_set_{idx}_') + + trait_xrefs = flat_germline_set.at[0, f'germline_set_{idx}_trait_xrefs'] + for jdx, trait_xref in enumerate(trait_xrefs): + flat_trait_xrefs = pd.json_normalize(trait_xref, sep='_') + flat_trait_xrefs = flat_trait_xrefs.add_prefix(f'trait_xref_{jdx}_') + flat_germline_set = flat_germline_set.join(flat_trait_xrefs, rsuffix=f'_{idx}_{jdx}_tx') + + flat_germline_set = flat_germline_set.drop(columns=[f'germline_set_{idx}_trait_xrefs']) + flattened_entry = flattened_entry.join(flat_germline_set, rsuffix=f'_{idx}_gls') + + flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set']) + flattened_data.append(flattened_entry) - # Concatenate all flattened entries into a single DataFrame df = pd.concat(flattened_data, ignore_index=True) - return df + return df \ No newline at end of file diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index 12bb5a4..0fb9cfb 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -9,23 +9,27 @@ "outputs_hidden": true }, "ExecuteTime": { - "end_time": "2024-08-06T14:19:28.871130Z", - "start_time": 
"2024-08-06T14:19:28.169833Z" + "end_time": "2024-08-07T12:32:22.837138Z", + "start_time": "2024-08-07T12:32:21.979038Z" } }, "source": [ "import pandas as pd\n", + "import requests\n", "\n", "from api.data import (store_database_for_eys_gene,\n", " parse_lovd,\n", " LOVD_PATH,\n", " set_lovd_dtypes,\n", + " request_clinvar_api_data,\n", + " get_variant_ids_from_clinvar_name_api,\n", " )\n", "from api.data import save_lovd_as_vcf\n", + "\n", "pd.options.display.max_columns = 0" ], "outputs": [], - "execution_count": 3 + "execution_count": 1 }, { "cell_type": "code", @@ -34,17 +38,13 @@ "collapsed": false, "jupyter": { "outputs_hidden": false - }, - "ExecuteTime": { - "end_time": "2024-08-06T14:19:29.382744Z", - "start_time": "2024-08-06T14:19:28.873646Z" } }, "source": [ "store_database_for_eys_gene(\"lovd\", override=False)" ], "outputs": [], - "execution_count": 4 + "execution_count": null }, { "cell_type": "code", @@ -53,163 +53,309 @@ "collapsed": false, "jupyter": { "outputs_hidden": false - }, - "ExecuteTime": { - "end_time": "2024-08-06T14:19:35.787015Z", - "start_time": "2024-08-06T14:19:29.383754Z" } }, "source": [ "data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")" ], "outputs": [], - "execution_count": 5 + "execution_count": null }, { "cell_type": "code", "id": "8a089e29bfc8c119", + "metadata": {}, + "source": [ + "for i in data:\n", + " print(i)\n", + " display(data[i])" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "ef07740b2fa63e42", "metadata": { - "ExecuteTime": { - "end_time": "2024-08-06T14:19:35.968177Z", - "start_time": "2024-08-06T14:19:35.788025Z" + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, "source": [ + "set_lovd_dtypes(data)\n", "for i in data:\n", " print(i)\n", - " display(data[i])" + " display(data[i].info())" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "c968af1617be40db", + "metadata": {}, + "source": [ + "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "c7ff16903e0c52bd", + "metadata": {}, + "source": [ + "from subprocess import Popen\n", + "\n", + "process = Popen(\"spliceai -I ./lovd.vcf -O ./lovd_output.vcf -R ../tools/spliceai/hg38.fa -A grch38\".split())\n", + "process.wait()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", + "metadata": {}, + "source": [ + "from api.tools import get_revel_scores\n", + "\n", + "chromosome = 6\n", + "position = 65655758\n", + "\n", + "results = get_revel_scores(chromosome, position)\n", + "\n", + "display(results)" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "import requests\n", + "from api.data import request_clinvar_api_data\n", + "\n", + "gene_id = '1519785,1519786'\n", + "\n", + "frames = request_clinvar_api_data(gene_id)\n", + "\n", + "display(frames)" + ], + "id": "b21c3487476b684f", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "gene_id = '1519785'\n", + "\n", + "\n", + "clinvar_url = f\"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json\"\n", + "\n", + "request = requests.get(clinvar_url)\n", + "\n", + "if request.status_code != 200:\n", + " raise ValueError(f\"Request failed with status code {request.status_code}\")\n", + "\n", + "data = 
request.json()\n", + "\n", + "# Extract the 'result' part of the JSON\n", + "results = data['result']\n", + "\n", + "# Extract the 'uids' part of the JSON\n", + "flattened_data = []\n", + "\n", + "for uid in results['uids']:\n", + " entry = results[uid]\n", + "\n", + " # Using pd.json_normalize to flatten the JSON data\n", + " flattened_entry = pd.json_normalize(entry, sep='_')\n", + "\n", + " # Process variation_set\n", + " variation_set = flattened_entry.at[0, 'variation_set']\n", + " for idx, var_set in enumerate(variation_set):\n", + " flat_var_set = pd.json_normalize(var_set, sep='_')\n", + " flat_var_set = flat_var_set.add_prefix(f'variation_set_{idx}_')\n", + "\n", + " # Process variation_loc within variation_set\n", + " variation_loc = var_set.get('variation_loc', [])\n", + " for loc_idx, loc in enumerate(variation_loc):\n", + " flat_loc = pd.json_normalize(loc, sep='_')\n", + " flat_loc = flat_loc.add_prefix(f'variation_set_{idx}_loc_{loc_idx}_')\n", + " flat_var_set = flat_var_set.join(flat_loc, rsuffix=f'_{idx}_{loc_idx}_vl')\n", + " \n", + " var_xrefs = var_set.get('variation_xrefs', [])\n", + " for var_xrefs_idx, var_xref in enumerate(var_xrefs):\n", + " flat_var_xrefs = pd.json_normalize(var_xref, sep='_')\n", + " flat_var_xrefs = flat_var_xrefs.add_prefix(f'variation_set_{idx}_var_xrefs_{var_xrefs_idx}_')\n", + " flat_var_set = flat_var_set.join(flat_var_xrefs, rsuffix=f'_{idx}_{var_xrefs_idx}_vx')\n", + "\n", + "\n", + " allele_freq = var_set.get('allele_freq_set', [])\n", + " for allele_freq_idx, allele in enumerate(allele_freq):\n", + " flat_allele = pd.json_normalize(allele, sep='_')\n", + " flat_allele = flat_allele.add_prefix(f'variation_set_{idx}_allele_freq_{allele_freq_idx}_')\n", + " flat_var_set = flat_var_set.join(flat_allele, rsuffix=f'_{idx}_{allele_freq_idx}_af')\n", + " \n", + " # drop original nested lists columns\n", + " flat_var_set = flat_var_set.drop(columns=[f'variation_set_{idx}_variation_loc', f'variation_set_{idx}_variation_xrefs', f'variation_set_{idx}_allele_freq_set'])\n", + " \n", + " flattened_entry = flattened_entry.join(flat_var_set, rsuffix=f'_{idx}_vs')\n", + "\n", + " # Process genes\n", + " genes = flattened_entry.at[0, 'genes']\n", + " for idx, gene in enumerate(genes):\n", + " flat_genes = pd.json_normalize(gene, sep='_')\n", + " flat_genes = flat_genes.add_prefix(f'gene_{idx}_')\n", + " flattened_entry = flattened_entry.join(flat_genes, rsuffix=f'_{idx}_g')\n", + " # Process germline_classification_trait_set\n", + " germline_classification_trait_set = flattened_entry.at[0, 'germline_classification_trait_set']\n", + " for idx, germline_set in enumerate(germline_classification_trait_set):\n", + " flat_germline_set = pd.json_normalize(germline_set, sep='_')\n", + " flat_germline_set = flat_germline_set.add_prefix(f'germline_set_{idx}_')\n", + "\n", + " trait_xrefs = flat_germline_set.at[0, f'germline_set_{idx}_trait_xrefs']\n", + " for jdx, trait_xref in enumerate(trait_xrefs):\n", + " flat_trait_xrefs = pd.json_normalize(trait_xref, sep='_')\n", + " flat_trait_xrefs = flat_trait_xrefs.add_prefix(f'trait_xref_{jdx}_')\n", + " flat_germline_set = flat_germline_set.join(flat_trait_xrefs, rsuffix=f'_{idx}_{jdx}_tx')\n", + "\n", + " flat_germline_set = flat_germline_set.drop(columns=[f'germline_set_{idx}_trait_xrefs'])\n", + " flattened_entry = flattened_entry.join(flat_germline_set, rsuffix=f'_{idx}_gls')\n", + "\n", + " # Dropping original nested lists columns\n", + " flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 
'germline_classification_trait_set'])\n", + "\n", + " # Append the flattened entry to the list\n", + " flattened_data.append(flattened_entry)\n", + "\n", + "# Concatenate all flattened entries into a single DataFrame\n", + "df = pd.concat(flattened_data, ignore_index=True)\n", + "\n", + "display(df)" + ], + "id": "3b9b8bdad8bdb55d", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Explanation of whats happening in the code below:\n", + "\n", + "Function to get all the ids from a gene name:\n", + "```python\n", + "get_variant_ids_from_clinvar_name_api(name: str, count: int)\n", + "```\n", + "\n", + "function gets the ids from the clinvar api, the name is the gene name and the count is the maximum number of ids to get (api's limit is 500)\n", + "\n", + "function returns a dictionary with the count and the list of ids:\n", + "\n", + "```json\n", + "{\n", + " 'count': int,\n", + " 'idlist': List[str]\n", + "}\n", + "```\n", + "\n", + "if the count is greater than the api's limit, the function will split the list of ids into smaller lists of 500 and then request the data from the api in chunks of 500 ids:\n", + "\n", + "```python\n", + "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", + "```\n", + "\n", + "then the function will request the data from the api and concatenate the dataframes into a single dataframe:\n", + "\n", + "```python\n", + "frames = request_clinvar_api_data(join)\n", + "variations = pd.concat([variations, frames], ignore_index=True)\n", + "```\n", + "\n", + "The variant extraction function contains a lot of nested lists and dictionaries, so the function will flatten the data and then concatenate the dataframes into a single dataframe\n", + "\n", + "**NOTE**\n", + "\n", + "> joining function may have been implemented wrong due to the waiting time of the api.\n" + ], + "id": "655a935b2874c218" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-07T12:57:28.089588Z", + "start_time": "2024-08-07T12:55:09.972813Z" + } + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "variations = pd.DataFrame()\n", + "\n", + "max = 500\n", + "name = \"EYS\"\n", + "count = 2147483647\n", + "\n", + "id_array = get_variant_ids_from_clinvar_name_api(name, count)\n", + "size = int(id_array['count'])\n", + "id_list = id_array['idlist']\n", + "\n", + "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", + "\n", + "track = 0\n", + "for lists in id_lists:\n", + " join = \",\".join(lists)\n", + " frame = request_clinvar_api_data(join)\n", + " \n", + " variations = pd.concat([variations, frame], ignore_index=True)\n", + " \n", + " print(f\"{track + 1}/{len(id_lists)}\")\n", + " track += 1\n", + "\n", + "display(variations)\n" ], + "id": "129175e3a2e568be", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Genes\n" - ] - }, - { - "data": { - "text/plain": [ - " id name ... updated_by updated_date\n", - "0 EYS eyes shut homolog (Drosophila) ... 00006 2024-05-31 11:42:04\n", - "\n", - "[1 rows x 34 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamechromosomechrom_bandimprintingrefseq_genomicrefseq_UDreferenceurl_homepageurl_externalallow_downloadid_hgncid_entrezid_omimshow_hgmdshow_genecardsshow_genetestsshow_orphanetnote_indexnote_listingrefseqrefseq_urldisclaimerdisclaimer_textheaderheader_alignfooterfooter_aligncreated_bycreated_dateedited_byedited_dateupdated_byupdated_date
0EYSeyes shut homolog (Drosophila)6q12unknownNG_023443.2UD_132085377375http://www.LOVD.nl/EYS1215553460076124241111<font color=\\\"#FF0000\\\">This database is one o...ghttp://databases.lovd.nl/shared/refseq/EYS_NM_...1<font color=\\\"#FF0000\\\">This database is one o...-1-1000012012-02-13 00:00:00000062023-08-30 13:08:19000062024-05-31 11:42:04
\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Transcripts\n" + "1/10\n", + "2/10\n", + "3/10\n", + "4/10\n", + "5/10\n", + "6/10\n", + "7/10\n", + "8/10\n", + "9/10\n", + "10/10\n" ] }, { "data": { "text/plain": [ - " id geneid ... edited_by edited_date\n", - "0 00007329 EYS ... \n", + " uid obj_type ... gene_1029_strand gene_1029_source\n", + "0 3251429 single nucleotide variant ... NaN NaN\n", + "1 3246148 Deletion ... NaN NaN\n", + "2 3246147 Deletion ... NaN NaN\n", + "3 3246146 Deletion ... NaN NaN\n", + "4 3246145 Deletion ... NaN NaN\n", + "... ... ... ... ... ...\n", + "4778 538 single nucleotide variant ... NaN NaN\n", + "4779 537 single nucleotide variant ... NaN NaN\n", + "4780 536 Deletion ... NaN NaN\n", + "4781 535 Deletion ... NaN NaN\n", + "4782 534 Deletion ... NaN NaN\n", "\n", - "[1 rows x 19 columns]" + "[4783 rows x 4428 columns]" ], "text/html": [ "
\n", @@ -230,3034 +376,951 @@ " \n", " \n", " \n", - " id\n", - " geneid\n", - " name\n", - " id_mutalyzer\n", - " id_ncbi\n", - " id_ensembl\n", - " id_protein_ncbi\n", - " id_protein_ensembl\n", - " id_protein_uniprot\n", - " remarks\n", - " position_c_mrna_start\n", - " position_c_mrna_end\n", - " position_c_cds_end\n", - " position_g_mrna_start\n", - " position_g_mrna_end\n", - " created_by\n", - " created_date\n", - " edited_by\n", - " edited_date\n", + " uid\n", + " obj_type\n", + " accession\n", + " accession_version\n", + " title\n", + " record_status\n", + " gene_sort\n", + " chr_sort\n", + " location_sort\n", + " variation_set_name\n", + " variation_set_id\n", + " molecular_consequence_list\n", + " protein_change\n", + " fda_recognized_database\n", + " supporting_submissions_scv\n", + " supporting_submissions_rcv\n", + " germline_classification_description\n", + " germline_classification_last_evaluated\n", + " germline_classification_review_status\n", + " germline_classification_fda_recognized_database\n", + " clinical_impact_classification_description\n", + " clinical_impact_classification_last_evaluated\n", + " clinical_impact_classification_review_status\n", + " clinical_impact_classification_fda_recognized_database\n", + " clinical_impact_classification_trait_set\n", + " oncogenicity_classification_description\n", + " oncogenicity_classification_last_evaluated\n", + " oncogenicity_classification_review_status\n", + " oncogenicity_classification_fda_recognized_database\n", + " oncogenicity_classification_trait_set\n", + " variation_set_0_measure_id\n", + " variation_set_0_variation_name\n", + " variation_set_0_cdna_change\n", + " variation_set_0_aliases\n", + " variation_set_0_variant_type\n", + " variation_set_0_canonical_spdi\n", + " variation_set_0_loc_0_status\n", + " variation_set_0_loc_0_assembly_name\n", + " variation_set_0_loc_0_chr\n", + " variation_set_0_loc_0_band\n", + " ...\n", + " gene_1020_symbol\n", + " gene_1020_geneid\n", + " gene_1020_strand\n", + " gene_1020_source\n", + " gene_1021_symbol\n", + " gene_1021_geneid\n", + " gene_1021_strand\n", + " gene_1021_source\n", + " gene_1022_symbol\n", + " gene_1022_geneid\n", + " gene_1022_strand\n", + " gene_1022_source\n", + " gene_1023_symbol\n", + " gene_1023_geneid\n", + " gene_1023_strand\n", + " gene_1023_source\n", + " gene_1024_symbol\n", + " gene_1024_geneid\n", + " gene_1024_strand\n", + " gene_1024_source\n", + " gene_1025_symbol\n", + " gene_1025_geneid\n", + " gene_1025_strand\n", + " gene_1025_source\n", + " gene_1026_symbol\n", + " gene_1026_geneid\n", + " gene_1026_strand\n", + " gene_1026_source\n", + " gene_1027_symbol\n", + " gene_1027_geneid\n", + " gene_1027_strand\n", + " gene_1027_source\n", + " gene_1028_symbol\n", + " gene_1028_geneid\n", + " gene_1028_strand\n", + " gene_1028_source\n", + " gene_1029_symbol\n", + " gene_1029_geneid\n", + " gene_1029_strand\n", + " gene_1029_source\n", " \n", " \n", " \n", " \n", " 0\n", - " 00007329\n", + " 3251429\n", + " single nucleotide variant\n", + " VCV003251429\n", + " VCV003251429.\n", + " NM_001142800.2(EYS):c.5886T>C (p.Thr1962=)\n", + " \n", " EYS\n", - " transcript variant 1\n", - " 001\n", - " NM_001142800.1\n", + " 06\n", + " 00000000000064436215\n", + " \n", + " \n", + " [synonymous variant]\n", + " \n", + " \n", + " [SCV005076913]\n", + " [RCV004587835]\n", + " Likely benign\n", + " 2024/04/08 00:00\n", + " criteria provided, single submitter\n", " \n", - " NP_001136272.1\n", " \n", + " 1/01/01 00:00\n", " \n", " \n", - " -538\n", - " 
10051\n", - " 9435\n", - " 66417118\n", - " 64429876\n", + " []\n", " \n", - " 0000-00-00 00:00:00\n", + " 1/01/01 00:00\n", " \n", " \n", + " []\n", + " 3410228\n", + " NM_001142800.2(EYS):c.5886T>C (p.Thr1962=)\n", + " c.5886T>C\n", + " []\n", + " single nucleotide variant\n", + " NC_000006.12:64436214:A:G\n", + " current\n", + " GRCh38\n", + " 6\n", + " 6q12\n", + " ...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", - " \n", - "\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Diseases\n" - ] - }, - { - "data": { - "text/plain": [ - " id symbol ... edited_by edited_date\n", - "0 00012 PSORS ... 00006 2019-08-12 13:38:21\n", - "1 00058 CORD ... 00006 2020-08-30 09:43:59\n", - "2 00112 RP ... 00006 2021-01-18 09:53:26\n", - "3 00139 ID ... 00006 2015-02-09 10:02:49\n", - "4 00173 SLOS ... 00006 2021-12-10 21:51:32\n", - "5 00198 ? ... 00006 2016-10-22 17:54:40\n", - "6 02156 - ... 00006 2021-12-10 21:51:32\n", - "7 02440 RP25 ... 00006 2021-12-10 21:51:32\n", - "8 04211 RPar ... \n", - "9 04214 - ... 00001 2023-03-09 14:26:26\n", - "10 04249 macular dystrophy ... 00006 2024-02-15 21:18:39\n", - "11 05086 HL ... 00006 2015-10-23 11:43:00\n", - "12 05415 USH ... \n", - "13 05468 uveitis ... \n", - "14 06906 DEE ... \n", - "\n", - "[15 rows x 12 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", " \n", " \n", + " \n", " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", + " \n", " \n", " \n", + " \n", " \n", + " \n", " \n", - " \n", - " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - "
idsymbolnameinheritanceid_omimtissuesfeaturesremarkscreated_bycreated_dateedited_byedited_date
000012PSORSpsoriasis, pustular, generalized (PSORS)13246148DeletionVCV003246148VCV003246148.NC_000006.11:g.(?_66204859)_(66217229_?)delEYS0699999999999999999999[]000062012-07-06 21:50:32000062019-08-12 13:38:21
100058CORDdystrophy, cone-rod (CORD)[SCV005067530][RCV004578792]Pathogenic2023/01/02 00:00criteria provided, single submitter1/01/01 00:00[]1/01/01 00:00[]3403857NC_000006.11:g.(?_66204859)_(66217229_?)delNC_000006.11:g.(?_66204859)_(66217229_?)del[]Deletion000062012-09-22 11:31:25000062020-08-30 09:43:59previousGRCh3766q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
200112RPretinitis pigmentosa (RP)3246147DeletionVCV003246147VCV003246147.NC_000006.11:g.(?_64511633)_(64516181_?)delEYS0699999999999999999999[]268000[SCV005067529][RCV004578791]Likely pathogenic2023/03/08 00:00criteria provided, single submitter1/01/01 00:00[]1/01/01 00:00[]3403856NC_000006.11:g.(?_64511633)_(64516181_?)delNC_000006.11:g.(?_64511633)_(64516181_?)del[]Deletion000012013-02-21 17:12:36000062021-01-18 09:53:26previousGRCh3766q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
300139IDintellectual disability (ID)3246146DeletionVCV003246146VCV003246146.NC_000006.11:g.(?_65523280)_(65527746_?)delEYS0699999999999999999999[][SCV005067528][RCV004578790]Likely pathogenic2023/04/30 00:00criteria provided, single submitter1/01/01 00:00[]1/01/01 00:00[]3403855NC_000006.11:g.(?_65523280)_(65527746_?)delNC_000006.11:g.(?_65523280)_(65527746_?)del[]Deletion000842013-06-04 18:18:07000062015-02-09 10:02:49previousGRCh3766q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
400173SLOSSmith-Lemli-Opitz syndrome (SLOS)AR2704003246145DeletionVCV003246145VCV003246145.NC_000006.11:g.(?_65587645)_(65596716_?)delEYS0699999999999999999999[]000062013-08-01 11:16:14000062021-12-10 21:51:32
500198?unclassified / mixed[SCV005067527][RCV004578789]Likely pathogenic2023/06/27 00:00criteria provided, single submitter1/01/01 00:00000062013-09-13 14:21:47000062016-10-22 17:54:40
602156-retinitis pigmentosa, X-linked, and sinorespir...[]3004551/01/01 00:00[]3403854NC_000006.11:g.(?_65587645)_(65596716_?)delNC_000006.11:g.(?_65587645)_(65596716_?)del[]Deletion000062014-09-25 23:29:40000062021-12-10 21:51:32previousGRCh3766q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
......................................................................................................................................................................................................................................................
702440RP25retinitis pigmentosa, type 25 (RP25)AR6027724778538single nucleotide variantVCV000000538VCV000000538.NM_001142800.2(EYS):c.9405T>A (p.Tyr3135Ter)EYS0600000000000063720626000062014-09-25 23:29:40000062021-12-10 21:51:32
804211RParretinitis pigmentosa, autosomal recessive (RPar)[3 prime UTR variant, nonsense]Y3156*, Y3135*[SCV000020717, SCV000894389, SCV000709692, SCV...[RCV000000568, RCV000593252, RCV003914789, RCV...Pathogenic/Likely pathogenic2024/03/09 00:00criteria provided, multiple submitters, no con...1/01/01 00:00000062015-02-27 18:58:57[]1/01/01 00:00
904214-retinal disease[]15577NM_001142800.2(EYS):c.9405T>A (p.Tyr3135Ter)c.9405T>A[]single nucleotide variantNC_000006.12:63720625:A:TcurrentGRCh3866q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4779537single nucleotide variantVCV000000537VCV000000537.NM_001142800.2(EYS):c.5857G>T (p.Glu1953Ter)EYS0600000000000064436244[nonsense]E1953*000062015-02-27 19:48:07000012023-03-09 14:26:26
1004249macular dystrophydystrophy, macular[SCV000020716, SCV002519636, SCV004195857, SCV...[RCV000000567, RCV001387157]Pathogenic2024/02/15 00:00criteria provided, multiple submitters, no con...1/01/01 00:00[]000062015-05-04 22:10:58000062024-02-15 21:18:39
1105086HLhearing loss (HL)1/01/01 00:00[]15576NM_001142800.2(EYS):c.5857G>T (p.Glu1953Ter)c.5857G>T[]single nucleotide variantNC_000006.12:64436243:C:AcurrentGRCh3866q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4780536DeletionVCV000000536VCV000000536.NM_001142800.1(EYS):c.1767-24596_2023+238135delLOC4411550600000000000065057728[]000062015-10-23 11:41:05000062015-10-23 11:43:00
1205415USHUsher syndrome (USH)[SCV000020715][RCV000000566]Pathogenic2008/11/01 00:00no assertion criteria provided1/01/01 00:00000062018-04-02 16:40:44[]1/01/01 00:00
1305468uveitisuveitis[]15575NM_001142800.1(EYS):c.1767-24596_2023+238135delNM_001142800.1(EYS):c.1767-24596_2023+238135del[EX12DEL]DeletioncurrentGRCh3866q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4781535DeletionVCV000000535VCV000000535.NM_001142800.1(EYS):c.2260-51191_2992+45990delEYS0600000000000064840707[]000062018-08-22 09:47:04[SCV000020714][RCV000000565]Pathogenic2008/11/01 00:00no assertion criteria provided
1406906DEEencephalopathy, developmental and epileptic1/01/01 00:00[]1/01/01 00:00000062022-04-07 09:24:23[]15574NM_001142800.1(EYS):c.2260-51191_2992+45990delNM_001142800.1(EYS):c.2260-51191_2992+45990del[EX15-19DEL]DeletioncurrentGRCh3866q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4782534DeletionVCV000000534VCV000000534.NM_001142800.2(EYS):c.2710_2726del (p.Asp904fs)
\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Genes_To_Diseases\n" - ] - }, - { - "data": { - "text/plain": [ - " geneid diseaseid\n", - "0 EYS 00112\n", - "1 EYS 02440" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geneiddiseaseid
0EYS00112
1EYS02440
\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Individuals\n" - ] - }, - { - "data": { - "text/plain": [ - " id fatherid ... Individual/Origin/Population Individual/Individual_ID\n", - "0 00000135 ... \n", - "1 00000210 ... \n", - "2 00001962 ... white \n", - "3 00016605 ... \n", - "4 00033096 ... \n", - "... ... ... ... ... ...\n", - "1460 00451166 ... 071788\n", - "1461 00451252 ... 072857\n", - "1462 00451259 ... 073069\n", - "1463 00451333 ... 075139\n", - "1464 00451348 ... 080622\n", - "\n", - "[1465 rows x 18 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idfatheridmotheridpanelidpanel_sizelicenseowned_byIndividual/ReferenceIndividual/RemarksIndividual/GenderIndividual/ConsanguinityIndividual/Origin/GeographicIndividual/Age_of_deathIndividual/VIPIndividual/Data_avIndividual/TreatmentIndividual/Origin/PopulationIndividual/Individual_ID
000000135300006{PMID:Marrakchi 2011:21848462}5-generation family, 3 affecteds (M)MyesTunisia
100000210100039{PMID:Abu-Safieh-2013:23105016}(Saudi Arabia)
200001962100025M?Germanywhite
3000166051005520
400033096100229{PMID:Neveling 2012:22334370}Mno0
.........................................................
146000451166100006{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...0071788
146100451252100006{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...0072857
146200451259100006{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...0073069
146300451333100006{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...0075139
146400451348100006{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...0080622
\n", - "

1465 rows × 18 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Individuals_To_Diseases\n" - ] - }, - { - "data": { - "text/plain": [ - " individualid diseaseid\n", - "0 00000135 00012\n", - "1 00000210 00058\n", - "2 00001962 00173\n", - "3 00033096 04214\n", - "4 00033109 04214\n", - "... ... ...\n", - "1459 00451166 04249\n", - "1460 00451252 04249\n", - "1461 00451259 04249\n", - "1462 00451333 04249\n", - "1463 00451348 04249\n", - "\n", - "[1464 rows x 2 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
individualiddiseaseid
00000013500012
10000021000058
20000196200173
30003309604214
40003310904214
.........
14590045116604249
14600045125204249
14610045125904249
14620045133304249
14630045134804249
\n", - "

1464 rows × 2 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Phenotypes\n" - ] - }, - { - "data": { - "text/plain": [ - " id ... Phenotype/Diagnosis/Criteria\n", - "0 0000000008 ... \n", - "1 0000000026 ... \n", - "2 0000000941 ... \n", - "3 0000026525 ... \n", - "4 0000026538 ... \n", - "... ... ... ...\n", - "1272 0000339895 ... \n", - "1273 0000339896 ... \n", - "1274 0000339897 ... \n", - "1275 0000339898 ... \n", - "1276 0000339899 ... \n", - "\n", - "[1277 rows x 20 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iddiseaseidindividualidowned_byPhenotype/InheritancePhenotype/AgePhenotype/AdditionalPhenotype/Biochem_paramPhenotype/Age/OnsetPhenotype/Age/DiagnosisPhenotype/Severity_scorePhenotype/OnsetPhenotype/ProteinPhenotype/Tumor/MSIPhenotype/Enzyme/CPKPhenotype/Heart/MyocardiumPhenotype/LungPhenotype/Diagnosis/DefinitePhenotype/Diagnosis/InitialPhenotype/Diagnosis/Criteria
00000000008000120000013500006Familial, autosomal recessive
10000000026000580000021000039Familial, autosomal recessive
20000000941001730000196200025Familial2-3 toe syndactyly5
30000026525042140003309600229Unknownretinitis pigmentosa
40000026538042140003310900229Unknownretinitis pigmentosa
...............................................................
12720000339895042490045084004405UnknownStargardt disease
12730000339896042490045084104405UnknownStargardt disease
12740000339897042490045084204405Unknowncone-rod dystrophy
12750000339898042490045084304405UnknownStargardt disease
12760000339899042490045084404405Unknownmacular dystrophy
\n", - "

1277 rows × 20 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Screenings\n" - ] - }, - { - "data": { - "text/plain": [ - " id individualid ... Screening/Tissue Screening/Remarks\n", - "0 0000000126 00000135 ... \n", - "1 0000000211 00000210 ... \n", - "2 0000001640 00001962 ... \n", - "3 0000016557 00016605 ... \n", - "4 0000033164 00033096 ... \n", - "... ... ... ... ... ...\n", - "1460 0000452765 00451166 ... smMIP-based 105 iMD/AMD genes\n", - "1461 0000452851 00451252 ... smMIP-based 105 iMD/AMD genes\n", - "1462 0000452858 00451259 ... smMIP-based 105 iMD/AMD genes\n", - "1463 0000452932 00451333 ... smMIP-based 105 iMD/AMD genes\n", - "1464 0000452947 00451348 ... smMIP-based 105 iMD/AMD genes\n", - "\n", - "[1465 rows x 12 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idindividualidvariants_foundowned_bycreated_bycreated_dateedited_byedited_dateScreening/TechniqueScreening/TemplateScreening/TissueScreening/Remarks
0000000012600000135100006000062012-07-07 19:04:19000062012-07-07 19:12:08RT-PCR;SEQDNA;RNA
1000000021100000210100039000062012-09-22 11:36:24SEQDNA
2000000164000001962100025000062010-03-11 16:36:41000252012-04-13 15:18:00SEQDNA
3000001655700016605100552005522014-05-23 13:12:43SEQ-NG-IDNA
4000003316400033096100229002292012-02-04 15:20:01000062012-05-18 13:59:33SEQ;SEQ-NG-SDNA
.......................................
1460000045276500451166100006000062024-05-31 11:39:36SEQDNAsmMIP-based 105 iMD/AMD genes
1461000045285100451252100006000062024-05-31 11:39:36SEQDNAsmMIP-based 105 iMD/AMD genes
1462000045285800451259100006000062024-05-31 11:39:36SEQDNAsmMIP-based 105 iMD/AMD genes
1463000045293200451333100006000062024-05-31 11:39:36SEQDNAsmMIP-based 105 iMD/AMD genes
1464000045294700451348100006000062024-05-31 11:39:36SEQDNAsmMIP-based 105 iMD/AMD genes
\n", - "

1465 rows × 12 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Screenings_To_Genes\n" - ] - }, - { - "data": { - "text/plain": [ - " screeningid geneid\n", - "0 0000000126 IL36RN\n", - "1 0000000211 MKS1\n", - "2 0000001640 DHCR7\n", - "3 0000033164 AHI1\n", - "4 0000033164 EYS\n", - "... ... ...\n", - "1311 0000437646 EYS\n", - "1312 0000437902 EYS\n", - "1313 0000437922 EYS\n", - "1314 0000443144 EYS\n", - "1315 0000443145 EYS\n", - "\n", - "[1316 rows x 2 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
screeningidgeneid
00000000126IL36RN
10000000211MKS1
20000001640DHCR7
30000033164AHI1
40000033164EYS
.........
13110000437646EYS
13120000437902EYS
13130000437922EYS
13140000443144EYS
13150000443145EYS
\n", - "

1316 rows × 2 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variants_On_Genome\n" - ] - }, - { - "data": { - "text/plain": [ - " id ... VariantOnGenome/ClinicalClassification/Method\n", - "0 0000036426 ... \n", - "1 0000059881 ... \n", - "2 0000059883 ... \n", - "3 0000059884 ... \n", - "4 0000059885 ... \n", - "... ... ... ...\n", - "2555 0000987292 ... ACMG\n", - "2556 0000987305 ... ACMG\n", - "2557 0000987318 ... ACMG\n", - "2558 0000987322 ... ACMG\n", - "2559 0000987333 ... ACMG\n", - "\n", - "[2560 rows x 26 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idalleleeffectidchromosomeposition_g_startposition_g_endtypeaverage_frequencyowned_byVariantOnGenome/DBIDVariantOnGenome/DNAVariantOnGenome/FrequencyVariantOnGenome/ReferenceVariantOnGenome/Restriction_siteVariantOnGenome/Published_asVariantOnGenome/RemarksVariantOnGenome/Genetic_originVariantOnGenome/SegregationVariantOnGenome/dbSNPVariantOnGenome/VIPVariantOnGenome/MethylationVariantOnGenome/ISCNVariantOnGenome/DNA/hg38VariantOnGenome/ClinVarVariantOnGenome/ClinicalClassificationVariantOnGenome/ClinicalClassification/Method
0000003642635066449897164498971subst0.00074292200552EYS_000007g.64498971A>GGermline0g.63789078A>GVUS
1000005988135566565575865655758subst0.0011529700229EYS_000001g.65655758T>GExAC: 60, 19750, 0, 0.003038{PMID:Neveling 2012:22334370}Q770PGermlineyes0g.64945865T>GVUS
2000005988311166533614365336143subst0.22418900229EYS_000002g.65336143G>AExAC: 3936, 19366, 441, 0.2032{PMID:Neveling 2012:22334370}p.?unaffected brother also this variant homozygousGermlineno0g.64626250G>Abenign
3000005988411566530086965300869subst0.00083792800229EYS_000003g.65300869G>AExAC: 12, 19406, 0, 0.0006184{PMID:Neveling 2012:22334370}(P1631S)predicted benign, disease-related variant in o...Germline0g.64590976G>Abenign
4000005988511166501699865016999del000229EYS_000004g.65016998_65016999delExAC: 9866, 18292, 921, 0.5394{PMID:Neveling 2012:22334370}6045-4_6045-3delpredicted benignGermlineyes0g.64307105_64307106delbenign
.................................................................................
2555000098729207066443111164431111subst000006EYS_000662g.64431111C>T{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...c.8879G>A (Cys2960Tyr)case unsolvedGermline0g.63721215C>Tlikely pathogenicACMG
2556000098730507066611516766115167del000006EYS_000933g.66115167del{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...no variant 2nd chromosome, case unsolvedGermline0g.65405274dellikely pathogenicACMG
2557000098731807066443114864431148subst3.95361E-500006EYS_000067g.64431148A>G{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...no variant 2nd chromosome, case unsolvedGermline0g.63721252A>Glikely pathogenicACMG
2558000098732209066443062664430629del000006EYS_000045g.64430626_64430629del{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...no variant 2nd chromosome, case unsolvedGermline0g.63720730_63720733delpathogenicACMG
2559000098733309066443647764436477del1.349E-500006EYS_000397g.64436477del{PMID:Hitti-Malin 2024:38540785}, {DOI:Hitti-M...no variant 2nd chromosome, case unsolvedGermline0g.63726584delpathogenicACMG
\n", - "

2560 rows × 26 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variants_On_Transcripts\n" - ] - }, - { - "data": { - "text/plain": [ - " id ... VariantOnTranscript/Exon\n", - "0 0000036426 ... 38\n", - "1 0000059881 ... 15\n", - "2 0000059883 ... 22i\n", - "3 0000059884 ... 26\n", - "4 0000059885 ... 29i\n", - "... ... ... ...\n", - "2555 0000987292 ... \n", - "2556 0000987305 ... \n", - "2557 0000987318 ... \n", - "2558 0000987322 ... \n", - "2559 0000987333 ... \n", - "\n", - "[2560 rows x 11 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idtranscriptideffectidposition_c_startposition_c_start_intronposition_c_endposition_c_end_intronVariantOnTranscript/DNAVariantOnTranscript/RNAVariantOnTranscript/ProteinVariantOnTranscript/Exon
0000003642600007329507558075580c.7558T>Cr.(?)p.(Phe2520Leu)38
1000005988100007329552309023090c.2309A>Cr.(?)p.(Gln770Pro)15
2000005988300007329113444-53444-5c.3444-5C>Tr.(?)p.(=)22i
3000005988400007329154891048910c.4891C>Tr.(?)p.(Pro1631Ser)26
4000005988500007329116079-46079-3c.6079-4_6079-3delr.(?)p.(=)29i
....................................
2555000098729200007329708816088160c.8816G>Ar.(?)p.(Cys2939Tyr)
25560000987305000073297095709570c.957delr.(?)p.(Glu319AspfsTer20)
2557000098731800007329708779087790c.8779T>Cr.(?)p.(Cys2927Arg)
2558000098732200007329909299093020c.9299_9302delr.(?)p.(Thr3100LysfsTer26)
2559000098733300007329908168081680c.8168delr.(?)p.(Gln2723ArgfsTer18)
\n", - "

2560 rows × 11 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Screenings_To_Variants\n" - ] - }, - { - "data": { - "text/plain": [ - " screeningid variantid\n", - "0 0000000126 0000783293\n", - "1 0000000211 0000790459\n", - "2 0000001640 0000235838\n", - "3 0000016557 0000036426\n", - "4 0000033164 0000059884\n", - "... ... ...\n", - "2163 0000452765 0000987322\n", - "2164 0000452851 0000987196\n", - "2165 0000452858 0000987333\n", - "2166 0000452932 0000987277\n", - "2167 0000452947 0000987292\n", - "\n", - "[2168 rows x 2 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
screeningidvariantid
000000001260000783293
100000002110000790459
200000016400000235838
300000165570000036426
400000331640000059884
.........
216300004527650000987322
216400004528510000987196
216500004528580000987333
216600004529320000987277
216700004529470000987292
\n", - "

2168 rows × 2 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 6 - }, - { - "cell_type": "code", - "id": "ef07740b2fa63e42", - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "ExecuteTime": { - "end_time": "2024-08-06T14:19:36.151167Z", - "start_time": "2024-08-06T14:19:35.969184Z" - } - }, - "source": [ - "set_lovd_dtypes(data)\n", - "for i in data:\n", - " print(i)\n", - " display(data[i].info())" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Genes\n", - "\n", - "RangeIndex: 1 entries, 0 to 0\n", - "Data columns (total 34 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 1 non-null string \n", - " 1 name 1 non-null string \n", - " 2 chromosome 1 non-null Int64 \n", - " 3 chrom_band 1 non-null string \n", - " 4 imprinting 1 non-null string \n", - " 5 refseq_genomic 1 non-null string \n", - " 6 refseq_UD 1 non-null string \n", - " 7 reference 1 non-null string \n", - " 8 url_homepage 1 non-null string \n", - " 9 url_external 1 non-null string \n", - " 10 allow_download 1 non-null bool \n", - " 11 id_hgnc 1 non-null Int64 \n", - " 12 id_entrez 1 non-null Int64 \n", - " 13 id_omim 1 non-null Int64 \n", - " 14 show_hgmd 1 non-null bool \n", - " 15 show_genecards 1 non-null bool \n", - " 16 show_genetests 1 non-null bool \n", - " 17 show_orphanet 1 non-null bool \n", - " 18 note_index 1 non-null string \n", - " 19 note_listing 1 non-null string \n", - " 20 refseq 1 non-null string \n", - " 21 refseq_url 1 non-null string \n", - " 22 disclaimer 1 non-null bool \n", - " 23 disclaimer_text 1 non-null string \n", - " 24 header 1 non-null string \n", - " 25 header_align 1 non-null Int64 \n", - " 26 footer 1 non-null string \n", - " 27 footer_align 1 non-null Int64 \n", - " 28 created_by 1 non-null Int64 \n", - " 29 created_date 1 non-null datetime64[ns]\n", - " 30 edited_by 1 non-null Int64 \n", - " 31 edited_date 1 non-null datetime64[ns]\n", - " 32 updated_by 1 non-null Int64 \n", - " 33 updated_date 1 non-null datetime64[ns]\n", - "dtypes: Int64(9), bool(6), datetime64[ns](3), string(16)\n", - "memory usage: 371.0 bytes\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Transcripts\n", - "\n", - "RangeIndex: 1 entries, 0 to 0\n", - "Data columns (total 19 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 1 non-null Int64 \n", - " 1 geneid 1 non-null string \n", - " 2 name 1 non-null string \n", - " 3 id_mutalyzer 1 non-null Int64 \n", - " 4 id_ncbi 1 non-null string \n", - " 5 id_ensembl 1 non-null string \n", - " 6 id_protein_ncbi 1 non-null string \n", - " 7 id_protein_ensembl 1 non-null string \n", - " 8 id_protein_uniprot 1 non-null string \n", - " 9 remarks 1 non-null string \n", - " 10 position_c_mrna_start 1 non-null Int64 \n", - " 11 position_c_mrna_end 1 non-null Int64 \n", - " 12 position_c_cds_end 1 non-null Int64 \n", - " 13 position_g_mrna_start 1 non-null Int64 \n", - " 14 position_g_mrna_end 1 non-null Int64 \n", - " 15 created_by 0 non-null Int64 \n", - " 16 created_date 0 non-null datetime64[ns]\n", - " 17 edited_by 0 non-null Int64 \n", - " 18 edited_date 0 non-null datetime64[ns]\n", - "dtypes: Int64(9), datetime64[ns](2), string(8)\n", - "memory usage: 293.0 bytes\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": 
{}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Diseases\n", - "\n", - "RangeIndex: 15 entries, 0 to 14\n", - "Data columns (total 12 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 15 non-null Int64 \n", - " 1 symbol 15 non-null string \n", - " 2 name 15 non-null string \n", - " 3 inheritance 15 non-null string \n", - " 4 id_omim 4 non-null Int64 \n", - " 5 tissues 15 non-null string \n", - " 6 features 15 non-null string \n", - " 7 remarks 15 non-null string \n", - " 8 created_by 15 non-null Int64 \n", - " 9 created_date 15 non-null datetime64[ns]\n", - " 10 edited_by 11 non-null Int64 \n", - " 11 edited_date 11 non-null datetime64[ns]\n", - "dtypes: Int64(4), datetime64[ns](2), string(6)\n", - "memory usage: 1.6 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Genes_To_Diseases\n", - "\n", - "RangeIndex: 2 entries, 0 to 1\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 geneid 2 non-null string\n", - " 1 diseaseid 2 non-null Int64 \n", - "dtypes: Int64(1), string(1)\n", - "memory usage: 166.0 bytes\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Individuals\n", - "\n", - "RangeIndex: 1465 entries, 0 to 1464\n", - "Data columns (total 18 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 1465 non-null Int64 \n", - " 1 fatherid 1465 non-null string\n", - " 2 motherid 1465 non-null string\n", - " 3 panelid 6 non-null Int64 \n", - " 4 panel_size 1465 non-null Int64 \n", - " 5 license 1465 non-null string\n", - " 6 owned_by 1465 non-null Int64 \n", - " 7 Individual/Reference 1465 non-null string\n", - " 8 Individual/Remarks 1465 non-null string\n", - " 9 Individual/Gender 1465 non-null string\n", - " 10 Individual/Consanguinity 1465 non-null string\n", - " 11 Individual/Origin/Geographic 1465 non-null string\n", - " 12 Individual/Age_of_death 1465 non-null string\n", - " 13 Individual/VIP 1465 non-null string\n", - " 14 Individual/Data_av 1465 non-null string\n", - " 15 Individual/Treatment 1465 non-null string\n", - " 16 Individual/Origin/Population 1465 non-null string\n", - " 17 Individual/Individual_ID 1465 non-null string\n", - "dtypes: Int64(4), string(14)\n", - "memory usage: 211.9 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Individuals_To_Diseases\n", - "\n", - "RangeIndex: 1464 entries, 0 to 1463\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype\n", - "--- ------ -------------- -----\n", - " 0 individualid 1464 non-null Int64\n", - " 1 diseaseid 1464 non-null Int64\n", - "dtypes: Int64(2)\n", - "memory usage: 25.9 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Phenotypes\n", - "\n", - "RangeIndex: 1277 entries, 0 to 1276\n", - "Data columns (total 20 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 1277 non-null 
Int64 \n", - " 1 diseaseid 1277 non-null Int64 \n", - " 2 individualid 1277 non-null Int64 \n", - " 3 owned_by 1277 non-null Int64 \n", - " 4 Phenotype/Inheritance 1277 non-null string\n", - " 5 Phenotype/Age 1277 non-null string\n", - " 6 Phenotype/Additional 1277 non-null string\n", - " 7 Phenotype/Biochem_param 1277 non-null string\n", - " 8 Phenotype/Age/Onset 1277 non-null string\n", - " 9 Phenotype/Age/Diagnosis 1277 non-null string\n", - " 10 Phenotype/Severity_score 1277 non-null string\n", - " 11 Phenotype/Onset 1277 non-null string\n", - " 12 Phenotype/Protein 1277 non-null string\n", - " 13 Phenotype/Tumor/MSI 1277 non-null string\n", - " 14 Phenotype/Enzyme/CPK 1277 non-null string\n", - " 15 Phenotype/Heart/Myocardium 1277 non-null string\n", - " 16 Phenotype/Lung 1277 non-null string\n", - " 17 Phenotype/Diagnosis/Definite 1277 non-null string\n", - " 18 Phenotype/Diagnosis/Initial 1277 non-null string\n", - " 19 Phenotype/Diagnosis/Criteria 1277 non-null string\n", - "dtypes: Int64(4), string(16)\n", - "memory usage: 204.6 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Screenings\n", - "\n", - "RangeIndex: 1465 entries, 0 to 1464\n", - "Data columns (total 12 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 1465 non-null Int64 \n", - " 1 individualid 1465 non-null Int64 \n", - " 2 variants_found 1465 non-null Int64 \n", - " 3 owned_by 1465 non-null Int64 \n", - " 4 created_by 1465 non-null Int64 \n", - " 5 created_date 1465 non-null datetime64[ns]\n", - " 6 edited_by 15 non-null Int64 \n", - " 7 edited_date 15 non-null datetime64[ns]\n", - " 8 Screening/Technique 1465 non-null string \n", - " 9 Screening/Template 1465 non-null string \n", - " 10 Screening/Tissue 1465 non-null string \n", - " 11 Screening/Remarks 1465 non-null string \n", - "dtypes: Int64(6), datetime64[ns](2), string(4)\n", - "memory usage: 146.1 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Screenings_To_Genes\n", - "\n", - "RangeIndex: 1316 entries, 0 to 1315\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 screeningid 1316 non-null Int64 \n", - " 1 geneid 1316 non-null string\n", - "dtypes: Int64(1), string(1)\n", - "memory usage: 22.0 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variants_On_Genome\n", - "\n", - "RangeIndex: 2560 entries, 0 to 2559\n", - "Data columns (total 26 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 2560 non-null Int64 \n", - " 1 allele 2560 non-null Int64 \n", - " 2 effectid 2560 non-null Int64 \n", - " 3 chromosome 2560 non-null Int64 \n", - " 4 position_g_start 2559 non-null Int64 \n", - " 5 position_g_end 2559 non-null Int64 \n", - " 6 type 2560 non-null string \n", - " 7 average_frequency 2559 non-null float64\n", - " 8 owned_by 2560 non-null Int64 \n", - " 9 VariantOnGenome/DBID 2560 non-null string \n", - " 10 VariantOnGenome/DNA 2560 non-null string \n", - " 11 VariantOnGenome/Frequency 2560 non-null string \n", - " 12 VariantOnGenome/Reference 2560 non-null string \n", - 
" 13 VariantOnGenome/Restriction_site 2560 non-null string \n", - " 14 VariantOnGenome/Published_as 2560 non-null string \n", - " 15 VariantOnGenome/Remarks 2560 non-null string \n", - " 16 VariantOnGenome/Genetic_origin 2560 non-null string \n", - " 17 VariantOnGenome/Segregation 2560 non-null string \n", - " 18 VariantOnGenome/dbSNP 2560 non-null string \n", - " 19 VariantOnGenome/VIP 2560 non-null string \n", - " 20 VariantOnGenome/Methylation 2560 non-null string \n", - " 21 VariantOnGenome/ISCN 2560 non-null string \n", - " 22 VariantOnGenome/DNA/hg38 2560 non-null string \n", - " 23 VariantOnGenome/ClinVar 2560 non-null string \n", - " 24 VariantOnGenome/ClinicalClassification 2560 non-null string \n", - " 25 VariantOnGenome/ClinicalClassification/Method 2560 non-null string \n", - "dtypes: Int64(7), float64(1), string(18)\n", - "memory usage: 537.6 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variants_On_Transcripts\n", - "\n", - "RangeIndex: 2560 entries, 0 to 2559\n", - "Data columns (total 11 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 id 2560 non-null Int64 \n", - " 1 transcriptid 2560 non-null Int64 \n", - " 2 effectid 2560 non-null Int64 \n", - " 3 position_c_start 2559 non-null Int64 \n", - " 4 position_c_start_intron 2560 non-null Int64 \n", - " 5 position_c_end 2559 non-null Int64 \n", - " 6 position_c_end_intron 2560 non-null Int64 \n", - " 7 VariantOnTranscript/DNA 2560 non-null string\n", - " 8 VariantOnTranscript/RNA 2560 non-null string\n", - " 9 VariantOnTranscript/Protein 2560 non-null string\n", - " 10 VariantOnTranscript/Exon 2560 non-null string\n", - "dtypes: Int64(7), string(4)\n", - "memory usage: 237.6 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Screenings_To_Variants\n", - "\n", - "RangeIndex: 2168 entries, 0 to 2167\n", - "Data columns (total 2 columns):\n", - " # Column Non-Null Count Dtype\n", - "--- ------ -------------- -----\n", - " 0 screeningid 2168 non-null Int64\n", - " 1 variantid 2168 non-null Int64\n", - "dtypes: Int64(2)\n", - "memory usage: 38.2 KB\n" - ] - }, - { - "data": { - "text/plain": [ - "None" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 7 - }, - { - "cell_type": "code", - "id": "c968af1617be40db", - "metadata": { - "ExecuteTime": { - "end_time": "2024-08-06T14:19:36.920697Z", - "start_time": "2024-08-06T14:19:36.151683Z" - } - }, - "source": [ - "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")" - ], - "outputs": [ - { - "ename": "KeyError", - "evalue": "'Variants_On_Genome'", - "output_type": "error", - "traceback": [ - "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)", - "File \u001B[1;32m~\\PycharmProjects\\KathChatGPT\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m 
\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", - "File \u001B[1;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[1;34m()\u001B[0m\n", - "File \u001B[1;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[1;34m()\u001B[0m\n", - "File \u001B[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7081\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[1;34m()\u001B[0m\n", - "File \u001B[1;32mpandas\\\\_libs\\\\hashtable_class_helper.pxi:7089\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[1;34m()\u001B[0m\n", - "\u001B[1;31mKeyError\u001B[0m: 'Variants_On_Genome'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001B[1;31mKeyError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[1;32mIn[8], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[43msave_lovd_as_vcf\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m./lovd.vcf\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n", - "File \u001B[1;32m~\\PycharmProjects\\kath\\api\\data\\refactoring.py:140\u001B[0m, in \u001B[0;36msave_lovd_as_vcf\u001B[1;34m(data, save_to)\u001B[0m\n\u001B[0;32m 134\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21msave_lovd_as_vcf\u001B[39m(data, save_to\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m./lovd.vcf\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m 135\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m 136\u001B[0m \u001B[38;5;124;03m Gets hg38 variants from LOVD and saves as VCF file.\u001B[39;00m\n\u001B[0;32m 137\u001B[0m \u001B[38;5;124;03m :param DataFrame data: LOVD DataFrame with data\u001B[39;00m\n\u001B[0;32m 138\u001B[0m \u001B[38;5;124;03m :param str save_to: path where to save VCF file.\u001B[39;00m\n\u001B[0;32m 139\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m--> 140\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[43mdata\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mVariants_On_Genome\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\n\u001B[0;32m 141\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mVariantOnGenome/DNA/hg38\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m df\u001B[38;5;241m.\u001B[39mcolumns:\n\u001B[0;32m 142\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mVariantOnGenome/DNA/hg38 is not in the LOVD DataFrame.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", - "File \u001B[1;32m~\\PycharmProjects\\KathChatGPT\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 4100\u001B[0m 
\u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[1;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[0;32m 4104\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", - "File \u001B[1;32m~\\PycharmProjects\\KathChatGPT\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[1;34m(self, key)\u001B[0m\n\u001B[0;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[0;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[0;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[0;32m 3810\u001B[0m ):\n\u001B[0;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[1;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[0;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[0;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[0;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[0;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[0;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", - "\u001B[1;31mKeyError\u001B[0m: 'Variants_On_Genome'" - ] - } - ], - "execution_count": 8 - }, - { - "cell_type": "code", - "id": "c7ff16903e0c52bd", - "metadata": { - "ExecuteTime": { - "end_time": "2024-08-06T14:19:36.921706Z", - "start_time": "2024-08-06T14:19:36.921706Z" - } - }, - "source": [ - "from subprocess import Popen\n", - "\n", - "\n", - "process = Popen(\"spliceai -I ./lovd.vcf -O ./lovd_output.vcf -R ../tools/spliceai/hg38.fa -A grch38\".split())\n", - "process.wait()" - ], - "outputs": [], - "execution_count": null - }, - { - "cell_type": "code", - "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", - "metadata": {}, - "source": [ - "from api.tools import get_revel_scores\n", - "\n", - "chromosome = 6\n", - "position = 65655758\n", - "\n", - "results = get_revel_scores(chromosome, position)\n", - "\n", - "display(results)" - ], - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "from api.data import request_clinvar_api_data\n", - "\n", - "some_id = 1519786\n", - "try:\n", - " frame = request_clinvar_api_data(some_id)\n", - " display(frame)\n", - "except Exception as e:\n", - " print(e)\n" - ], - "id": "576b841842a7ab61", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import requests\n", - "from api.data import request_clinvar_api_data\n", - "\n", - "gene_id = '1519786'\n", - "# with gene_id = '1519787' error is raised\n", - "\n", - "#TODO inside request_clinvar_api_data\n", - "# 1. dinamically expand genes to dataframe (might be one, might be more)\n", - "# 2. 
dinamically expand variation_loc to dataframe (might be one, might be more)\n", - "frames = request_clinvar_api_data(gene_id)\n", - "\n", - "display(frames)" - ], - "id": "b21c3487476b684f", - "outputs": [], - "execution_count": null - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-08-06T14:58:14.898227Z", - "start_time": "2024-08-06T14:58:14.228473Z" - } - }, - "cell_type": "code", - "source": [ - "import requests\n", - "\n", - "gene_id = '1519785'\n", - "\n", - "path = f\"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json\"\n", - "\n", - "request = requests.get(path)\n", - "\n", - "if request.status_code != 200:\n", - " raise ValueError(f\"Request failed with status code {request.status_code}\")\n", - "\n", - "data = request.json()\n", - "\n", - " # Extract the 'result' part of the JSON\n", - "results = data['result']\n", - "\n", - "# Extract the 'uids' part of the JSON\n", - "flattened_data = []\n", - "\n", - "for uid in results['uids']:\n", - " entry = results[uid]\n", - "\n", - " # Using pd.json_normalize to flatten the JSON data\n", - " flattened_entry = pd.json_normalize(entry, sep='_')\n", - "\n", - " flattened_variation_set = pd.json_normalize(flattened_entry['variation_set'][0], sep='_')\n", - " flattened_variation_xrefs = pd.json_normalize(flattened_variation_set['variation_xrefs'][0], sep='_')\n", - " \n", - " variation_loc_size = len(flattened_variation_set['variation_loc'][0]) \n", - " for i in range(variation_loc_size):\n", - " flattened_variation_loc = pd.json_normalize(flattened_variation_set['variation_loc'][0][i], sep='_')\n", - " flattened_variation_loc = flattened_variation_loc.add_prefix(f'{i}_')\n", - " flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_loc], axis=1)\n", - " \n", - " allele_freq_set_size = len(flattened_variation_set['allele_freq_set'][0])\n", - " for i in range(allele_freq_set_size):\n", - " flattened_allele_freq_set = pd.json_normalize(flattened_variation_set['allele_freq_set'][0][i], sep='_')\n", - " flattened_allele_freq_set = flattened_allele_freq_set.add_prefix(f'{i}_')\n", - " flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1)\n", - " \n", - " gene_size = len(flattened_entry['genes'][0])\n", - " for i in range(gene_size):\n", - " flattened_genes = pd.json_normalize(flattened_entry['genes'][0][i], sep='_')\n", - " flattened_genes = flattened_genes.add_prefix(f'{i}_')\n", - " flattened_entry = pd.concat([flattened_entry, flattened_genes], axis=1)\n", - "\n", - " gremline_classification_trait_set_size = len(flattened_entry['germline_classification_trait_set'][0])\n", - " for i in range(gremline_classification_trait_set_size):\n", - " flattened_germline_classification_trait_set = pd.json_normalize(flattened_entry['germline_classification_trait_set'][0][i], sep='_')\n", - " flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.add_prefix(f'{i}_')\n", - " \n", - " trait_xrefs_size = len(flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0])\n", - " for j in range(trait_xrefs_size):\n", - " flattened_trait_xrefs = pd.json_normalize(flattened_germline_classification_trait_set[f'{i}_trait_xrefs'][0][j], sep='_')\n", - " flattened_trait_xrefs = flattened_trait_xrefs.add_prefix(f'{j}_')\n", - "\n", - " flattened_germline_classification_trait_set = pd.concat([flattened_germline_classification_trait_set, flattened_trait_xrefs], axis=1)\n", - " \n", - " 
flattened_germline_classification_trait_set = flattened_germline_classification_trait_set.drop(columns=[f'{i}_trait_xrefs'], axis=1)\n", - " flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1)\n", - " \n", - " # dropping extracted nests\n", - " flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set'],axis=1)\n", - " flattened_variation_set = flattened_variation_set.drop(columns=['variation_xrefs', 'variation_loc', 'allele_freq_set'], axis=1)\n", - "\n", - " flattened_variation_set = pd.concat([flattened_variation_set, flattened_variation_xrefs], axis=1)\n", - " flattened_variation_set = pd.concat([flattened_variation_set, flattened_allele_freq_set], axis=1)\n", - "\n", - " flattened_entry = pd.concat([flattened_entry, flattened_variation_set], axis=1)\n", - " flattened_entry = pd.concat([flattened_entry, flattened_germline_classification_trait_set], axis=1)\n", - "\n", - " # Append the flattened entry to the list\n", - " flattened_data.append(flattened_entry)\n", - "\n", - " # Concatenate all flattened entries into a single DataFrame\n", - "df = pd.concat(flattened_data, ignore_index=True)\n", - "\n", - "display(df)\n" - ], - "id": "7e9ca83a40035c14", - "outputs": [ - { - "data": { - "text/plain": [ - " uid obj_type ... 3_db_source 3_db_id\n", - "0 1519785 single nucleotide variant ... OMIM 614702\n", - "\n", - "[1 rows x 110 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -3269,61 +1332,61 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
uidobj_typeaccessionaccession_versiontitlerecord_statusgene_sortchr_sortlocation_sortvariation_set_namevariation_set_idmolecular_consequence_listprotein_changefda_recognized_databasesupporting_submissions_scvsupporting_submissions_rcvgermline_classification_descriptiongermline_classification_last_evaluatedgermline_classification_review_statusgermline_classification_fda_recognized_databaseclinical_impact_classification_descriptionclinical_impact_classification_last_evaluatedclinical_impact_classification_review_statusclinical_impact_classification_fda_recognized_databaseclinical_impact_classification_trait_setoncogenicity_classification_descriptiononcogenicity_classification_last_evaluatedoncogenicity_classification_review_statusoncogenicity_classification_fda_recognized_databaseoncogenicity_classification_trait_set0_symbol0_geneid0_strand0_source0_trait_name0_db_source0_db_id1_db_source1_db_id2_db_source...1_stop1_inner_start1_inner_stop1_outer_start1_outer_stop1_display_start1_display_stop1_assembly_acc_ver1_annotation_release1_alt1_ref0_source0_value0_minor_allele1_source1_value1_minor_allele2_source2_value2_minor_allele3_source3_value3_minor_allele4_source4_value4_minor_alleledb_sourcedb_id4_source4_value4_minor_allele0_trait_name0_db_source0_db_id1_db_source1_db_id2_db_source2_db_id3_db_source3_db_id
01519785single nucleotide variantVCV001519785VCV001519785.NM_012123.4(MTO1):c.1465+4A>TMTO1EYS060000000000007348224800000000000064902416[intron variant][frameshift variant]D904fs[SCV002308196][RCV002024803]Uncertain significance2022/07/06 00:00criteria provided, single submitter[SCV000020713][RCV000000564]Pathogenic2008/11/01 00:00no assertion criteria provided1/01/01 00:00[]MTO125821+submittedMitochondrial hypertrophic cardiomyopathy with...Orphanet314637MedGenC4749921MONDO...741919717419197174191971GCF_000001405.25Exome Aggregation Consortium (ExAC)0.00002The Genome Aggregation Database (gnomAD)0.00003The Genome Aggregation Database (gnomAD), exomes0.00004Trans-Omics for Precision Medicine (TOPMed)0.000051000 Genomes Project0.00020TdbSNP5550946771000 Genomes Project0.00020TMitochondrial hypertrophic cardiomyopathy with...Orphanet314637MedGenC4749921MONDOMONDO:0013865OMIM61470215573NM_001142800.2(EYS):c.2710_2726del (p.Asp904fs)c.2710_2726del[]DeletionNC_000006.12:64902415:ACCATATCTTCACAGTCACCATA:...currentGRCh3866q12...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "

1 rows × 110 columns

\n", + "

4783 rows × 4428 columns

\n", "
" ] }, @@ -3331,15 +1394,7 @@ "output_type": "display_data" } ], - "execution_count": 35 - }, - { - "metadata": {}, - "cell_type": "code", - "source": "", - "id": "7df7d0cb3b874157", - "outputs": [], - "execution_count": null + "execution_count": 37 } ], "metadata": { From 35f43bf27f227ecf2fc4529d4d2ccc9f34abe409 Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:25:54 +0300 Subject: [PATCH 04/72] Reformatted the parse --- api/data/__init__.py | 1 + api/data/refactoring.py | 81 ++- tests/pipeline.ipynb | 1295 +++++---------------------------------- 3 files changed, 178 insertions(+), 1199 deletions(-) diff --git a/api/data/__init__.py b/api/data/__init__.py index 34caf42..5ccab15 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -58,4 +58,5 @@ save_lovd_as_vcf, request_clinvar_api_data, get_variant_ids_from_clinvar_name_api, + extract_nested_json, ) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index 79cbe78..09eaca1 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -3,7 +3,6 @@ import os import logging -import pandas import requests import pandas as pd @@ -160,7 +159,7 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"): f.write("\n") -def get_variant_ids_from_clinvar_name_api(name: str, count: int = 100): +def get_variant_ids_from_clinvar_name_api(name, count=100): """ Extracts variant ids from ClinVar `name` variable. /n key of dictionary is the size of the list of ids. @@ -172,9 +171,8 @@ def get_variant_ids_from_clinvar_name_api(name: str, count: int = 100): """ result = {} - - separator = "," - clinvar_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=clinvar&term={name}&retmode=json&retmax={count}" + base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=clinvar&term=" + clinvar_url = f"{base_url}{name}&retmode=json&retmax={count}" request = requests.get(clinvar_url) @@ -183,33 +181,46 @@ def get_variant_ids_from_clinvar_name_api(name: str, count: int = 100): data = request.json() - ids = data['esearchresult']['idlist'] - - result['idlist'] = ids + result['idlist'] = data['esearchresult']['idlist'] result['count'] = data['esearchresult']['count'] return result -def request_clinvar_api_data(gene_id: str): +def extract_nested_json(flat_parsed, parsed_from, required_column, prefix, join_prefix): + """ + Extracts nested JSON data from dictionary. + + :param DataFrame parsed_from: normalised JSON data + :param str required_column: column to extract + :param str prefix: prefix for extracted columns + """ + + data_set = parsed_from.get(required_column, []) + for idx, data in enumerate(data_set): + flat_data = pd.json_normalize(data, sep='_') + flat_data = flat_data.add_prefix(f'{prefix}_{idx}_') + flat_parsed = flat_parsed.join(flat_data, rsuffix=f'_{idx}_{join_prefix}') + + +def request_clinvar_api_data(gene_id): """ Requests ClinVar API for data about variant with given id. Converts it to pandas dataframe. 
:param str gene_id: id of variant (may be multiple) - :returns: dataframe from ClinVar API - :rtype: dataframe + :returns: DataFrame from ClinVar API + :rtype: DataFrame """ - clinvar_url = f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json" + base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id=" + clinvar_url = f"{base_url}{gene_id}&retmode=json" request = requests.get(clinvar_url) if request.status_code != 200: raise ValueError(f"Request failed with status code {request.status_code}") - data = request.json() - - results = data['result'] + results = request.json()['result'] flattened_data = [] @@ -223,53 +234,41 @@ def request_clinvar_api_data(gene_id: str): flat_var_set = pd.json_normalize(var_set, sep='_') flat_var_set = flat_var_set.add_prefix(f'variation_set_{idx}_') - variation_loc = var_set.get('variation_loc', []) - for loc_idx, loc in enumerate(variation_loc): - flat_loc = pd.json_normalize(loc, sep='_') - flat_loc = flat_loc.add_prefix(f'variation_set_{idx}_loc_{loc_idx}_') - flat_var_set = flat_var_set.join(flat_loc, rsuffix=f'_{idx}_{loc_idx}_vl') - - var_xrefs = var_set.get('variation_xrefs', []) - for var_xrefs_idx, var_xref in enumerate(var_xrefs): - flat_var_xrefs = pd.json_normalize(var_xref, sep='_') - flat_var_xrefs = flat_var_xrefs.add_prefix(f'variation_set_{idx}_var_xrefs_{var_xrefs_idx}_') - flat_var_set = flat_var_set.join(flat_var_xrefs, rsuffix=f'_{idx}_{var_xrefs_idx}_vx') - - allele_freq = var_set.get('allele_freq_set', []) - for allele_freq_idx, allele in enumerate(allele_freq): - flat_allele = pd.json_normalize(allele, sep='_') - flat_allele = flat_allele.add_prefix(f'variation_set_{idx}_allele_freq_{allele_freq_idx}_') - flat_var_set = flat_var_set.join(flat_allele, rsuffix=f'_{idx}_{allele_freq_idx}_af') + extract_nested_json(flat_var_set, var_set, 'variation_loc', f'variation_set_{idx}_loc', 'loc') + extract_nested_json(flat_var_set, var_set, 'variation_xrefs', f'variation_set_{idx}_xrefs', 'xrefs') + extract_nested_json(flat_var_set, var_set, 'allele_freq_set', f'variation_set_{idx}_allele_freq', 'allele_freq') flat_var_set = flat_var_set.drop( - columns=[f'variation_set_{idx}_variation_loc', f'variation_set_{idx}_variation_xrefs', + columns=[f'variation_set_{idx}_variation_loc', + f'variation_set_{idx}_variation_xrefs', f'variation_set_{idx}_allele_freq_set']) flattened_entry = flattened_entry.join(flat_var_set, rsuffix=f'_{idx}_vs') + # this extraction is different from the previous ones + genes = flattened_entry.at[0, 'genes'] for idx, gene in enumerate(genes): flat_genes = pd.json_normalize(gene, sep='_') flat_genes = flat_genes.add_prefix(f'gene_{idx}_') flattened_entry = flattened_entry.join(flat_genes, rsuffix=f'_{idx}_g') - germline_classification_trait_set = flattened_entry.at[0, 'germline_classification_trait_set'] + germline_classification_trait_set = flattened_entry.at[0, + 'germline_classification_trait_set'] for idx, germline_set in enumerate(germline_classification_trait_set): flat_germline_set = pd.json_normalize(germline_set, sep='_') flat_germline_set = flat_germline_set.add_prefix(f'germline_set_{idx}_') - trait_xrefs = flat_germline_set.at[0, f'germline_set_{idx}_trait_xrefs'] - for jdx, trait_xref in enumerate(trait_xrefs): - flat_trait_xrefs = pd.json_normalize(trait_xref, sep='_') - flat_trait_xrefs = flat_trait_xrefs.add_prefix(f'trait_xref_{jdx}_') - flat_germline_set = flat_germline_set.join(flat_trait_xrefs, rsuffix=f'_{idx}_{jdx}_tx') + 
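+        # flatten the nested trait_xrefs list via the extract_nested_json helper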
extract_nested_json(flat_germline_set, germline_set, 'trait_xrefs', f'germline_set_{idx}_trait_xrefs', 'trait_xrefs') flat_germline_set = flat_germline_set.drop(columns=[f'germline_set_{idx}_trait_xrefs']) flattened_entry = flattened_entry.join(flat_germline_set, rsuffix=f'_{idx}_gls') - flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set']) + flattened_entry = flattened_entry.drop(columns=['variation_set', + 'genes', + 'germline_classification_trait_set']) flattened_data.append(flattened_entry) df = pd.concat(flattened_data, ignore_index=True) - return df \ No newline at end of file + return df diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index 0fb9cfb..91f5df4 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -9,8 +9,8 @@ "outputs_hidden": true }, "ExecuteTime": { - "end_time": "2024-08-07T12:32:22.837138Z", - "start_time": "2024-08-07T12:32:21.979038Z" + "end_time": "2024-08-19T08:25:04.230992Z", + "start_time": "2024-08-19T08:25:03.038301Z" } }, "source": [ @@ -131,231 +131,30 @@ "outputs": [], "execution_count": null }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import requests\n", - "from api.data import request_clinvar_api_data\n", - "\n", - "gene_id = '1519785,1519786'\n", - "\n", - "frames = request_clinvar_api_data(gene_id)\n", - "\n", - "display(frames)" - ], - "id": "b21c3487476b684f", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "gene_id = '1519785'\n", - "\n", - "\n", - "clinvar_url = f\"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id={gene_id}&retmode=json\"\n", - "\n", - "request = requests.get(clinvar_url)\n", - "\n", - "if request.status_code != 200:\n", - " raise ValueError(f\"Request failed with status code {request.status_code}\")\n", - "\n", - "data = request.json()\n", - "\n", - "# Extract the 'result' part of the JSON\n", - "results = data['result']\n", - "\n", - "# Extract the 'uids' part of the JSON\n", - "flattened_data = []\n", - "\n", - "for uid in results['uids']:\n", - " entry = results[uid]\n", - "\n", - " # Using pd.json_normalize to flatten the JSON data\n", - " flattened_entry = pd.json_normalize(entry, sep='_')\n", - "\n", - " # Process variation_set\n", - " variation_set = flattened_entry.at[0, 'variation_set']\n", - " for idx, var_set in enumerate(variation_set):\n", - " flat_var_set = pd.json_normalize(var_set, sep='_')\n", - " flat_var_set = flat_var_set.add_prefix(f'variation_set_{idx}_')\n", - "\n", - " # Process variation_loc within variation_set\n", - " variation_loc = var_set.get('variation_loc', [])\n", - " for loc_idx, loc in enumerate(variation_loc):\n", - " flat_loc = pd.json_normalize(loc, sep='_')\n", - " flat_loc = flat_loc.add_prefix(f'variation_set_{idx}_loc_{loc_idx}_')\n", - " flat_var_set = flat_var_set.join(flat_loc, rsuffix=f'_{idx}_{loc_idx}_vl')\n", - " \n", - " var_xrefs = var_set.get('variation_xrefs', [])\n", - " for var_xrefs_idx, var_xref in enumerate(var_xrefs):\n", - " flat_var_xrefs = pd.json_normalize(var_xref, sep='_')\n", - " flat_var_xrefs = flat_var_xrefs.add_prefix(f'variation_set_{idx}_var_xrefs_{var_xrefs_idx}_')\n", - " flat_var_set = flat_var_set.join(flat_var_xrefs, rsuffix=f'_{idx}_{var_xrefs_idx}_vx')\n", - "\n", - "\n", - " allele_freq = var_set.get('allele_freq_set', [])\n", - " for allele_freq_idx, allele in enumerate(allele_freq):\n", - " flat_allele = pd.json_normalize(allele, sep='_')\n", - " flat_allele = 
flat_allele.add_prefix(f'variation_set_{idx}_allele_freq_{allele_freq_idx}_')\n", - " flat_var_set = flat_var_set.join(flat_allele, rsuffix=f'_{idx}_{allele_freq_idx}_af')\n", - " \n", - " # drop original nested lists columns\n", - " flat_var_set = flat_var_set.drop(columns=[f'variation_set_{idx}_variation_loc', f'variation_set_{idx}_variation_xrefs', f'variation_set_{idx}_allele_freq_set'])\n", - " \n", - " flattened_entry = flattened_entry.join(flat_var_set, rsuffix=f'_{idx}_vs')\n", - "\n", - " # Process genes\n", - " genes = flattened_entry.at[0, 'genes']\n", - " for idx, gene in enumerate(genes):\n", - " flat_genes = pd.json_normalize(gene, sep='_')\n", - " flat_genes = flat_genes.add_prefix(f'gene_{idx}_')\n", - " flattened_entry = flattened_entry.join(flat_genes, rsuffix=f'_{idx}_g')\n", - " # Process germline_classification_trait_set\n", - " germline_classification_trait_set = flattened_entry.at[0, 'germline_classification_trait_set']\n", - " for idx, germline_set in enumerate(germline_classification_trait_set):\n", - " flat_germline_set = pd.json_normalize(germline_set, sep='_')\n", - " flat_germline_set = flat_germline_set.add_prefix(f'germline_set_{idx}_')\n", - "\n", - " trait_xrefs = flat_germline_set.at[0, f'germline_set_{idx}_trait_xrefs']\n", - " for jdx, trait_xref in enumerate(trait_xrefs):\n", - " flat_trait_xrefs = pd.json_normalize(trait_xref, sep='_')\n", - " flat_trait_xrefs = flat_trait_xrefs.add_prefix(f'trait_xref_{jdx}_')\n", - " flat_germline_set = flat_germline_set.join(flat_trait_xrefs, rsuffix=f'_{idx}_{jdx}_tx')\n", - "\n", - " flat_germline_set = flat_germline_set.drop(columns=[f'germline_set_{idx}_trait_xrefs'])\n", - " flattened_entry = flattened_entry.join(flat_germline_set, rsuffix=f'_{idx}_gls')\n", - "\n", - " # Dropping original nested lists columns\n", - " flattened_entry = flattened_entry.drop(columns=['variation_set', 'genes', 'germline_classification_trait_set'])\n", - "\n", - " # Append the flattened entry to the list\n", - " flattened_data.append(flattened_entry)\n", - "\n", - "# Concatenate all flattened entries into a single DataFrame\n", - "df = pd.concat(flattened_data, ignore_index=True)\n", - "\n", - "display(df)" - ], - "id": "3b9b8bdad8bdb55d", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "Explanation of whats happening in the code below:\n", - "\n", - "Function to get all the ids from a gene name:\n", - "```python\n", - "get_variant_ids_from_clinvar_name_api(name: str, count: int)\n", - "```\n", - "\n", - "function gets the ids from the clinvar api, the name is the gene name and the count is the maximum number of ids to get (api's limit is 500)\n", - "\n", - "function returns a dictionary with the count and the list of ids:\n", - "\n", - "```json\n", - "{\n", - " 'count': int,\n", - " 'idlist': List[str]\n", - "}\n", - "```\n", - "\n", - "if the count is greater than the api's limit, the function will split the list of ids into smaller lists of 500 and then request the data from the api in chunks of 500 ids:\n", - "\n", - "```python\n", - "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", - "```\n", - "\n", - "then the function will request the data from the api and concatenate the dataframes into a single dataframe:\n", - "\n", - "```python\n", - "frames = request_clinvar_api_data(join)\n", - "variations = pd.concat([variations, frames], ignore_index=True)\n", - "```\n", - "\n", - "The variant extraction function contains a lot of nested lists and 
dictionaries, so the function will flatten the data and then concatenate the dataframes into a single dataframe\n", - "\n", - "**NOTE**\n", - "\n", - "> joining function may have been implemented wrong due to the waiting time of the api.\n" - ], - "id": "655a935b2874c218" - }, { "metadata": { "ExecuteTime": { - "end_time": "2024-08-07T12:57:28.089588Z", - "start_time": "2024-08-07T12:55:09.972813Z" + "end_time": "2024-08-19T08:25:08.217689Z", + "start_time": "2024-08-19T08:25:06.865107Z" } }, "cell_type": "code", "source": [ - "import pandas as pd\n", - "\n", - "variations = pd.DataFrame()\n", + "variation_ids = '148002'\n", "\n", - "max = 500\n", - "name = \"EYS\"\n", - "count = 2147483647\n", - "\n", - "id_array = get_variant_ids_from_clinvar_name_api(name, count)\n", - "size = int(id_array['count'])\n", - "id_list = id_array['idlist']\n", - "\n", - "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", - "\n", - "track = 0\n", - "for lists in id_lists:\n", - " join = \",\".join(lists)\n", - " frame = request_clinvar_api_data(join)\n", - " \n", - " variations = pd.concat([variations, frame], ignore_index=True)\n", - " \n", - " print(f\"{track + 1}/{len(id_lists)}\")\n", - " track += 1\n", + "frames = request_clinvar_api_data(variation_ids)\n", "\n", - "display(variations)\n" + "display(frames)" ], - "id": "129175e3a2e568be", + "id": "b21c3487476b684f", "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1/10\n", - "2/10\n", - "3/10\n", - "4/10\n", - "5/10\n", - "6/10\n", - "7/10\n", - "8/10\n", - "9/10\n", - "10/10\n" - ] - }, { "data": { "text/plain": [ - " uid obj_type ... gene_1029_strand gene_1029_source\n", - "0 3251429 single nucleotide variant ... NaN NaN\n", - "1 3246148 Deletion ... NaN NaN\n", - "2 3246147 Deletion ... NaN NaN\n", - "3 3246146 Deletion ... NaN NaN\n", - "4 3246145 Deletion ... NaN NaN\n", - "... ... ... ... ... ...\n", - "4778 538 single nucleotide variant ... NaN NaN\n", - "4779 537 single nucleotide variant ... NaN NaN\n", - "4780 536 Deletion ... NaN NaN\n", - "4781 535 Deletion ... NaN NaN\n", - "4782 534 Deletion ... NaN NaN\n", + " uid ... germline_set_0_trait_name\n", + "0 148002 ... See cases\n", "\n", - "[4783 rows x 4428 columns]" + "[1 rows x 37 columns]" ], "text/html": [ "
\n", @@ -412,746 +211,30 @@ " variation_set_0_aliases\n", " variation_set_0_variant_type\n", " variation_set_0_canonical_spdi\n", - " variation_set_0_loc_0_status\n", - " variation_set_0_loc_0_assembly_name\n", - " variation_set_0_loc_0_chr\n", - " variation_set_0_loc_0_band\n", - " ...\n", - " gene_1020_symbol\n", - " gene_1020_geneid\n", - " gene_1020_strand\n", - " gene_1020_source\n", - " gene_1021_symbol\n", - " gene_1021_geneid\n", - " gene_1021_strand\n", - " gene_1021_source\n", - " gene_1022_symbol\n", - " gene_1022_geneid\n", - " gene_1022_strand\n", - " gene_1022_source\n", - " gene_1023_symbol\n", - " gene_1023_geneid\n", - " gene_1023_strand\n", - " gene_1023_source\n", - " gene_1024_symbol\n", - " gene_1024_geneid\n", - " gene_1024_strand\n", - " gene_1024_source\n", - " gene_1025_symbol\n", - " gene_1025_geneid\n", - " gene_1025_strand\n", - " gene_1025_source\n", - " gene_1026_symbol\n", - " gene_1026_geneid\n", - " gene_1026_strand\n", - " gene_1026_source\n", - " gene_1027_symbol\n", - " gene_1027_geneid\n", - " gene_1027_strand\n", - " gene_1027_source\n", - " gene_1028_symbol\n", - " gene_1028_geneid\n", - " gene_1028_strand\n", - " gene_1028_source\n", - " gene_1029_symbol\n", - " gene_1029_geneid\n", - " gene_1029_strand\n", - " gene_1029_source\n", + " germline_set_0_trait_name\n", " \n", " \n", " \n", " \n", " 0\n", - " 3251429\n", - " single nucleotide variant\n", - " VCV003251429\n", - " VCV003251429.\n", - " NM_001142800.2(EYS):c.5886T>C (p.Thr1962=)\n", + " 148002\n", + " copy number gain\n", + " VCV000148002\n", + " VCV000148002.\n", + " GRCh38/hg38 6p12.1-q12(chr6:53931543-68149750)x3\n", " \n", - " EYS\n", + " BAG2\n", " 06\n", - " 00000000000064436215\n", - " \n", - " \n", - " [synonymous variant]\n", - " \n", - " \n", - " [SCV005076913]\n", - " [RCV004587835]\n", - " Likely benign\n", - " 2024/04/08 00:00\n", - " criteria provided, single submitter\n", - " \n", - " \n", - " 1/01/01 00:00\n", + " 00000000000053931543\n", " \n", " \n", " []\n", " \n", - " 1/01/01 00:00\n", - " \n", " \n", - " []\n", - " 3410228\n", - " NM_001142800.2(EYS):c.5886T>C (p.Thr1962=)\n", - " c.5886T>C\n", - " []\n", - " single nucleotide variant\n", - " NC_000006.12:64436214:A:G\n", - " current\n", - " GRCh38\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 1\n", - " 3246148\n", - " Deletion\n", - " VCV003246148\n", - " VCV003246148.\n", - " NC_000006.11:g.(?_66204859)_(66217229_?)del\n", - " \n", - " EYS\n", - " 06\n", - " 99999999999999999999\n", - " \n", - " \n", - " []\n", - " \n", - " \n", - " [SCV005067530]\n", - " [RCV004578792]\n", + " [SCV000177291]\n", + " [RCV000137097]\n", " Pathogenic\n", - " 2023/01/02 00:00\n", - " criteria provided, single submitter\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 3403857\n", - " NC_000006.11:g.(?_66204859)_(66217229_?)del\n", - " NC_000006.11:g.(?_66204859)_(66217229_?)del\n", - " []\n", - " Deletion\n", - " \n", - " previous\n", - " GRCh37\n", - " 6\n", - " 
6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 2\n", - " 3246147\n", - " Deletion\n", - " VCV003246147\n", - " VCV003246147.\n", - " NC_000006.11:g.(?_64511633)_(64516181_?)del\n", - " \n", - " EYS\n", - " 06\n", - " 99999999999999999999\n", - " \n", - " \n", - " []\n", - " \n", - " \n", - " [SCV005067529]\n", - " [RCV004578791]\n", - " Likely pathogenic\n", - " 2023/03/08 00:00\n", - " criteria provided, single submitter\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 3403856\n", - " NC_000006.11:g.(?_64511633)_(64516181_?)del\n", - " NC_000006.11:g.(?_64511633)_(64516181_?)del\n", - " []\n", - " Deletion\n", - " \n", - " previous\n", - " GRCh37\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 3\n", - " 3246146\n", - " Deletion\n", - " VCV003246146\n", - " VCV003246146.\n", - " NC_000006.11:g.(?_65523280)_(65527746_?)del\n", - " \n", - " EYS\n", - " 06\n", - " 99999999999999999999\n", - " \n", - " \n", - " []\n", - " \n", - " \n", - " [SCV005067528]\n", - " [RCV004578790]\n", - " Likely pathogenic\n", - " 2023/04/30 00:00\n", - " criteria provided, single submitter\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 3403855\n", - " NC_000006.11:g.(?_65523280)_(65527746_?)del\n", - " NC_000006.11:g.(?_65523280)_(65527746_?)del\n", - " []\n", - " Deletion\n", - " \n", - " previous\n", - " GRCh37\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4\n", - " 3246145\n", - " Deletion\n", - " VCV003246145\n", - " VCV003246145.\n", - " NC_000006.11:g.(?_65587645)_(65596716_?)del\n", - " \n", - " EYS\n", - " 06\n", - " 99999999999999999999\n", - " \n", - " \n", - " []\n", - " \n", - " \n", - " [SCV005067527]\n", - " [RCV004578789]\n", - " Likely pathogenic\n", - " 2023/06/27 00:00\n", - " criteria provided, single submitter\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 3403854\n", - " 
NC_000006.11:g.(?_65587645)_(65596716_?)del\n", - " NC_000006.11:g.(?_65587645)_(65596716_?)del\n", - " []\n", - " Deletion\n", - " \n", - " previous\n", - " GRCh37\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 4778\n", - " 538\n", - " single nucleotide variant\n", - " VCV000000538\n", - " VCV000000538.\n", - " NM_001142800.2(EYS):c.9405T>A (p.Tyr3135Ter)\n", - " \n", - " EYS\n", - " 06\n", - " 00000000000063720626\n", - " \n", - " \n", - " [3 prime UTR variant, nonsense]\n", - " Y3156*, Y3135*\n", - " \n", - " [SCV000020717, SCV000894389, SCV000709692, SCV...\n", - " [RCV000000568, RCV000593252, RCV003914789, RCV...\n", - " Pathogenic/Likely pathogenic\n", - " 2024/03/09 00:00\n", - " criteria provided, multiple submitters, no con...\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 15577\n", - " NM_001142800.2(EYS):c.9405T>A (p.Tyr3135Ter)\n", - " c.9405T>A\n", - " []\n", - " single nucleotide variant\n", - " NC_000006.12:63720625:A:T\n", - " current\n", - " GRCh38\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4779\n", - " 537\n", - " single nucleotide variant\n", - " VCV000000537\n", - " VCV000000537.\n", - " NM_001142800.2(EYS):c.5857G>T (p.Glu1953Ter)\n", - " \n", - " EYS\n", - " 06\n", - " 00000000000064436244\n", - " \n", - " \n", - " [nonsense]\n", - " E1953*\n", - " \n", - " [SCV000020716, SCV002519636, SCV004195857, SCV...\n", - " [RCV000000567, RCV001387157]\n", - " Pathogenic\n", - " 2024/02/15 00:00\n", - " criteria provided, multiple 
submitters, no con...\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 15576\n", - " NM_001142800.2(EYS):c.5857G>T (p.Glu1953Ter)\n", - " c.5857G>T\n", - " []\n", - " single nucleotide variant\n", - " NC_000006.12:64436243:C:A\n", - " current\n", - " GRCh38\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4780\n", - " 536\n", - " Deletion\n", - " VCV000000536\n", - " VCV000000536.\n", - " NM_001142800.1(EYS):c.1767-24596_2023+238135del\n", - " \n", - " LOC441155\n", - " 06\n", - " 00000000000065057728\n", - " \n", - " \n", - " []\n", - " \n", - " \n", - " [SCV000020715]\n", - " [RCV000000566]\n", - " Pathogenic\n", - " 2008/11/01 00:00\n", + " 2010/12/22 00:00\n", " no assertion criteria provided\n", " \n", " \n", @@ -1164,229 +247,16 @@ " \n", " \n", " []\n", - " 15575\n", - " NM_001142800.1(EYS):c.1767-24596_2023+238135del\n", - " NM_001142800.1(EYS):c.1767-24596_2023+238135del\n", - " [EX12DEL]\n", - " Deletion\n", - " \n", - " current\n", - " GRCh38\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4781\n", - " 535\n", - " Deletion\n", - " VCV000000535\n", - " VCV000000535.\n", - " NM_001142800.1(EYS):c.2260-51191_2992+45990del\n", - " \n", - " EYS\n", - " 06\n", - " 00000000000064840707\n", - " \n", - " \n", - " []\n", - " \n", - " \n", - " [SCV000020714]\n", - " [RCV000000565]\n", - " Pathogenic\n", - " 2008/11/01 00:00\n", - " no assertion criteria provided\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", + " 157753\n", + " GRCh38/hg38 6p12.1-q12(chr6:53931543-68149750)x3\n", + " GRCh38/hg38 6p12.1-q12(chr6:53931543-68149750)x3\n", " []\n", + " copy number gain\n", " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 15574\n", - " NM_001142800.1(EYS):c.2260-51191_2992+45990del\n", - " NM_001142800.1(EYS):c.2260-51191_2992+45990del\n", - " [EX15-19DEL]\n", - " Deletion\n", - " \n", - " current\n", - " GRCh38\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4782\n", - " 534\n", - " Deletion\n", - " VCV000000534\n", - " 
VCV000000534.\n", - " NM_001142800.2(EYS):c.2710_2726del (p.Asp904fs)\n", - " \n", - " EYS\n", - " 06\n", - " 00000000000064902416\n", - " \n", - " \n", - " [frameshift variant]\n", - " D904fs\n", - " \n", - " [SCV000020713]\n", - " [RCV000000564]\n", - " Pathogenic\n", - " 2008/11/01 00:00\n", - " no assertion criteria provided\n", - " \n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " \n", - " 1/01/01 00:00\n", - " \n", - " \n", - " []\n", - " 15573\n", - " NM_001142800.2(EYS):c.2710_2726del (p.Asp904fs)\n", - " c.2710_2726del\n", - " []\n", - " Deletion\n", - " NC_000006.12:64902415:ACCATATCTTCACAGTCACCATA:...\n", - " current\n", - " GRCh38\n", - " 6\n", - " 6q12\n", - " ...\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " See cases\n", " \n", " \n", "\n", - "

4783 rows × 4428 columns

\n", "
" ] }, @@ -1394,7 +264,116 @@ "output_type": "display_data" } ], - "execution_count": 37 + "execution_count": 2 + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "clinvar_data = pd.read_csv(\"C:\\\\Users\\\\Kajus\\\\Desktop\\\\clinvar_results.txt\", sep='\\t')\n", + "\n", + "display(clinvar_data)" + ], + "id": "8cb4bbe3f35562d5", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Explanation of whats happening in the code below:\n", + "\n", + "Function to get all the ids from a gene name:\n", + "```python\n", + "get_variant_ids_from_clinvar_name_api(name: str, count: int)\n", + "```\n", + "\n", + "function gets the ids from the clinvar api, the name is the gene name and the count is the maximum number of ids to get (api's limit is 500)\n", + "\n", + "function returns a dictionary with the count and the list of ids:\n", + "\n", + "```json\n", + "{\n", + " 'count': int,\n", + " 'idlist': List[str]\n", + "}\n", + "```\n", + "\n", + "if the count is greater than the api's limit, the function will split the list of ids into smaller lists of 500 and then request the data from the api in chunks of 500 ids:\n", + "\n", + "```python\n", + "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", + "```\n", + "\n", + "then the function will request the data from the api and concatenate the dataframes into a single dataframe:\n", + "\n", + "```python\n", + "frames = request_clinvar_api_data(join)\n", + "variations = pd.concat([variations, frames], ignore_index=True)\n", + "```\n", + "\n", + "The variant extraction function contains a lot of nested lists and dictionaries, so the function will flatten the data and then concatenate the dataframes into a single dataframe\n", + "\n", + "**NOTE**\n", + "\n", + "> joining function may have been implemented wrong due to the waiting time of the api.\n" + ], + "id": "655a935b2874c218" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "variations = pd.DataFrame()\n", + "\n", + "max = 500\n", + "name = \"EYS\"\n", + "count = 2147483647\n", + "\n", + "id_array = get_variant_ids_from_clinvar_name_api(name, count)\n", + "size = int(id_array['count'])\n", + "id_list = id_array['idlist']\n", + "\n", + "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", + "\n", + "track = 0\n", + "for lists in id_lists:\n", + " join = \",\".join(lists)\n", + " frame = request_clinvar_api_data(join)\n", + " \n", + " variations = pd.concat([variations, frame], ignore_index=True)\n", + " \n", + " print(f\"{track + 1}/{len(id_lists)}\")\n", + " track += 1\n", + "\n", + "display(variations)\n" + ], + "id": "129175e3a2e568be", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "clinvar_data = pd.read_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\clinvar_result.txt', sep='\\t')\n", + "\n", + "display(clinvar_data)" + ], + "id": "c85507a3e2c584da", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": "", + "id": "8e0e2f2853152d96", + "outputs": [], + "execution_count": null } ], "metadata": { From 5243f8e43cfabe4b532b74c29b455cc3ccd616c5 Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:16:16 +0300 Subject: [PATCH 05/72] Created a function which collects EYS data from gnomAD api. 
--- api/data/__init__.py | 1 + api/data/refactoring.py | 127 ++++++++++++++++ tests/pipeline.ipynb | 324 ++++++++++++++++++++++++---------------- 3 files changed, 322 insertions(+), 130 deletions(-) diff --git a/api/data/__init__.py b/api/data/__init__.py index 5ccab15..9598171 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -59,4 +59,5 @@ request_clinvar_api_data, get_variant_ids_from_clinvar_name_api, extract_nested_json, + request_gnomad_api_data, ) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index 09eaca1..de2ff91 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -272,3 +272,130 @@ def request_clinvar_api_data(gene_id): df = pd.concat(flattened_data, ignore_index=True) return df + + +def request_gnomad_api_data(to_file=True): + """ + Requests gnomAD API for data about EYS gene containing: + - variant_id + - cDNA change + - protein change + - allele frequency + - homozygote count + - popmax + - popmax population + + :param bool to_file: if True, saves data to variants.csv + :returns: DataFrame from gnomAD API + :rtype: DataFrame + """ + + url = 'https://gnomad.broadinstitute.org/api' + query = """ + query{ + gene(gene_id: "ENSG00000188107", reference_genome: GRCh38) { + variants(dataset: gnomad_r4) + { + variant_id + chrom + pos + ref + hgvsc + hgvsp + alt + exome { + ac + an + ac_hom + populations + { + id + ac + an + } + } + genome + { + ac + an + ac_hom + populations + { + id + ac + an + } + } + } + } + } + """ + response = requests.post(url, json={'query': query}) + if response.status_code == 200: + data = response.json()['data']['gene']['variants'] + + df = pd.json_normalize(data) + + df['total_ac'] = df['exome.ac'].fillna(0) + df['genome.ac'].fillna(0) + df['total_an'] = df['exome.an'].fillna(0) + df['genome.an'].fillna(0) + + df['cDNA change'] = df['hgvsc'].fillna(0) + df['Protein change'] = df['hgvsp'].fillna(0) + + df['Allele Frequency'] = df['total_ac'] / df['total_an'] + df['Homozygote Count'] = df['exome.ac_hom'].fillna(0) + df['genome.ac_hom'].fillna(0) + exome_populations = df['exome.populations'] + genome_populations = df['genome.populations'] + ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining'] + + def process_population_data(pop_data, name, pop_ids, index): + for pop_id in pop_ids: + df.loc[index, f'{name}_ac_{pop_id}'] = 0 + df.loc[index, f'{name}_an_{pop_id}'] = 0 + if type(pop_data) == list: + for pop in pop_data: + id = pop['id'] + df.loc[index, f'{name}_ac_{id}'] = pop['ac'] + df.loc[index, f'{name}_an_{id}'] = pop['an'] + + for i in range(len(exome_populations)): + exome_pop = exome_populations[i] + process_population_data(exome_pop, 'exome', ids, i) + genome_pop = genome_populations[i] + process_population_data(genome_pop, 'genome', ids, i) + + for id in ids: + df[f'Allele_Frequency_{id}'] = (df[f'exome_ac_{id}'].fillna(0) + df[f'genome_ac_{id}'].fillna(0)) / ( + df[f'exome_an_{id}'].fillna(0) + df[f'genome_an_{id}'].fillna(0)) + population_mapping = { + 'afr': 'African/African American', + 'eas': 'East Asian', + 'asj': 'Ashkenazi Jew', + 'sas': 'South Asian', + 'nfe': 'European (non-Finnish)', + 'fin': 'European (Finnish)', + 'mid': 'Middle Eastern', + 'amr': 'Admixed American', + 'ami': "Amish", + 'remaining': 'Remaining', + '': '' + } + for i in range(len(df)): + max = 0 + maxid = '' + for id in ids: + if df.loc[i, f'Allele_Frequency_{id}'] > max: + max = df.loc[i, f'Allele_Frequency_{id}'] + maxid = id + df.loc[i, 'Popmax'] = max + df.loc[i, 'Popmax population'] = 
population_mapping[maxid] + not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency', 'variant_id', + 'cDNA change', 'Protein change'] + df = df.drop([col for col in df.columns if col not in not_to_drop], axis=1) + if to_file: + df.to_csv('variants.csv', index=True) + + else: + print('Error:', response.status_code) + + return df diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index 91f5df4..6c6ef65 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -9,8 +9,8 @@ "outputs_hidden": true }, "ExecuteTime": { - "end_time": "2024-08-19T08:25:04.230992Z", - "start_time": "2024-08-19T08:25:03.038301Z" + "end_time": "2024-08-21T19:51:25.871973Z", + "start_time": "2024-08-21T19:51:25.105850Z" } }, "source": [ @@ -23,6 +23,7 @@ " set_lovd_dtypes,\n", " request_clinvar_api_data,\n", " get_variant_ids_from_clinvar_name_api,\n", + " request_gnomad_api_data,\n", " )\n", "from api.data import save_lovd_as_vcf\n", "\n", @@ -132,12 +133,7 @@ "execution_count": null }, { - "metadata": { - "ExecuteTime": { - "end_time": "2024-08-19T08:25:08.217689Z", - "start_time": "2024-08-19T08:25:06.865107Z" - } - }, + "metadata": {}, "cell_type": "code", "source": [ "variation_ids = '148002'\n", @@ -147,124 +143,8 @@ "display(frames)" ], "id": "b21c3487476b684f", - "outputs": [ - { - "data": { - "text/plain": [ - " uid ... germline_set_0_trait_name\n", - "0 148002 ... See cases\n", - "\n", - "[1 rows x 37 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
uidobj_typeaccessionaccession_versiontitlerecord_statusgene_sortchr_sortlocation_sortvariation_set_namevariation_set_idmolecular_consequence_listprotein_changefda_recognized_databasesupporting_submissions_scvsupporting_submissions_rcvgermline_classification_descriptiongermline_classification_last_evaluatedgermline_classification_review_statusgermline_classification_fda_recognized_databaseclinical_impact_classification_descriptionclinical_impact_classification_last_evaluatedclinical_impact_classification_review_statusclinical_impact_classification_fda_recognized_databaseclinical_impact_classification_trait_setoncogenicity_classification_descriptiononcogenicity_classification_last_evaluatedoncogenicity_classification_review_statusoncogenicity_classification_fda_recognized_databaseoncogenicity_classification_trait_setvariation_set_0_measure_idvariation_set_0_variation_namevariation_set_0_cdna_changevariation_set_0_aliasesvariation_set_0_variant_typevariation_set_0_canonical_spdigermline_set_0_trait_name
0148002copy number gainVCV000148002VCV000148002.GRCh38/hg38 6p12.1-q12(chr6:53931543-68149750)x3BAG20600000000000053931543[][SCV000177291][RCV000137097]Pathogenic2010/12/22 00:00no assertion criteria provided1/01/01 00:00[]1/01/01 00:00[]157753GRCh38/hg38 6p12.1-q12(chr6:53931543-68149750)x3GRCh38/hg38 6p12.1-q12(chr6:53931543-68149750)x3[]copy number gainSee cases
\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 2 + "outputs": [], + "execution_count": null }, { "metadata": {}, @@ -319,7 +199,7 @@ "\n", "> joining function may have been implemented wrong due to the waiting time of the api.\n" ], - "id": "655a935b2874c218" + "id": "976f9632a8ef29e3" }, { "metadata": {}, @@ -367,13 +247,197 @@ "outputs": [], "execution_count": null }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-21T19:54:33.516081Z", + "start_time": "2024-08-21T19:52:03.354634Z" + } + }, + "cell_type": "code", + "source": [ + "gnomad_from_api = request_gnomad_api_data(False)\n", + "\n", + "display(gnomad_from_api)" + ], + "id": "64482c033c794fb4", + "outputs": [ + { + "data": { + "text/plain": [ + " variant_id cDNA change ... Popmax Popmax population\n", + "0 6-63720525-A-G c.*71T>C ... 0.000016 African/African American\n", + "1 6-63720525-A-T c.*71T>A ... 0.000192 East Asian\n", + "2 6-63720525-A-C c.*71T>G ... 0.000000 \n", + "3 6-63720526-T-A c.*70A>T ... 0.000020 South Asian\n", + "4 6-63720527-G-T c.*69C>A ... 0.000000 \n", + "... ... ... ... ... ...\n", + "14295 6-65495479-G-T c.-69C>A ... 0.000000 \n", + "14296 6-65495479-G-A c.-69C>T ... 0.000031 African/African American\n", + "14297 6-65495482-A-G c.-72T>C ... 0.000070 Admixed American\n", + "14298 6-65495484-T-G c.-74A>C ... 0.000060 South Asian\n", + "14299 6-65495485-T-C c.-75A>G ... 0.000012 South Asian\n", + "\n", + "[14300 rows x 7 columns]" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
variant_idcDNA changeProtein changeAllele FrequencyHomozygote CountPopmaxPopmax population
06-63720525-A-Gc.*71T>C01.807419e-060.00.000016African/African American
16-63720525-A-Tc.*71T>A06.573844e-060.00.000192East Asian
26-63720525-A-Cc.*71T>G00.000000e+000.00.000000
36-63720526-T-Ac.*70A>T01.045299e-060.00.000020South Asian
46-63720527-G-Tc.*69C>A00.000000e+000.00.000000
........................
142956-65495479-G-Tc.-69C>A00.000000e+000.00.000000
142966-65495479-G-Ac.-69C>T01.446349e-060.00.000031African/African American
142976-65495482-A-Gc.-72T>C02.629510e-060.00.000070Admixed American
142986-65495484-T-Gc.-74A>C03.645085e-060.00.000060South Asian
142996-65495485-T-Cc.-75A>G07.310070e-070.00.000012South Asian
\n", + "

14300 rows × 7 columns

\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 2 + }, { "metadata": {}, "cell_type": "code", - "source": "", - "id": "8e0e2f2853152d96", "outputs": [], - "execution_count": null + "execution_count": null, + "source": "", + "id": "6f0abfb50bd211a0" } ], "metadata": { From 5c30ce16e8e859bb53c98e18eff4db0e314c250a Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Thu, 22 Aug 2024 20:24:16 +0300 Subject: [PATCH 06/72] Added ability to get any gene from API --- api/data/refactoring.py | 36 +++++++++++++++++++----------------- tests/pipeline.ipynb | 10 +++++----- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index de2ff91..8057842 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -274,9 +274,9 @@ def request_clinvar_api_data(gene_id): return df -def request_gnomad_api_data(to_file=True): +def request_gnomad_api_data(gene_name, to_file=True): """ - Requests gnomAD API for data about EYS gene containing: + Requests gnomAD API for data about a specific gene containing: - variant_id - cDNA change - protein change @@ -285,17 +285,18 @@ def request_gnomad_api_data(to_file=True): - popmax - popmax population + :param str gene_name: name of gene :param bool to_file: if True, saves data to variants.csv :returns: DataFrame from gnomAD API :rtype: DataFrame """ url = 'https://gnomad.broadinstitute.org/api' - query = """ - query{ - gene(gene_id: "ENSG00000188107", reference_genome: GRCh38) { + query = f""" + query{{ + gene(gene_symbol: "{gene_name}", reference_genome: GRCh38) {{ variants(dataset: gnomad_r4) - { + {{ variant_id chrom pos @@ -303,33 +304,34 @@ def request_gnomad_api_data(to_file=True): hgvsc hgvsp alt - exome { + exome {{ ac an ac_hom populations - { + {{ id ac an - } - } + }} + }} genome - { + {{ ac an ac_hom populations - { + {{ id ac an - } - } - } - } - } + }} + }} + }} + }} + }} """ + response = requests.post(url, json={'query': query}) if response.status_code == 200: data = response.json()['data']['gene']['variants'] diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index 6c6ef65..a838cd4 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -9,8 +9,8 @@ "outputs_hidden": true }, "ExecuteTime": { - "end_time": "2024-08-21T19:51:25.871973Z", - "start_time": "2024-08-21T19:51:25.105850Z" + "end_time": "2024-08-22T17:20:23.240355Z", + "start_time": "2024-08-22T17:20:21.651097Z" } }, "source": [ @@ -250,13 +250,13 @@ { "metadata": { "ExecuteTime": { - "end_time": "2024-08-21T19:54:33.516081Z", - "start_time": "2024-08-21T19:52:03.354634Z" + "end_time": "2024-08-22T17:23:41.828469Z", + "start_time": "2024-08-22T17:21:09.627424Z" } }, "cell_type": "code", "source": [ - "gnomad_from_api = request_gnomad_api_data(False)\n", + "gnomad_from_api = request_gnomad_api_data(\"EYS\", False)\n", "\n", "display(gnomad_from_api)" ], From 197864c56ecd4d396ddac539c55680bcce5c17c8 Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Thu, 22 Aug 2024 20:28:25 +0300 Subject: [PATCH 07/72] Removed from last branch --- api/__init__.py | 2 - api/data/__init__.py | 3 -- api/data/refactoring.py | 115 ---------------------------------------- tests/pipeline.ipynb | 115 ---------------------------------------- 4 files changed, 235 deletions(-) diff --git a/api/__init__.py b/api/__init__.py index fb618dd..459952b 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -56,6 +56,4 @@ parse_lovd, 
from_clinvar_name_to_cdna_position, save_lovd_as_vcf, - request_clinvar_api_data, - get_variant_ids_from_clinvar_name_api, ) diff --git a/api/data/__init__.py b/api/data/__init__.py index 9598171..7cd3997 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -56,8 +56,5 @@ parse_lovd, from_clinvar_name_to_cdna_position, save_lovd_as_vcf, - request_clinvar_api_data, - get_variant_ids_from_clinvar_name_api, - extract_nested_json, request_gnomad_api_data, ) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index 8057842..1ac916b 100644 --- a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -159,121 +159,6 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"): f.write("\n") -def get_variant_ids_from_clinvar_name_api(name, count=100): - """ - Extracts variant ids from ClinVar `name` variable. /n - key of dictionary is the size of the list of ids. - - :param str name: name of variant - :param int count: number of ids to extract - :returns: ids of variants - :rtype: str - """ - - result = {} - base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=clinvar&term=" - clinvar_url = f"{base_url}{name}&retmode=json&retmax={count}" - - request = requests.get(clinvar_url) - - if request.status_code != 200: - raise ValueError(f"Request failed with status code {request.status_code}") - - data = request.json() - - result['idlist'] = data['esearchresult']['idlist'] - result['count'] = data['esearchresult']['count'] - - return result - - -def extract_nested_json(flat_parsed, parsed_from, required_column, prefix, join_prefix): - """ - Extracts nested JSON data from dictionary. - - :param DataFrame parsed_from: normalised JSON data - :param str required_column: column to extract - :param str prefix: prefix for extracted columns - """ - - data_set = parsed_from.get(required_column, []) - for idx, data in enumerate(data_set): - flat_data = pd.json_normalize(data, sep='_') - flat_data = flat_data.add_prefix(f'{prefix}_{idx}_') - flat_parsed = flat_parsed.join(flat_data, rsuffix=f'_{idx}_{join_prefix}') - - -def request_clinvar_api_data(gene_id): - """ - Requests ClinVar API for data about variant with given id. - Converts it to pandas dataframe. 
- - :param str gene_id: id of variant (may be multiple) - :returns: DataFrame from ClinVar API - :rtype: DataFrame - """ - base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=clinvar&id=" - clinvar_url = f"{base_url}{gene_id}&retmode=json" - - request = requests.get(clinvar_url) - - if request.status_code != 200: - raise ValueError(f"Request failed with status code {request.status_code}") - - results = request.json()['result'] - - flattened_data = [] - - for uid in results['uids']: - entry = results[uid] - - flattened_entry = pd.json_normalize(entry, sep='_') - - variation_set = flattened_entry.at[0, 'variation_set'] - for idx, var_set in enumerate(variation_set): - flat_var_set = pd.json_normalize(var_set, sep='_') - flat_var_set = flat_var_set.add_prefix(f'variation_set_{idx}_') - - extract_nested_json(flat_var_set, var_set, 'variation_loc', f'variation_set_{idx}_loc', 'loc') - extract_nested_json(flat_var_set, var_set, 'variation_xrefs', f'variation_set_{idx}_xrefs', 'xrefs') - extract_nested_json(flat_var_set, var_set, 'allele_freq_set', f'variation_set_{idx}_allele_freq', 'allele_freq') - - flat_var_set = flat_var_set.drop( - columns=[f'variation_set_{idx}_variation_loc', - f'variation_set_{idx}_variation_xrefs', - f'variation_set_{idx}_allele_freq_set']) - flattened_entry = flattened_entry.join(flat_var_set, rsuffix=f'_{idx}_vs') - - # this extraction is different from the previous ones - - genes = flattened_entry.at[0, 'genes'] - for idx, gene in enumerate(genes): - flat_genes = pd.json_normalize(gene, sep='_') - flat_genes = flat_genes.add_prefix(f'gene_{idx}_') - flattened_entry = flattened_entry.join(flat_genes, rsuffix=f'_{idx}_g') - - germline_classification_trait_set = flattened_entry.at[0, - 'germline_classification_trait_set'] - for idx, germline_set in enumerate(germline_classification_trait_set): - flat_germline_set = pd.json_normalize(germline_set, sep='_') - flat_germline_set = flat_germline_set.add_prefix(f'germline_set_{idx}_') - - extract_nested_json(flat_germline_set, germline_set, 'trait_xrefs', f'germline_set_{idx}_trait_xrefs', 'trait_xrefs') - - flat_germline_set = flat_germline_set.drop(columns=[f'germline_set_{idx}_trait_xrefs']) - flattened_entry = flattened_entry.join(flat_germline_set, rsuffix=f'_{idx}_gls') - - flattened_entry = flattened_entry.drop(columns=['variation_set', - 'genes', - 'germline_classification_trait_set']) - - flattened_data.append(flattened_entry) - - df = pd.concat(flattened_data, ignore_index=True) - - return df - - def request_gnomad_api_data(gene_name, to_file=True): """ Requests gnomAD API for data about a specific gene containing: diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index a838cd4..044e76d 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -132,121 +132,6 @@ "outputs": [], "execution_count": null }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "variation_ids = '148002'\n", - "\n", - "frames = request_clinvar_api_data(variation_ids)\n", - "\n", - "display(frames)" - ], - "id": "b21c3487476b684f", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "clinvar_data = pd.read_csv(\"C:\\\\Users\\\\Kajus\\\\Desktop\\\\clinvar_results.txt\", sep='\\t')\n", - "\n", - "display(clinvar_data)" - ], - "id": "8cb4bbe3f35562d5", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "markdown", - "source": [ - "Explanation of whats happening in the code below:\n", - "\n", - "Function to get 
all the ids from a gene name:\n", - "```python\n", - "get_variant_ids_from_clinvar_name_api(name: str, count: int)\n", - "```\n", - "\n", - "function gets the ids from the clinvar api, the name is the gene name and the count is the maximum number of ids to get (api's limit is 500)\n", - "\n", - "function returns a dictionary with the count and the list of ids:\n", - "\n", - "```json\n", - "{\n", - " 'count': int,\n", - " 'idlist': List[str]\n", - "}\n", - "```\n", - "\n", - "if the count is greater than the api's limit, the function will split the list of ids into smaller lists of 500 and then request the data from the api in chunks of 500 ids:\n", - "\n", - "```python\n", - "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", - "```\n", - "\n", - "then the function will request the data from the api and concatenate the dataframes into a single dataframe:\n", - "\n", - "```python\n", - "frames = request_clinvar_api_data(join)\n", - "variations = pd.concat([variations, frames], ignore_index=True)\n", - "```\n", - "\n", - "The variant extraction function contains a lot of nested lists and dictionaries, so the function will flatten the data and then concatenate the dataframes into a single dataframe\n", - "\n", - "**NOTE**\n", - "\n", - "> joining function may have been implemented wrong due to the waiting time of the api.\n" - ], - "id": "976f9632a8ef29e3" - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "import pandas as pd\n", - "\n", - "variations = pd.DataFrame()\n", - "\n", - "max = 500\n", - "name = \"EYS\"\n", - "count = 2147483647\n", - "\n", - "id_array = get_variant_ids_from_clinvar_name_api(name, count)\n", - "size = int(id_array['count'])\n", - "id_list = id_array['idlist']\n", - "\n", - "id_lists = [id_list[i:i + max] for i in range(0, size, max)]\n", - "\n", - "track = 0\n", - "for lists in id_lists:\n", - " join = \",\".join(lists)\n", - " frame = request_clinvar_api_data(join)\n", - " \n", - " variations = pd.concat([variations, frame], ignore_index=True)\n", - " \n", - " print(f\"{track + 1}/{len(id_lists)}\")\n", - " track += 1\n", - "\n", - "display(variations)\n" - ], - "id": "129175e3a2e568be", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "cell_type": "code", - "source": [ - "clinvar_data = pd.read_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\clinvar_result.txt', sep='\\t')\n", - "\n", - "display(clinvar_data)" - ], - "id": "c85507a3e2c584da", - "outputs": [], - "execution_count": null - }, { "metadata": { "ExecuteTime": { From 4d3575a2e77834118bf4d4a51c2ccf6ae95f83c5 Mon Sep 17 00:00:00 2001 From: Kajus CC <42713684+KajusC@users.noreply.github.com> Date: Wed, 28 Aug 2024 21:33:19 +0300 Subject: [PATCH 08/72] Downloaded data from API, formatted code --- api/data/__init__.py | 7 +- api/data/refactoring.py | 110 ++++++------ tests/pipeline.ipynb | 359 ++++++++++++++++------------------------ 3 files changed, 203 insertions(+), 273 deletions(-) diff --git a/api/data/__init__.py b/api/data/__init__.py index 7cd3997..bd40c79 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -28,7 +28,9 @@ LOVD_TABLES_DATA_TYPES, # Paths for database downloads - DATABASES_DOWNLOAD_PATHS + DATABASES_DOWNLOAD_PATHS, + + GNOMAD_PATH, ) # DATA COLLECTION IMPORT @@ -57,4 +59,7 @@ from_clinvar_name_to_cdna_position, save_lovd_as_vcf, request_gnomad_api_data, + merge_gnomad_lovd, + parse_gnomad, + set_gnomad_dtypes, ) diff --git a/api/data/refactoring.py b/api/data/refactoring.py index 8aa880d..f2fd6cd 100644 --- 
a/api/data/refactoring.py +++ b/api/data/refactoring.py @@ -247,7 +247,18 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"): f.write("\n") -def request_gnomad_api_data(gene_name, to_file=True): +def process_population_data(df, pop_data, name, pop_ids, index): + for pop_id in pop_ids: + df.loc[index, f'{name}_ac_{pop_id}'] = 0 + df.loc[index, f'{name}_an_{pop_id}'] = 0 + if isinstance(pop_data, list): + for pop in pop_data: + variant_id = pop['id'] + df.loc[index, f'{name}_ac_{variant_id}'] = pop['ac'] + df.loc[index, f'{name}_an_{variant_id}'] = pop['an'] + + +def request_gnomad_api_data(gene_name): """ Requests gnomAD API for data about a specific gene containing: - variant_id @@ -305,44 +316,38 @@ def request_gnomad_api_data(gene_name, to_file=True): }} """ - response = requests.post(url, json={'query': query}) - if response.status_code == 200: - data = response.json()['data']['gene']['variants'] - - df = pd.json_normalize(data) - - df['total_ac'] = df['exome.ac'].fillna(0) + df['genome.ac'].fillna(0) - df['total_an'] = df['exome.an'].fillna(0) + df['genome.an'].fillna(0) - - df['cDNA change'] = df['hgvsc'].fillna(0) - df['Protein change'] = df['hgvsp'].fillna(0) - - df['Allele Frequency'] = df['total_ac'] / df['total_an'] - df['Homozygote Count'] = df['exome.ac_hom'].fillna(0) + df['genome.ac_hom'].fillna(0) - exome_populations = df['exome.populations'] - genome_populations = df['genome.populations'] - ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining'] - - def process_population_data(pop_data, name, pop_ids, index): - for pop_id in pop_ids: - df.loc[index, f'{name}_ac_{pop_id}'] = 0 - df.loc[index, f'{name}_an_{pop_id}'] = 0 - if type(pop_data) == list: - for pop in pop_data: - id = pop['id'] - df.loc[index, f'{name}_ac_{id}'] = pop['ac'] - df.loc[index, f'{name}_an_{id}'] = pop['an'] - - for i in range(len(exome_populations)): - exome_pop = exome_populations[i] - process_population_data(exome_pop, 'exome', ids, i) - genome_pop = genome_populations[i] - process_population_data(genome_pop, 'genome', ids, i) - - for id in ids: - df[f'Allele_Frequency_{id}'] = (df[f'exome_ac_{id}'].fillna(0) + df[f'genome_ac_{id}'].fillna(0)) / ( - df[f'exome_an_{id}'].fillna(0) + df[f'genome_an_{id}'].fillna(0)) - population_mapping = { + response = requests.post(url, json={'query': query}, timeout=300)# timeout set to 5 minutes + + if response.status_code != 200: + print('Error:', response.status_code) + return None + + data = response.json()['data']['gene']['variants'] + + df = pd.json_normalize(data) + + df['total_ac'] = df['exome.ac'].fillna(0) + df['genome.ac'].fillna(0) + df['total_an'] = df['exome.an'].fillna(0) + df['genome.an'].fillna(0) + + df['HGVS Consequence'] = df['hgvsc'].fillna(0) # cDNA change + df['Protein Consequence'] = df['hgvsp'].fillna(0) # Protein change + + df['Allele Frequency'] = df['total_ac'] / df['total_an'] + df['Homozygote Count'] = df['exome.ac_hom'].fillna(0) + df['genome.ac_hom'].fillna(0) + exome_populations = df['exome.populations'] + genome_populations = df['genome.populations'] + ids = ['afr', 'eas', 'asj', 'sas', 'nfe', 'fin', 'mid', 'amr', 'ami', 'remaining'] + + for i in range(len(exome_populations)): + exome_pop = exome_populations[i] + process_population_data(df, exome_pop, 'exome', ids, i) + genome_pop = genome_populations[i] + process_population_data(df, genome_pop, 'genome', ids, i) + + for variant_id in ids: + df[f'Allele_Frequency_{variant_id}'] = (df[f'exome_ac_{variant_id}'].fillna(0) + 
df[f'genome_ac_{variant_id}'].fillna(0)) / ( + df[f'exome_an_{variant_id}'].fillna(0) + df[f'genome_an_{variant_id}'].fillna(0)) + population_mapping = { 'afr': 'African/African American', 'eas': 'East Asian', 'asj': 'Ashkenazi Jew', @@ -355,22 +360,19 @@ def process_population_data(pop_data, name, pop_ids, index): 'remaining': 'Remaining', '': '' } - for i in range(len(df)): - max = 0 - maxid = '' - for id in ids: - if df.loc[i, f'Allele_Frequency_{id}'] > max: - max = df.loc[i, f'Allele_Frequency_{id}'] - maxid = id - df.loc[i, 'Popmax'] = max - df.loc[i, 'Popmax population'] = population_mapping[maxid] - not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency', 'variant_id', + for i in range(len(df)): + max_pop = 0 + maxid = '' + for variant_id in ids: + if df.loc[i, f'Allele_Frequency_{variant_id}'] > max_pop: + max_pop = df.loc[i, f'Allele_Frequency_{variant_id}'] + maxid = variant_id + df.loc[i, 'Popmax'] = max_pop + df.loc[i, 'Popmax population'] = population_mapping[maxid] + not_to_drop = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency', 'variant_id', 'cDNA change', 'Protein change'] - df = df.drop([col for col in df.columns if col not in not_to_drop], axis=1) - if to_file: - df.to_csv('variants.csv', index=True) + df = df.drop([col for col in df.columns if col not in not_to_drop], axis=1) - else: - print('Error:', response.status_code) + df.rename(columns={'variant_id': 'gnomAD ID'}, inplace=True) return df diff --git a/tests/pipeline.ipynb b/tests/pipeline.ipynb index fb86c24..6734e80 100644 --- a/tests/pipeline.ipynb +++ b/tests/pipeline.ipynb @@ -7,10 +7,6 @@ "collapsed": true, "jupyter": { "outputs_hidden": true - }, - "ExecuteTime": { - "end_time": "2024-08-22T17:20:23.240355Z", - "start_time": "2024-08-22T17:20:21.651097Z" } }, "source": [ @@ -19,30 +15,28 @@ "\n", "from api.data import (store_database_for_eys_gene,\n", " parse_lovd,\n", + " parse_gnomad,\n", " LOVD_PATH,\n", " set_lovd_dtypes,\n", - " request_clinvar_api_data,\n", - " get_variant_ids_from_clinvar_name_api,\n", + " set_gnomad_dtypes,\n", " request_gnomad_api_data,\n", + " merge_gnomad_lovd,\n", + " GNOMAD_PATH,\n", " )\n", "from api.data import save_lovd_as_vcf\n", "\n", + "\n", "pd.options.display.max_columns = 0" ], "outputs": [], - "execution_count": 1 + "execution_count": null }, { "cell_type": "code", "id": "f49f7691a27aa7b4", "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-08-11T16:16:57.305309Z", - "start_time": "2024-08-11T16:16:56.668571Z" - } + "collapsed": false }, - "source": [ "store_database_for_eys_gene(\"lovd\", override=False)" ], @@ -53,12 +47,10 @@ "cell_type": "code", "id": "cf5c45c0f7b9de0f", "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false } - }, "source": [ "data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")" @@ -67,62 +59,177 @@ "execution_count": null }, { + "metadata": {}, "cell_type": "code", - "id": "8a089e29bfc8c119", + "source": [ + "gnomad_data = request_gnomad_api_data(\"EYS\")\n", + "\n", + "display(gnomad_data)" + ], + "id": "64482c033c794fb4", + "outputs": [], + "execution_count": null + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-28T18:11:25.802540Z", + "start_time": "2024-08-28T18:11:25.715039Z" + } + }, + "cell_type": "code", + "source": [ + "store_database_for_eys_gene('gnomad', False)\n", + "\n", + "gnomad_data_2 = parse_gnomad(GNOMAD_PATH +'/gnomad_data.csv')" + ], + "id": "60f3f3074a9b19f4", + "outputs": [], + "execution_count": 24 + }, + { 
"metadata": {}, + "cell_type": "code", + "source": "display(gnomad_data_2)", + "id": "9d3e4d6b5f7be127", + "outputs": [], + "execution_count": null + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-08-28T18:11:35.536411Z", + "start_time": "2024-08-28T18:11:35.258009Z" + } + }, + "cell_type": "code", "source": [ - "for i in data:\n", - " print(i)\n", - " display(data[i])" + "gnomad_data_2.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_downloaded.csv', index=False)\n", + "gnomad_data.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_api.csv', index=False)" + ], + "id": "2e869f5c77dbe3d3", + "outputs": [], + "execution_count": 26 + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "len(gnomad_data_2), len(gnomad_data)\n", + "\n", + "print(len(gnomad_data_2) - len(gnomad_data))" ], + "id": "9efafb201061c146", "outputs": [], "execution_count": null }, { + "metadata": {}, "cell_type": "code", - "id": "ef07740b2fa63e42", + "source": [ + "gnomad_data_2.rename(columns={'gnomAD ID': 'variant_id'}, inplace=True)\n", + "\n", + "missing_from_api = []\n", + "\n", + "for i in gnomad_data['variant_id']:\n", + " if(i in gnomad_data_2['variant_id'].values):\n", + " continue\n", + " missing_from_api.append(i)\n", + "\n", + "len(missing_from_api)\n", + "\n", + "missing_data = gnomad_data.loc[gnomad_data['variant_id'].isin(missing_from_api)]\n", + "\n", + "missing_data" + ], + "id": "d0eb0a6db96d31c8", + "outputs": [], + "execution_count": null + }, + { "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false + "ExecuteTime": { + "end_time": "2024-08-28T18:06:31.488622Z", + "start_time": "2024-08-28T18:06:31.471299Z" } }, + "cell_type": "code", + "source": "missing_data.to_csv('C:\\\\Users\\\\Kajus\\\\Desktop\\\\gnomad_data_missing.csv', index=False)", + "id": "388120b03b094511", + "outputs": [], + "execution_count": 23 + }, + { + "metadata": {}, + "cell_type": "code", "source": [ "set_lovd_dtypes(data)\n", + "set_gnomad_dtypes(gnomad_data)\n", + "\n", + "variants_on_genome = data[\"Variants_On_Genome\"].copy()\n", + "\n", + "lovd_data = pd.merge(data[\"Variants_On_Transcripts\"],\n", + " variants_on_genome[['id','VariantOnGenome/DNA/hg38']],\n", + " on='id',\n", + " how='left')\n", + "\n", + "gnomad_data = gnomad_data.copy()\n", + "final_data = merge_gnomad_lovd(lovd_data, gnomad_data)\n", + "final_data" + ], + "id": "96453d88e353aeb1", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ "for i in data:\n", " print(i)\n", - " display(data[i].info())" + " display(data[i])" ], + "id": "8a089e29bfc8c119", "outputs": [], "execution_count": null }, { - "cell_type": "code", - "id": "c968af1617be40db", "metadata": {}, + "cell_type": "code", "source": [ - "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")" + "set_lovd_dtypes(data)\n", + "for i in data:\n", + " print(i)\n", + " display(data[i].info())" ], + "id": "ef07740b2fa63e42", "outputs": [], "execution_count": null }, { + "metadata": {}, "cell_type": "code", - "id": "c7ff16903e0c52bd", + "source": "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")", + "id": "c968af1617be40db", + "outputs": [], + "execution_count": null + }, + { "metadata": {}, + "cell_type": "code", "source": [ "from subprocess import Popen\n", "\n", "process = Popen(\"spliceai -I ./lovd.vcf -O ./lovd_output.vcf -R ../tools/spliceai/hg38.fa -A grch38\".split())\n", "process.wait()" ], + "id": "c7ff16903e0c52bd", "outputs": [], "execution_count": null }, { 
- "cell_type": "code", - "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", "metadata": {}, + "cell_type": "code", "source": [ "from api.tools import get_revel_scores\n", "\n", @@ -133,201 +240,17 @@ "\n", "display(results)" ], + "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", "outputs": [], "execution_count": null }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-08-22T17:23:41.828469Z", - "start_time": "2024-08-22T17:21:09.627424Z" - } - }, - "cell_type": "code", - "source": [ - "gnomad_from_api = request_gnomad_api_data(\"EYS\", False)\n", - "\n", - "display(gnomad_from_api)" - ], - "id": "64482c033c794fb4", - "outputs": [ - { - "data": { - "text/plain": [ - " variant_id cDNA change ... Popmax Popmax population\n", - "0 6-63720525-A-G c.*71T>C ... 0.000016 African/African American\n", - "1 6-63720525-A-T c.*71T>A ... 0.000192 East Asian\n", - "2 6-63720525-A-C c.*71T>G ... 0.000000 \n", - "3 6-63720526-T-A c.*70A>T ... 0.000020 South Asian\n", - "4 6-63720527-G-T c.*69C>A ... 0.000000 \n", - "... ... ... ... ... ...\n", - "14295 6-65495479-G-T c.-69C>A ... 0.000000 \n", - "14296 6-65495479-G-A c.-69C>T ... 0.000031 African/African American\n", - "14297 6-65495482-A-G c.-72T>C ... 0.000070 Admixed American\n", - "14298 6-65495484-T-G c.-74A>C ... 0.000060 South Asian\n", - "14299 6-65495485-T-C c.-75A>G ... 0.000012 South Asian\n", - "\n", - "[14300 rows x 7 columns]" - ], - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
variant_idcDNA changeProtein changeAllele FrequencyHomozygote CountPopmaxPopmax population
06-63720525-A-Gc.*71T>C01.807419e-060.00.000016African/African American
16-63720525-A-Tc.*71T>A06.573844e-060.00.000192East Asian
26-63720525-A-Cc.*71T>G00.000000e+000.00.000000
36-63720526-T-Ac.*70A>T01.045299e-060.00.000020South Asian
46-63720527-G-Tc.*69C>A00.000000e+000.00.000000
........................
142956-65495479-G-Tc.-69C>A00.000000e+000.00.000000
142966-65495479-G-Ac.-69C>T01.446349e-060.00.000031African/African American
142976-65495482-A-Gc.-72T>C02.629510e-060.00.000070Admixed American
142986-65495484-T-Gc.-74A>C03.645085e-060.00.000060South Asian
142996-65495485-T-Cc.-75A>G07.310070e-070.00.000012South Asian
\n", - "

14300 rows × 7 columns

\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 2 - }, { "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, "source": "", - "id": "6f0abfb50bd211a0" - + "id": "6f0abfb50bd211a0", + "outputs": [], + "execution_count": null } ], "metadata": { From 429f4ccc0d5a9c336e76f1b09702289a84c36375 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:38:56 +0300 Subject: [PATCH 09/72] MDE/PKFE-19 implemented styled context menu dialog --- .../fileTreeItemContextMenuStyledDialog.tsx | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuStyledDialog.tsx diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuStyledDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuStyledDialog.tsx new file mode 100644 index 0000000..69209d6 --- /dev/null +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuStyledDialog.tsx @@ -0,0 +1,13 @@ +import { Dialog, styled } from '@mui/material'; + +export const FileTreeItemContextMenuStyledDialog = styled(Dialog)(({ theme }) => ({ + '& .MuiDialogActions-root': { + padding: '1.5rem', + }, + '& .MuiDialog-paper': { + borderRadius: '1.5rem', + minWidth: '20%', + backgroundColor: theme.palette.background.paper, + backgroundImage: 'none', + }, +})); From bfbb210b6fabae46e0f8357f074fafd4ab4fe81e Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:41:29 +0300 Subject: [PATCH 10/72] MDE/PKFE-19 defined new constants and types --- app/back-end/src/constants.py | 5 ++++ .../src/types/constants/endpoints.ts | 23 ++++++++++++++--- app/front-end/src/types/constants/events.ts | 25 +++++++++++++++++++ app/front-end/src/types/enums/fileTypes.ts | 11 +++++--- 4 files changed, 57 insertions(+), 7 deletions(-) diff --git a/app/back-end/src/constants.py b/app/back-end/src/constants.py index c7d91f5..e33bc17 100644 --- a/app/back-end/src/constants.py +++ b/app/back-end/src/constants.py @@ -28,7 +28,12 @@ # Routes BASE_ROUTE = "/api/v1" WORKSPACE_ROUTE = "/workspace" +WORKSPACE_FILE_ROUTE = "/workspace/file" +WORKSPACE_CREATE_ROUTE = "/workspace/create" +WORKSPACE_RENAME_ROUTE = "/workspace/rename" +WORKSPACE_DELETE_ROUTE = "/workspace/delete" # Events CONSOLE_FEEDBACK_EVENT = "console_feedback" WORKSPACE_FILE_SAVE_FEEDBACK_EVENT = "workspace_file_save_feedback" +WORKSPACE_UPDATE_FEEDBACK_EVENT = "workspace_update_feedback" diff --git a/app/front-end/src/types/constants/endpoints.ts b/app/front-end/src/types/constants/endpoints.ts index 644272d..e95c382 100644 --- a/app/front-end/src/types/constants/endpoints.ts +++ b/app/front-end/src/types/constants/endpoints.ts @@ -1,25 +1,40 @@ /** - * Endpoints object defines the base URL paths for various API endpoints used in the application. + * `Endpoints` object defines the base URL paths for various API endpoints used in the application. * * @description This object contains string constants representing the relative paths for different API endpoints. These paths * are used to construct full URLs for making HTTP requests. The `Endpoints` object centralizes endpoint definitions, making it * easier to manage and update API paths across the application. * * The object includes: - * - `EXAMPLE`: The endpoint path for example requests. 
+ * - `EXAMPLE`: The endpoint path for example data. * - `WORKSPACE`: The endpoint path for workspace-related requests. + * - `WORKSPACE_FILE`: The endpoint path for file operations within the workspace. + * - `WORKSPACE_CREATE`: The endpoint path for creating new workspace items. + * - `WORKSPACE_RENAME`: The endpoint path for renaming workspace items. + * - `WORKSPACE_DELETE`: The endpoint path for deleting workspace items. * * @constant {Object} Endpoints - The object containing API endpoint paths. * @property {string} EXAMPLE - The endpoint path for example data. - * @property {string} WORKSPACE - The endpoint path for workspace data. + * @property {string} WORKSPACE - The endpoint path for workspace-related data. + * @property {string} WORKSPACE_FILE - The endpoint path for file operations within the workspace. + * @property {string} WORKSPACE_CREATE - The endpoint path for creating new items in the workspace. + * @property {string} WORKSPACE_RENAME - The endpoint path for renaming items in the workspace. + * @property {string} WORKSPACE_DELETE - The endpoint path for deleting items in the workspace. * * @example * // Example usage of the Endpoints object * const exampleUrl = `${API_URL}${Endpoints.EXAMPLE}`; * const workspaceUrl = `${API_URL}${Endpoints.WORKSPACE}`; + * const fileUrl = `${API_URL}${Endpoints.WORKSPACE_FILE}`; + * const createUrl = `${API_URL}${Endpoints.WORKSPACE_CREATE}`; + * const renameUrl = `${API_URL}${Endpoints.WORKSPACE_RENAME}`; + * const deleteUrl = `${API_URL}${Endpoints.WORKSPACE_DELETE}`; */ -/* eslint-disable quotes */ export const Endpoints = { EXAMPLE: `/example`, WORKSPACE: `/workspace`, + WORKSPACE_FILE: `/workspace/file`, + WORKSPACE_CREATE: `/workspace/create`, + WORKSPACE_RENAME: `/workspace/rename`, + WORKSPACE_DELETE: `/workspace/delete`, }; diff --git a/app/front-end/src/types/constants/events.ts b/app/front-end/src/types/constants/events.ts index 8d69421..33b7072 100644 --- a/app/front-end/src/types/constants/events.ts +++ b/app/front-end/src/types/constants/events.ts @@ -1,4 +1,29 @@ +/** + * `Events` object defines the event names used for communication within the application. + * + * @description This object contains string constants representing the names of various events that are used for handling + * real-time communications or updates in the application. These event names are used with event-driven systems, such as + * WebSocket connections, to emit and listen for specific events. + * + * The object includes: + * - `CONSOLE_FEEDBACK_EVENT`: The event name for console feedback messages. + * - `WORKSPACE_FILE_SAVE_FEEDBACK_EVENT`: The event name for feedback related to saving files in the workspace. + * - `WORKSPACE_UPDATE_FEEDBACK_EVENT`: The event name for feedback related to updates in the workspace. + * + * @constant {Object} Events - The object containing event names. + * @property {string} CONSOLE_FEEDBACK_EVENT - The event name for receiving console feedback messages. + * @property {string} WORKSPACE_FILE_SAVE_FEEDBACK_EVENT - The event name for receiving feedback on file save operations within the workspace. + * @property {string} WORKSPACE_UPDATE_FEEDBACK_EVENT - The event name for receiving feedback on workspace updates. 
+ * + * @example + * // Example usage of the Events object + * socket.emit(Events.CONSOLE_FEEDBACK_EVENT, feedbackData); + * socket.on(Events.WORKSPACE_FILE_SAVE_FEEDBACK_EVENT, (response) => { + * console.log('File save feedback:', response); + * }); + */ export const Events = { CONSOLE_FEEDBACK_EVENT: 'console_feedback', WORKSPACE_FILE_SAVE_FEEDBACK_EVENT: 'workspace_file_save_feedback', + WORKSPACE_UPDATE_FEEDBACK_EVENT: 'workspace_update_feedback', }; diff --git a/app/front-end/src/types/enums/fileTypes.ts b/app/front-end/src/types/enums/fileTypes.ts index 62da405..e253426 100644 --- a/app/front-end/src/types/enums/fileTypes.ts +++ b/app/front-end/src/types/enums/fileTypes.ts @@ -1,17 +1,19 @@ /** - * FileTypes enumeration defines the different types of files and folders used in the application. + * `FileTypes` enumeration defines the different types of files and folders used in the application. * - * @description This enum provides a set of constants representing various file types and folder categories. Each constant maps - * to a string value that corresponds to a specific file or folder type. This enumeration helps in managing and distinguishing + * @description This enum provides a set of constants representing various file types and folder categories. Each constant + * maps to a string value that corresponds to a specific file or folder type. This enumeration helps in managing and distinguishing * between different file formats and organizational structures within the application. * * The enum includes: + * - `FILE`: A generic type representing a file. * - `TXT`: Represents a plain text file with a `.txt` extension. * - `CSV`: Represents a comma-separated values file with a `.csv` extension. * - `DOC`: Represents a document file with a `.doc` extension. * - `FOLDER`: Represents a folder or directory. * * @enum {string} + * @property {string} FILE - A generic type for files. * @property {string} TXT - The file type for plain text files. * @property {string} CSV - The file type for CSV (comma-separated values) files. * @property {string} DOC - The file type for document files. 
@@ -25,6 +27,9 @@ * } */ export enum FileTypes { + // generic + FILE = 'file', + TXT = 'txt', CSV = 'csv', DOC = 'doc', From c02ba583a65a0028c4d40587809d0800b5e220d7 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:43:46 +0300 Subject: [PATCH 11/72] MDE/PKFE-19 implemented confirmation and texfield dialogs --- ...eTreeItemContextMenuConfirmationDialog.tsx | 107 ++++++++++++ ...fileTreeItemContextMenuTextfieldDialog.tsx | 155 ++++++++++++++++++ 2 files changed, 262 insertions(+) create mode 100644 app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx create mode 100644 app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx new file mode 100644 index 0000000..e4fd98c --- /dev/null +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx @@ -0,0 +1,107 @@ +import { FileTreeItemContextMenuStyledDialog } from '@/features/editor/components/fileTreeView/fileTreeItem'; +import { Close as CloseIcon } from '@mui/icons-material'; +import { + Button, + DialogActions, + DialogContent, + DialogTitle, + Grid, + IconButton, + Typography, + useTheme, +} from '@mui/material'; + +export interface FileTreeItemContextMenuConfirmationDialogProps { + open: boolean; + action: string; + content: { text: string; boldText: string }; + onClose: () => void; + onConfirm: () => void; +} + +/** + * `FileTreeItemContextMenuConfirmationDialog` component provides a confirmation dialog for actions related to file tree items. + * + * @description This component displays a confirmation dialog with a customizable action, content, and styling. It is used + * to confirm user actions such as deletion or renaming of file tree items. The dialog includes a title, content with bold text, + * and buttons for confirming or canceling the action. + * + * The dialog is styled using `FileTreeItemContextMenuStyledDialog` to ensure consistent appearance within the application. + * + * @component + * + * @param {FileTreeItemContextMenuConfirmationDialogProps} props - The props for the component. + * @param {boolean} props.open - A boolean indicating whether the dialog is open or not. + * @param {string} props.action - The action to be confirmed (e.g., "Delete", "Rename"). + * @param {Object} props.content - The content to be displayed in the dialog. + * @param {string} props.content.text - The main text content of the dialog. + * @param {string} props.content.boldText - The text that should be displayed in bold. + * @param {Function} props.onClose - The function to be called when the dialog is closed. + * @param {Function} props.onConfirm - The function to be called when the action is confirmed. + * + * @example + * // Example usage of the FileTreeItemContextMenuConfirmationDialog component + * + * + * @returns {JSX.Element} The confirmation dialog component. + */ +export const FileTreeItemContextMenuConfirmationDialog: React.FC = ({ + open, + action, + content, + onClose, + onConfirm, +}) => { + const Theme = useTheme(); + return ( + + + + + Confirm {action} + + + + + + + + + + + {content.text} + {content.boldText}? 
+ + + + + + + + ); +}; diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx new file mode 100644 index 0000000..d01e2a7 --- /dev/null +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx @@ -0,0 +1,155 @@ +import { FileTreeItemContextMenuStyledDialog } from '@/features/editor/components/fileTreeView/fileTreeItem'; +import { FileTreeViewItemProps } from '@/features/editor/types'; +import { FileTypes } from '@/types'; +import { Close as CloseIcon } from '@mui/icons-material'; +import { + Button, + DialogActions, + DialogContent, + DialogTitle, + Grid, + IconButton, + TextField, + Typography, + useTheme, +} from '@mui/material'; +import { useEffect, useState } from 'react'; + +export interface FileTreeItemContextMenuTextfieldDialogProps { + open: boolean; + title: string; + label: string; + item?: FileTreeViewItemProps; + onClose: () => void; + onSave: (label: string) => void; +} + +/** + * `FileTreeItemContextMenuTextfieldDialog` component provides a dialog for editing the label of a file tree item. + * + * @description This component displays a modal dialog for users to input or edit a label for a file or folder in the file tree. + * It includes validation to ensure the input is not empty and does not exceed 50 characters. The dialog features a title + * and a text field for input, with a save and cancel button. The dialog is styled with the application's theme for a consistent + * look and feel. + * + * @component + * + * @param {FileTreeItemContextMenuTextfieldDialogProps} props - The props for the component. + * @param {boolean} props.open - A boolean indicating whether the dialog is open or closed. + * @param {string} props.title - The title of the dialog. + * @param {string} props.label - The label for the text field. + * @param {FileTreeViewItemProps} [props.item] - Optional file tree item object that includes the current label and file type. + * @param {() => void} props.onClose - Callback function to be called when the dialog is closed. + * @param {(label: string) => void} props.onSave - Callback function to be called when the user saves the input. + * + * @example + * // Example usage of the FileTreeItemContextMenuTextfieldDialog component + * setDialogOpen(false)} + * onSave={(newLabel) => console.log('New label:', newLabel)} + * /> + * + * @returns {JSX.Element} The dialog component with a title, text field, and action buttons for saving or closing. + */ +export const FileTreeItemContextMenuTextfieldDialog: React.FC = ({ + open, + title, + label, + item, + onClose, + onSave, +}) => { + const Theme = useTheme(); + const [value, setValue] = useState(item?.label || ''); + const [error, setError] = useState(null); + + useEffect(() => { + if (open) { + setValue(item?.label || ''); + setError(null); + } + }, [open, item?.label]); + + const validateInput = (input: string) => { + if (!input.trim()) { + return 'Input cannot be empty'; + } + + if (input.length > 50) { + return 'Input must be less than 50 characters'; + } + + return null; // No error + }; + + const handleSave = () => { + const validationError = validateInput(value); + if (validationError) { + setError(validationError); + } else { + onSave(value); + } + }; + + return ( + + + + + + {title} {item ? (item.fileType === FileTypes.FOLDER ? 
'Folder' : 'File') : ''} + + + + + + + + + + + setValue(event.target.value)} + error={Boolean(error)} + helperText={error} + sx={{ + ':hover': { borderColor: Theme.palette.primary.main }, + backgroundColor: Theme.palette.background.paper, + justifyItems: 'center', + }} + /> + + + + + + + ); +}; From d4f9565fee7e4c8ecb1ee1b2b2028503ba27416f Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:44:38 +0300 Subject: [PATCH 12/72] MDE/PKFE-19 implemented context menu with functionality --- .../fileTreeItem/fileTreeItemContextMenu.tsx | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx new file mode 100644 index 0000000..cc6ed7a --- /dev/null +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx @@ -0,0 +1,177 @@ +import { + FileTreeItemContextMenuConfirmationDialog, + FileTreeItemContextMenuTextfieldDialog, +} from '@/features/editor/components/fileTreeView/fileTreeItem'; +import { useWorkspaceContext } from '@/features/editor/hooks'; +import { FileTreeViewItemProps } from '@/features/editor/types'; +import { axios } from '@/lib'; +import { Endpoints, FileTypes } from '@/types'; +import { Divider, Menu, MenuItem } from '@mui/material'; +import { useState } from 'react'; + +export interface FileTreeItemContextMenuProps { + item: FileTreeViewItemProps; + anchorPosition: { top: number; left: number }; + open: boolean; + onClose: () => void; +} + +export const FileTreeItemContextMenu: React.FC = ({ + item, + anchorPosition, + open, + onClose, +}) => { + const Workspace = useWorkspaceContext(); + const [newFileDialogOpen, setNewFileDialogOpen] = useState(false); + const [newFolderDialogOpen, setNewFolderDialogOpen] = useState(false); + const [renameDialogOpen, setRenameDialogOpen] = useState(false); + const [deleteDialogOpen, setDeleteDialogOpen] = useState(false); + const menuItems = []; + + if (item.fileType === undefined) { + menuItems.push( + handleActionContextMenu('newFile')}> + New file... + , + handleActionContextMenu('newFolder')}> + New folder... + , + , + handleActionContextMenu('import')} disabled> + Import... + + ); + } else { + if (item.fileType === FileTypes.FOLDER || item.fileType === undefined) { + menuItems.push( + handleActionContextMenu('newFile')}> + New file... + , + handleActionContextMenu('newFolder')}> + New folder... + , + , + handleActionContextMenu('import')} disabled> + Import... + , + + ); + } else { + menuItems.push( + handleActionContextMenu('export')} disabled> + Export... + , + + ); + } + + menuItems.push( + handleActionContextMenu('rename')}> + Rename... 
+ , + handleActionContextMenu('delete')}> + Delete + + ); + } + + const handleActionContextMenu = (action: string) => { + onClose(); + switch (action) { + case 'newFile': + setNewFileDialogOpen(true); + break; + case 'newFolder': + setNewFolderDialogOpen(true); + break; + case 'import': + // TODO: Implement file import + console.log('import'); + break; + case 'export': + // TODO: Implement file export + console.log('export'); + break; + case 'rename': + setRenameDialogOpen(true); + break; + case 'delete': + setDeleteDialogOpen(true); + break; + default: + break; + } + }; + + const handleNewFileSave = async (label: string) => { + setNewFileDialogOpen(false); + const data = { label, type: FileTypes.FILE }; + await axios.put(`${Endpoints.WORKSPACE_CREATE}/${item.id}`, data); + }; + + const handleNewFolderSave = async (label: string) => { + setNewFolderDialogOpen(false); + const data = { label, type: FileTypes.FOLDER }; + await axios.put(`${Endpoints.WORKSPACE_CREATE}/${item.id}`, data); + }; + + const handleRenameSave = async (label: string) => { + setRenameDialogOpen(false); + const data = { label, type: item.fileType === FileTypes.FOLDER ? FileTypes.FOLDER : FileTypes.FILE }; + const response = await axios.put(`${Endpoints.WORKSPACE_RENAME}/${item.id}`, data); + Workspace.remove(item.id); + if (Workspace.fileId === item.id) + Workspace.update(response.data.newId, response.data.newLabel, response.data.newType || FileTypes.FILE); + }; + + const handleDeleteConfirm = async () => { + setDeleteDialogOpen(false); + const data = { type: item.fileType === FileTypes.FOLDER ? FileTypes.FOLDER : FileTypes.FILE }; + await axios.put(`${Endpoints.WORKSPACE_DELETE}/${item.id}`, data); + Workspace.remove(item.id); + }; + + return ( + <> + + {menuItems} + + setNewFileDialogOpen(false)} + onSave={handleNewFileSave} + /> + setNewFolderDialogOpen(false)} + onSave={handleNewFolderSave} + /> + setRenameDialogOpen(false)} + onSave={handleRenameSave} + /> + setDeleteDialogOpen(false)} + onConfirm={handleDeleteConfirm} + /> + + ); +}; From 9a351b1c14ba3f5ee4eeee68df26c3803647e134 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:45:52 +0300 Subject: [PATCH 13/72] MDE/PKFE-19 implemented context menu into file tree item. 
In hierarchy field implemented with a button press --- .../fileTreeItem/fileTreeItem.tsx | 31 ++++++- .../fileTreeView/fileTreeItem/index.ts | 7 ++ .../components/fileTreeView/fileTreeView.tsx | 85 ++++++++++++++----- 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItem.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItem.tsx index 78fb186..109c98f 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItem.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItem.tsx @@ -1,3 +1,4 @@ +import { FileTreeItemContextMenu, FileTreeItemLabel } from '@/features/editor/components/fileTreeView/fileTreeItem'; import { useWorkspaceContext } from '@/features/editor/hooks'; import { getIconFromFileType, isExpandable } from '@/features/editor/utils'; import { FileTypes } from '@/types'; @@ -12,8 +13,7 @@ import { TreeItem2DragAndDropOverlay } from '@mui/x-tree-view/TreeItem2DragAndDr import { unstable_useTreeItem2 as useTreeItem2, UseTreeItem2Parameters } from '@mui/x-tree-view/useTreeItem2'; import { animated, useSpring } from '@react-spring/web'; import clsx from 'clsx'; -import React from 'react'; -import { FileTreeItemLabel } from '.'; +import React, { useState } from 'react'; const StyledFileTreeItemRoot = styled(TreeItem2Root)(({ theme }) => ({ //color: theme.palette.mode === 'light' ? theme.palette.grey[800] : theme.palette.grey[400], @@ -136,6 +136,11 @@ export const FileTreeItem = React.forwardRef(function CustomTreeItem( } const Workspace = useWorkspaceContext(); + const [contextMenu, setContextMenu] = useState<(EventTarget & HTMLDivElement) | null>(null); + const [contextMenuPosition, setContextMenuPosition] = useState<{ top: number; left: number }>({ + top: 0, + left: 0, + }); const handleClick = (newId: string, newLabel: string, newType: FileTypes) => { if (newType === FileTypes.FOLDER) return; @@ -143,6 +148,21 @@ export const FileTreeItem = React.forwardRef(function CustomTreeItem( Workspace.update(newId, newLabel, newType); }; + const handleOpenContextMenu = (event: React.MouseEvent) => { + event.stopPropagation(); + event.preventDefault(); + setContextMenu(event.currentTarget); + setContextMenuPosition({ + top: event.clientY, + left: event.clientX, + }); + }; + + const handleCloseContextMenu = () => { + setContextMenu(null); + setContextMenuPosition({ top: 0, left: 0 }); + }; + return ( @@ -152,6 +172,7 @@ export const FileTreeItem = React.forwardRef(function CustomTreeItem( if (getContentProps().onClick) getContentProps().onClick(event); handleClick(item.id, item.label, item.fileType); }, + onContextMenu: (event) => handleOpenContextMenu(event), className: clsx('content', { 'Mui-expanded': status.expanded, 'Mui-selected': status.selected, @@ -168,6 +189,12 @@ export const FileTreeItem = React.forwardRef(function CustomTreeItem( + {children && } diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts index 5180b58..d7190c2 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts @@ -1,2 +1,9 @@ export { FileTreeItem } from './fileTreeItem'; +export { FileTreeItemContextMenu } from './fileTreeItemContextMenu'; +export type { 
FileTreeItemContextMenuProps } from './fileTreeItemContextMenu'; +export { FileTreeItemContextMenuConfirmationDialog } from './fileTreeItemContextMenuConfirmationDialog'; +export type { FileTreeItemContextMenuConfirmationDialogProps } from './fileTreeItemContextMenuConfirmationDialog'; +export { FileTreeItemContextMenuStyledDialog } from './fileTreeItemContextMenuStyledDialog'; +export { FileTreeItemContextMenuTextfieldDialog } from './fileTreeItemContextMenuTextfieldDialog'; +export type { FileTreeItemContextMenuTextfieldDialogProps } from './fileTreeItemContextMenuTextfieldDialog'; export { FileTreeItemLabel } from './fileTreeItemLabel'; diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx index 105dfb5..01067d6 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx @@ -1,12 +1,12 @@ +import { FileTreeItem, FileTreeItemContextMenu } from '@/features/editor/components/fileTreeView/fileTreeItem'; import { FileTreeViewItemProps } from '@/features/editor/types'; import { useSessionContext } from '@/hooks'; -import { axios } from '@/lib'; -import { Endpoints } from '@/types'; -import { Box, LinearProgress } from '@mui/material'; +import { axios, socket } from '@/lib'; +import { Endpoints, Events } from '@/types'; +import { Box, Button, LinearProgress } from '@mui/material'; import { TreeViewBaseItem } from '@mui/x-tree-view'; import { RichTreeView } from '@mui/x-tree-view/RichTreeView'; -import { useEffect, useState } from 'react'; -import { FileTreeItem } from './fileTreeItem'; +import { useCallback, useEffect, useState } from 'react'; declare module 'react' { interface CSSProperties { @@ -40,23 +40,51 @@ export const FileTreeView: React.FC = () => { const [fileTreeViewData, setFileTreeViewData] = useState[]>([]); const [isLoading, setIsLoading] = useState(false); + const [contextMenu, setContextMenu] = useState<(EventTarget & HTMLButtonElement) | null>(null); + const [contextMenuPosition, setContextMenuPosition] = useState<{ top: number; left: number }>({ + top: 0, + left: 0, + }); + + const handleOpenContextMenu = (event: React.MouseEvent) => { + event.stopPropagation(); + event.preventDefault(); + setContextMenu(event.currentTarget); + setContextMenuPosition({ + top: event.clientY, + left: event.clientX, + }); + }; + + const handleCloseContextMenu = () => { + setContextMenu(null); + setContextMenuPosition({ top: 0, left: 0 }); + }; + + const getWorkspace = useCallback(async () => { + setIsLoading(true); + + try { + const response = await axios.get(Endpoints.WORKSPACE); + setFileTreeViewData(response.data); + } catch (error) { + console.error('Failed to fetch workspace data:', error); + } finally { + setIsLoading(false); + } + }, []); useEffect(() => { - const getWorkspace = async () => { - setIsLoading(true); + if (connected) { + getWorkspace(); + } - try { - const response = await axios.get(Endpoints.WORKSPACE); - setFileTreeViewData(response.data); - } catch (error) { - console.error('Failed to fetch workspace data:', error); - } finally { - setIsLoading(false); - } - }; + socket.on(Events.WORKSPACE_UPDATE_FEEDBACK_EVENT, getWorkspace); - if (connected) getWorkspace(); - }, [connected]); + return () => { + socket.off(Events.WORKSPACE_UPDATE_FEEDBACK_EVENT); + }; + }, [connected, getWorkspace]); return ( <> @@ -65,11 +93,22 @@ export const FileTreeView: React.FC = 
() => { ) : ( - + <> + + + + )} ); From b02578b25f31a153d0a2e36b48dfabd5ce7aac20 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:49:31 +0300 Subject: [PATCH 14/72] MDE/PKFE-19 implemented back-end logic for file/folder creation, renaming and deletion --- app/back-end/src/routes/workspace_route.py | 518 ++++++++++++++++++++- 1 file changed, 500 insertions(+), 18 deletions(-) diff --git a/app/back-end/src/routes/workspace_route.py b/app/back-end/src/routes/workspace_route.py index c821341..0b39a47 100644 --- a/app/back-end/src/routes/workspace_route.py +++ b/app/back-end/src/routes/workspace_route.py @@ -90,6 +90,11 @@ WORKSPACE_DIR, WORKSPACE_TEMPLATE_DIR, WORKSPACE_ROUTE, + WORKSPACE_FILE_ROUTE, + WORKSPACE_CREATE_ROUTE, + WORKSPACE_RENAME_ROUTE, + WORKSPACE_DELETE_ROUTE, + WORKSPACE_UPDATE_FEEDBACK_EVENT, CONSOLE_FEEDBACK_EVENT, WORKSPACE_FILE_SAVE_FEEDBACK_EVENT, ) @@ -221,7 +226,7 @@ def get_workspace(): return jsonify({"error": "An internal error occurred"}), 500 -@workspace_route_bp.route(f"{WORKSPACE_ROUTE}/", methods=["GET"]) +@workspace_route_bp.route(f"{WORKSPACE_FILE_ROUTE}/", methods=["GET"]) @compress.compressed() def get_workspace_file(relative_path): """ @@ -289,6 +294,7 @@ def get_workspace_file(relative_path): page = int(request.args.get("page", 0)) rows_per_page = int(request.args.get("rowsPerPage", 100)) + header = "" total_rows = 0 paginated_rows = [] @@ -301,25 +307,29 @@ def get_workspace_file(relative_path): # Copy the template from the template directory to the user's workspace shutil.copytree(WORKSPACE_TEMPLATE_DIR, user_workspace_dir) - # Costly operation to read the file and return the required rows. - # It gets more expensive as the page number increases, needs to go deeper into the file. - # Currently supports CSV files only. + # Check if file is empty + if os.path.getsize(file_path) != 0: - # Read the file and retrieve the rows - with open(file_path, "r", encoding="utf-8") as file: - reader = csv.reader(file) - # First line as header - header = next(reader) + # Costly operation to read the file and return the required rows. + # It gets more expensive as the page number increases, needs to go deeper into the file. + # Currently supports CSV files only. - # Read the rows within the specified range, otherwise skip to the next row. - # Loop ends when the end row is reached or the end of the file is reached. - for i, row in enumerate(reader): - if start_row <= i < end_row: - paginated_rows.append(row) - total_rows += 1 + # Read the file and retrieve the rows + with open(file_path, "r", encoding="utf-8") as file: + reader = csv.reader(file) + # First line as header + header = next(reader) - if i >= end_row: - break + if header: + # Read the rows within the specified range, otherwise skip to the next row. + # Loop ends when the end row is reached or the end of the file is reached. 
+ for i, row in enumerate(reader): + if start_row <= i < end_row: + paginated_rows.append(row) + total_rows += 1 + + if i >= end_row: + break # Build the response data response_data = { @@ -384,7 +394,7 @@ def get_workspace_file(relative_path): return jsonify({"error": "An internal error occurred"}), 500 -@workspace_route_bp.route(f"{WORKSPACE_ROUTE}/", methods=["PUT"]) +@workspace_route_bp.route(f"{WORKSPACE_FILE_ROUTE}/", methods=["PUT"]) @compress.compressed() def put_workspace_file(relative_path): """ @@ -569,3 +579,475 @@ def put_workspace_file(relative_path): sid, ) return jsonify({"error": "An internal error occurred"}), 500 + + +@workspace_route_bp.route(f"{WORKSPACE_CREATE_ROUTE}/", methods=["PUT"]) +@workspace_route_bp.route(f"{WORKSPACE_CREATE_ROUTE}/", methods=["PUT"]) +@compress.compressed() +def put_workspace_create(relative_path=None): + """ + Creates a new file or directory in the user's workspace. + + This endpoint handles: + - PUT `/workspace/create/`: Create at a specified `relative_path`. + - PUT `/workspace/create/`: Create at the root of the workspace. + + Parameters: + - `relative_path` (str, optional): Path in the workspace where the entity is created. + - Request Headers: + - `uuid` (str): User identifier (required). + - `sid` (str): User session identifier (required). + - Request Body (JSON): + - `label` (str): Name of the new file or directory. + - `type` (str): Type, either "file" or "folder". + + Responses: + - **200 OK**: JSON with `newId`, `newLabel`, and `newType`. + - **400 Bad Request**: Missing `uuid` or `sid` headers. + - **403 Forbidden**: Permission issues. + - **404 Not Found**: Path or file not found. + - **500 Internal Server Error**: Unexpected errors. + + Emits: + - Console and workspace update feedback via WebSocket. 
+ + Example Request: + ``` + PUT /workspace/create/myfolder + Headers: + uuid: + sid: + Body: + { + "label": "newfile.txt", + "type": "file" + } + ``` + + Example Response: + ```json + { + "newId": "myfolder/newfile.txt", + "newLabel": "newfile.txt", + "newType": "file" + } + ``` + """ + + uuid = request.headers.get("uuid") + sid = request.headers.get("sid") + + # Ensure the uuid header is present + if not uuid: + return jsonify({"error": "UUID header is missing"}), 400 + + # Ensure the sid header is present + if not sid: + return jsonify({"error": "SID header is missing"}), 400 + + data = request.json + label = data.get("label") + file_type = data.get("type") + + if relative_path is None: + relative_path = "" + + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + {"type": "info", "message": f"Creating {file_type} at '{relative_path}'..."}, + uuid, + sid, + ) + + user_workspace_dir = os.path.join(WORKSPACE_DIR, uuid) + folder_path = os.path.join(user_workspace_dir, relative_path) + destination_path = os.path.join(folder_path, label) + + try: + # Ensure the user specific directory exists + if not os.path.exists(user_workspace_dir): + # Copy the template from the template directory to the user's workspace + shutil.copytree(WORKSPACE_TEMPLATE_DIR, user_workspace_dir) + + # Ensure the directory exists + os.makedirs(os.path.dirname(folder_path), exist_ok=True) + + if file_type == "file": + open(destination_path, "w", encoding="utf-8").close() + elif file_type == "folder": + os.mkdir(destination_path) + + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + {"type": "succ", "message": f"Successfully created '{relative_path}/{label}'."}, + uuid, + sid, + ) + + # Emit a feedback to the user's workspace + socketio_emit_to_user_session( + WORKSPACE_UPDATE_FEEDBACK_EVENT, + {"status": "updated"}, + uuid, + sid, + ) + + # Build the response data + response_data = { + "newId": f"{relative_path}/{label}" if relative_path else label, + "newLabel": label, + "newType": file_type, + } + + return jsonify(response_data) + + except FileNotFoundError as e: + logger.error("FileNotFoundError: %s while creating %s", e, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"FileNotFoundError: {e} while creating {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "Requested file not found"}), 404 + except PermissionError as e: + logger.error("PermissionError: %s while creating %s", e, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"PermissionError: {e} while creating {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "Permission denied"}), 403 + except UnexpectedError as e: + logger.error("UnexpectedError: %s while creating %s", e.message, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"UnexpectedError: {e.message} while creating {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "An internal error occurred"}), 500 + + +@workspace_route_bp.route(f"{WORKSPACE_RENAME_ROUTE}/", methods=["PUT"]) +@compress.compressed() +def put_workspace_rename(relative_path): + """ + Renames a file or directory in the user's workspace. 
+ + - PUT `/workspace/rename/`: Rename the item at `relative_path`. + + Parameters: + - `relative_path` (str): Path of the item to be renamed. + - Request Headers: + - `uuid` (str): User identifier (required). + - `sid` (str): User session identifier (required). + - Request Body (JSON): + - `label` (str): New name for the item. + - `type` (str): Type, either "file" or "folder". + + Responses: + - **200 OK**: JSON with `newId`, `newLabel`, and `newType`. + - **400 Bad Request**: Missing `uuid` or `sid` headers. + - **403 Forbidden**: Permission issues. + - **404 Not Found**: Item not found. + - **500 Internal Server Error**: Unexpected errors. + + Emits: + - Console and workspace update feedback via WebSocket. + + Example Request: + ``` + PUT /workspace/rename/myfolder + Headers: + uuid: + sid: + Body: + { + "label": "newname", + "type": "folder" + } + ``` + + Example Response: + ```json + { + "newId": "myfolder/newname", + "newLabel": "newname", + "newType": "folder" + } + ``` + """ + + uuid = request.headers.get("uuid") + sid = request.headers.get("sid") + + # Ensure the uuid header is present + if not uuid: + return jsonify({"error": "UUID header is missing"}), 400 + + # Ensure the sid header is present + if not sid: + return jsonify({"error": "SID header is missing"}), 400 + + data = request.json + label = data.get("label") + file_type = data.get("type") + + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + {"type": "info", "message": f"Renaming {file_type} at '{relative_path}'..."}, + uuid, + sid, + ) + + user_workspace_dir = os.path.join(WORKSPACE_DIR, uuid) + destination_path = os.path.join(user_workspace_dir, relative_path) + new_path = os.path.join(os.path.dirname(destination_path), label) + + try: + # Ensure the user specific directory exists + if not os.path.exists(user_workspace_dir): + # Copy the template from the template directory to the user's workspace + shutil.copytree(WORKSPACE_TEMPLATE_DIR, user_workspace_dir) + + # Ensure the directory exists + os.makedirs(os.path.dirname(destination_path), exist_ok=True) + + # Rename the file or folder + os.rename(destination_path, new_path) + + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + {"type": "succ", "message": f"Successfully renamed '{relative_path}'."}, + uuid, + sid, + ) + + # Emit a feedback to the user's workspace + socketio_emit_to_user_session( + WORKSPACE_UPDATE_FEEDBACK_EVENT, + {"status": "updated"}, + uuid, + sid, + ) + + # Build the response data + response_data = { + "newId": ( + f"{os.path.dirname(relative_path)}/{label}" + if os.path.dirname(relative_path) + else label + ), + "newLabel": label, + "newType": file_type, + } + + return jsonify(response_data) + + except FileNotFoundError as e: + logger.error("FileNotFoundError: %s while renaming %s", e, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"FileNotFoundError: {e} while renaming {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "Requested file not found"}), 404 + except PermissionError as e: + logger.error("PermissionError: %s while renaming %s", e, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"PermissionError: {e} while renaming {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "Permission 
denied"}), 403 + except UnexpectedError as e: + logger.error("UnexpectedError: %s while renaming %s", e.message, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"UnexpectedError: {e.message} while renaming {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "An internal error occurred"}), 500 + + +@workspace_route_bp.route(f"{WORKSPACE_DELETE_ROUTE}/", methods=["PUT"]) +@compress.compressed() +def put_workspace_delete(relative_path): + """ + Deletes a file or directory from the user's workspace. + + - PUT `/workspace/delete/`: Deletes the item at `relative_path`. + + Parameters: + - `relative_path` (str): Path of the item to be deleted. + - Request Headers: + - `uuid` (str): User identifier (required). + - `sid` (str): User session identifier (required). + - Request Body (JSON): + - `type` (str): Type of the item to delete, either "file" or "folder". + + Responses: + - **200 OK**: JSON with `oldId` of the deleted item. + - **400 Bad Request**: Missing `uuid` or `sid` headers. + - **403 Forbidden**: Permission issues. + - **404 Not Found**: Item not found. + - **500 Internal Server Error**: Unexpected errors. + + Emits: + - Console and workspace update feedback via WebSocket. + + Example Request: + ``` + PUT /workspace/delete/myfolder + Headers: + uuid: + sid: + Body: + { + "type": "folder" + } + ``` + + Example Response: + ```json + { + "oldId": "myfolder" + } + ``` + """ + + uuid = request.headers.get("uuid") + sid = request.headers.get("sid") + + # Ensure the uuid header is present + if not uuid: + return jsonify({"error": "UUID header is missing"}), 400 + + # Ensure the sid header is present + if not sid: + return jsonify({"error": "SID header is missing"}), 400 + + data = request.json + file_type = data.get("type") + + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + {"type": "info", "message": f"Deleting {file_type} at '{relative_path}'..."}, + uuid, + sid, + ) + + user_workspace_dir = os.path.join(WORKSPACE_DIR, uuid) + destination_path = os.path.join(user_workspace_dir, relative_path) + + try: + # Ensure the user specific directory exists + if not os.path.exists(user_workspace_dir): + # Copy the template from the template directory to the user's workspace + shutil.copytree(WORKSPACE_TEMPLATE_DIR, user_workspace_dir) + + # Ensure the directory exists + os.makedirs(os.path.dirname(destination_path), exist_ok=True) + + # Delete the file or folder + if file_type == "file": + os.remove(destination_path) + elif file_type == "folder": + shutil.rmtree(destination_path) + + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + {"type": "succ", "message": f"Successfully deleted '{relative_path}'."}, + uuid, + sid, + ) + + # Emit a feedback to the user's workspace + socketio_emit_to_user_session( + WORKSPACE_UPDATE_FEEDBACK_EVENT, + {"status": "updated"}, + uuid, + sid, + ) + + # Build the response data + response_data = { + "oldId": relative_path, + } + + return jsonify(response_data) + + except FileNotFoundError as e: + logger.error("FileNotFoundError: %s while deleting %s", e, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"FileNotFoundError: {e} while deleting {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "Requested file not found"}), 
404 + except PermissionError as e: + logger.error("PermissionError: %s while deleting %s", e, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"PermissionError: {e} while deleting {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "Permission denied"}), 403 + except UnexpectedError as e: + logger.error("UnexpectedError: %s while deleting %s", e.message, destination_path) + # Emit a feedback to the user's console + socketio_emit_to_user_session( + CONSOLE_FEEDBACK_EVENT, + { + "type": "errr", + "message": f"UnexpectedError: {e.message} while deleting {destination_path}", + }, + uuid, + sid, + ) + return jsonify({"error": "An internal error occurred"}), 500 From 6aad38c3d6b3f76c992af4d579a12906256ed3bf Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:49:53 +0300 Subject: [PATCH 15/72] MDE/PKFE-19 bug-fix --- .../features/editor/components/editorView/editorView.tsx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/app/front-end/src/features/editor/components/editorView/editorView.tsx b/app/front-end/src/features/editor/components/editorView/editorView.tsx index 5ef03bc..aa2d240 100644 --- a/app/front-end/src/features/editor/components/editorView/editorView.tsx +++ b/app/front-end/src/features/editor/components/editorView/editorView.tsx @@ -54,7 +54,7 @@ export const EditorView: React.FC = () => { setIsLoading(true); try { - const response = await axios.get(`${Endpoints.WORKSPACE}/${Workspace.fileId}`, { + const response = await axios.get(`${Endpoints.WORKSPACE_FILE}/${Workspace.fileId}`, { params: { page: page, rowsPerPage: rowsPerPage, @@ -63,6 +63,12 @@ export const EditorView: React.FC = () => { const { totalRows, header, rows } = response.data as FileDataResponseDTO; + if (!header) { + setgridColumns([]); + setgridRows([]); + return; + } + const parsedColumns = header.map((value) => { return { field: value, From 2e6f993c27051d445dab207e1d8306d92eea9538 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 29 Aug 2024 01:54:22 +0300 Subject: [PATCH 16/72] MDE/PKFE-19 additional code documentation --- app/back-end/src/routes/workspace_route.py | 5 +-- .../fileTreeItem/fileTreeItemContextMenu.tsx | 31 +++++++++++++++++++ .../components/fileTreeView/fileTreeView.tsx | 18 ++++++----- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/app/back-end/src/routes/workspace_route.py b/app/back-end/src/routes/workspace_route.py index 0b39a47..5841b44 100644 --- a/app/back-end/src/routes/workspace_route.py +++ b/app/back-end/src/routes/workspace_route.py @@ -77,6 +77,7 @@ # pylint: disable=import-error # pylint: disable=too-many-locals +# pylint: disable=too-many-lines import os import shutil @@ -793,7 +794,7 @@ def put_workspace_rename(relative_path): } ``` """ - + uuid = request.headers.get("uuid") sid = request.headers.get("sid") @@ -948,7 +949,7 @@ def put_workspace_delete(relative_path): } ``` """ - + uuid = request.headers.get("uuid") sid = request.headers.get("sid") diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx index cc6ed7a..918a29e 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx +++ 
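The context-menu handlers documented in the hunk below call the workspace mutation endpoints added earlier in this series. A minimal sketch of that request/response contract, with hypothetical response type names (the JSON fields follow the back-end docstrings; the shared `axios` instance is assumed to attach the `uuid` and `sid` headers the routes require):

```typescript
import { axios } from '@/lib';
import { Endpoints, FileTypes } from '@/types';

// Hypothetical response shapes mirroring the back-end docstring examples.
interface WorkspaceMutationResponse {
  newId: string;
  newLabel: string;
  newType: FileTypes;
}

interface WorkspaceDeleteResponse {
  oldId: string;
}

// Create a file inside `parentId`, e.g. createFile('myfolder', 'newfile.csv').
export const createFile = async (
  parentId: string,
  label: string
): Promise<WorkspaceMutationResponse> => {
  const response = await axios.put(`${Endpoints.WORKSPACE_CREATE}/${parentId}`, {
    label,
    type: FileTypes.FILE,
  });
  return response.data;
};

// Delete a file or folder; the back-end answers with the removed item's id.
export const deleteItem = async (
  id: string,
  type: FileTypes
): Promise<WorkspaceDeleteResponse> => {
  const response = await axios.put(`${Endpoints.WORKSPACE_DELETE}/${id}`, { type });
  return response.data;
};
```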
b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx @@ -16,6 +16,37 @@ export interface FileTreeItemContextMenuProps { onClose: () => void; } +/** + * `FileTreeItemContextMenu` component provides a context menu for file tree items with options for creating, renaming, + * deleting files and folders, as well as other related actions. + * + * @description This component displays a context menu that allows users to perform various actions on file tree items, + * such as creating new files or folders, renaming existing items, or deleting items. The menu items are conditionally + * rendered based on the type of the selected file or folder. It also includes dialogs for entering information and + * confirming actions. + * + * The component uses `FileTreeItemContextMenuTextfieldDialog` for dialogs requiring text input and `FileTreeItemContextMenuConfirmationDialog` + * for actions requiring confirmation. The menu and dialogs are controlled via state hooks and props. + * + * @component + * + * @param {FileTreeItemContextMenuProps} props - The props for the component. + * @param {FileTreeViewItemProps} props.item - The file or folder item for which the context menu is displayed. + * @param {{ top: number; left: number }} props.anchorPosition - The position of the menu relative to the anchor. + * @param {boolean} props.open - A boolean indicating whether the context menu is open or not. + * @param {Function} props.onClose - The function to be called when the context menu is closed. + * + * @example + * // Example usage of the FileTreeItemContextMenu component + * + * + * @returns {JSX.Element} The context menu component with various menu items and dialogs. + */ export const FileTreeItemContextMenu: React.FC = ({ item, anchorPosition, diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx index 01067d6..639ab04 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeView.tsx @@ -16,14 +16,18 @@ declare module 'react' { } /** - * FileTreeView component displays a hierarchical tree view of files and directories. + * `FileTreeView` component renders a hierarchical tree view of files and directories. * - * @description This component renders a `RichTreeView` with items fetched from the workspace endpoint. It displays a loading - * indicator while the data is being fetched and presents the tree view when the data is loaded. The tree view uses `FileTreeItem` - * to represent each item. The component handles asynchronous data fetching and provides visual feedback using `LinearProgress`. + * @description This component utilizes `RichTreeView` from Material-UI to display a hierarchical view of files and directories + * fetched from the workspace endpoint. It handles loading states with `LinearProgress` and displays a context menu for file + * operations. The context menu is controlled by state and provides options such as creating new files or folders. * - * The `RichTreeView` component from Material-UI's TreeView package is utilized to render the hierarchical structure, with - * a custom `item` slot for rendering each tree node via the `FileTreeItem` component. + * The component: + * - Fetches file tree data asynchronously from the workspace API endpoint. + * - Displays a loading indicator while data is being fetched. 
+ * - Renders the file tree using `FileTreeItem` for each item. + * - Manages the state and position of a context menu that appears on right-click or button click. + * - Listens for updates via WebSocket to refresh the file tree data. * * @component * @@ -33,7 +37,7 @@ declare module 'react' { * * ); * - * @returns {JSX.Element} The rendered tree view component, displaying either a loading indicator or the file tree. + * @returns {JSX.Element} The rendered tree view component, showing either a loading indicator or the file tree. */ export const FileTreeView: React.FC = () => { const { connected } = useSessionContext(); From d52e08f80ca80de88fbe201974f41dc46ecb99cc Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 1 Sep 2024 02:19:09 +0300 Subject: [PATCH 17/72] MDE/PKFE-19 updated workspace provider for hierarchy --- .../stores/workspaceContextProvider.tsx | 64 +++++++++++++++---- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx b/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx index e34d965..cda592c 100644 --- a/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx +++ b/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx @@ -1,6 +1,10 @@ import { FilebarGroupItemProps } from '@/features/editor/components/filebarView'; -import { FileTypes } from '@/types'; -import React, { createContext, useState } from 'react'; +import { FileTreeViewItemProps } from '@/features/editor/types'; +import { useSessionContext } from '@/hooks'; +import { axios, socket } from '@/lib'; +import { Endpoints, Events, FileTypes } from '@/types'; +import { TreeViewBaseItem } from '@mui/x-tree-view'; +import React, { createContext, useCallback, useEffect, useState } from 'react'; export interface WorkspaceContextProps { fileId: string; @@ -9,6 +13,8 @@ export interface WorkspaceContextProps { update: (newId: string, newLabel: string, newType: FileTypes) => void; fileHistory: FilebarGroupItemProps[]; remove: (fileId: string) => void; + fileTreeViewItems: TreeViewBaseItem[] | undefined; + fileTreeViewIsLoading: boolean; } export const WorkspaceContext = createContext({ @@ -18,6 +24,8 @@ export const WorkspaceContext = createContext({ update: () => {}, fileHistory: [], remove: () => {}, + fileTreeViewItems: undefined, + fileTreeViewIsLoading: true, }); interface Props { @@ -25,19 +33,21 @@ interface Props { } /** - * `WorkspaceContextProvider` is a component that provides context for managing the current workspace file's state. + * `WorkspaceContextProvider` is a React component that provides context for managing workspace file state across the application. * - * @description This component sets up a React context for managing and sharing workspace file information across the - * application. It tracks the current file's ID, label, and type, and provides an API to update these values. The context - * also maintains a history of recently opened files (excluding folders) and supports removing files from this history. + * @description This component sets up a context that holds and manages the state related to the current file in the workspace, + * including its ID, label, type, and history. It provides functions to update the current file, add or remove files from the + * history, and manage the file tree view data. The context is also updated with real-time changes via WebSocket. * * The context includes: * - `fileId`: The unique identifier for the current file. 
* - `fileLabel`: The label or name of the current file. - * - `fileType`: The type of the file (e.g., folder, document). - * - `update`: A function to update the current file's ID, label, and type, and add the file to the history. - * - `fileHistory`: An array of recently opened files, excluding folders. - * - `remove`: A function to remove a file from the history and update the current file if the removed file was active. + * - `fileType`: The type of the file (e.g., `FileTypes.FILE`, `FileTypes.FOLDER`). + * - `update`: A function to update the current file's ID, label, and type, and add the file to the history if it's not a folder. + * - `fileHistory`: An array of recently accessed files, excluding folders, with their IDs, labels, and types. + * - `remove`: A function to remove a file from the history and update the current file if it was active. + * - `fileTreeViewItems`: The hierarchical data for the file tree view. + * - `fileTreeViewIsLoading`: A boolean indicating if the file tree view data is still loading. * * @component * @@ -52,13 +62,43 @@ interface Props { * @param {Object} props - The props for the WorkspaceContextProvider component. * @param {React.ReactNode} [props.children] - Optional child components that will have access to the workspace context. * - * @returns {JSX.Element} The `WorkspaceContext.Provider` with the current workspace context value. + * @returns {JSX.Element} The `WorkspaceContext.Provider` component with the current workspace context value. */ export const WorkspaceContextProvider: React.FC = ({ children }) => { const [fileId, setFileId] = useState(''); const [fileLabel, setFileLabel] = useState(''); const [fileType, setFileType] = useState(FileTypes.FOLDER); const [fileHistory, setFileHistory] = useState([]); + const [fileTreeViewItems, setFileTreeViewItems] = useState[] | undefined>( + undefined + ); + const [fileTreeViewIsLoading, setFileTreeViewIsLoading] = useState(true); + + const { connected } = useSessionContext(); + + const getWorkspace = useCallback(async () => { + setFileTreeViewIsLoading(true); + try { + const response = await axios.get(Endpoints.WORKSPACE); + setFileTreeViewItems(response.data); + } catch (error) { + console.error('Failed to fetch workspace data:', error); + } finally { + setFileTreeViewIsLoading(false); + } + }, []); + + useEffect(() => { + if (connected) { + getWorkspace(); + } + + socket.on(Events.WORKSPACE_UPDATE_FEEDBACK_EVENT, getWorkspace); + + return () => { + socket.off(Events.WORKSPACE_UPDATE_FEEDBACK_EVENT); + }; + }, [connected, getWorkspace]); const update = (newId: string, newLabel: string, newType: FileTypes) => { setFileId(newId); @@ -112,6 +152,8 @@ export const WorkspaceContextProvider: React.FC = ({ children }) => { update, fileHistory, remove, + fileTreeViewItems, + fileTreeViewIsLoading, }; return {children}; From ec753568278cd7132fbe9357f50452a0a8fb89b2 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 1 Sep 2024 02:20:11 +0300 Subject: [PATCH 18/72] MDE/PKFE-19 implemented function for checking if specific file exists --- .../src/features/editor/utils/helpers.ts | 17 +++++++++++++++++ .../src/features/editor/utils/index.ts | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/app/front-end/src/features/editor/utils/helpers.ts b/app/front-end/src/features/editor/utils/helpers.ts index ed53cf5..790c150 100644 --- a/app/front-end/src/features/editor/utils/helpers.ts +++ b/app/front-end/src/features/editor/utils/helpers.ts @@ -1,5 +1,7 @@ import { FileTypes } from '@/types/enums'; 
import { Article as ArticleIcon, FolderRounded, InsertDriveFile as InsertDriveFileIcon } from '@mui/icons-material'; +import { TreeViewBaseItem } from '@mui/x-tree-view'; +import { FileTreeViewItemProps } from '../types'; export const isExpandable = (reactChildren: React.ReactNode) => { if (Array.isArray(reactChildren)) { @@ -22,3 +24,18 @@ export const getIconFromFileType = (fileType: FileTypes) => { return InsertDriveFileIcon; } }; + +export const doesFileExist = (fileTreeView: TreeViewBaseItem[], path: string): boolean => { + for (const item of fileTreeView) { + if (item.id === path) { + return true; + } + + if (path.startsWith(item.id) && item.children) { + if (doesFileExist(item.children, path)) { + return true; + } + } + } + return false; +} \ No newline at end of file diff --git a/app/front-end/src/features/editor/utils/index.ts b/app/front-end/src/features/editor/utils/index.ts index c2774e5..18e5ab4 100644 --- a/app/front-end/src/features/editor/utils/index.ts +++ b/app/front-end/src/features/editor/utils/index.ts @@ -1 +1 @@ -export { getIconFromFileType, isExpandable } from './helpers'; +export { doesFileExist, getIconFromFileType, isExpandable } from './helpers'; From 7038a06932de984101d788ae4bdcc9370824738a Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 1 Sep 2024 02:25:08 +0300 Subject: [PATCH 19/72] MDE/PKFE-19 updated context menu textfield dialog for additional checks and removed ability to specify custom file extensions --- .../fileTreeItem/fileTreeItemContextMenu.tsx | 45 ++-- ...fileTreeItemContextMenuTextfieldDialog.tsx | 220 +++++++++++++++--- .../types/fileTreeItemContextMenuActions.tsx | 8 + .../src/features/editor/types/index.ts | 1 + 4 files changed, 212 insertions(+), 62 deletions(-) create mode 100644 app/front-end/src/features/editor/types/fileTreeItemContextMenuActions.tsx diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx index 918a29e..47eedea 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenu.tsx @@ -3,7 +3,7 @@ import { FileTreeItemContextMenuTextfieldDialog, } from '@/features/editor/components/fileTreeView/fileTreeItem'; import { useWorkspaceContext } from '@/features/editor/hooks'; -import { FileTreeViewItemProps } from '@/features/editor/types'; +import { FileTreeItemContextMenuActions, FileTreeViewItemProps } from '@/features/editor/types'; import { axios } from '@/lib'; import { Endpoints, FileTypes } from '@/types'; import { Divider, Menu, MenuItem } from '@mui/material'; @@ -60,7 +60,7 @@ export const FileTreeItemContextMenu: React.FC = ( const [deleteDialogOpen, setDeleteDialogOpen] = useState(false); const menuItems = []; - if (item.fileType === undefined) { + if (item.fileType === undefined || item.fileType === FileTypes.FOLDER) { menuItems.push( handleActionContextMenu('newFile')}> New file... @@ -74,29 +74,19 @@ export const FileTreeItemContextMenu: React.FC = ( ); } else { - if (item.fileType === FileTypes.FOLDER || item.fileType === undefined) { - menuItems.push( - handleActionContextMenu('newFile')}> - New file... - , - handleActionContextMenu('newFolder')}> - New folder... - , - , - handleActionContextMenu('import')} disabled> - Import... 
- , - - ); - } else { - menuItems.push( - handleActionContextMenu('export')} disabled> - Export... - , - - ); - } + menuItems.push( + handleActionContextMenu('export')} disabled> + Export... + , + + ); + } + if (item.fileType === FileTypes.FOLDER) { + menuItems.push(); + } + + if (item.fileType !== undefined) { menuItems.push( handleActionContextMenu('rename')}> Rename... @@ -176,20 +166,25 @@ export const FileTreeItemContextMenu: React.FC = ( setNewFileDialogOpen(false)} onSave={handleNewFileSave} /> setNewFolderDialogOpen(false)} onSave={handleNewFolderSave} /> = ( /> setDeleteDialogOpen(false)} onConfirm={handleDeleteConfirm} diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx index d01e2a7..0d200e8 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuTextfieldDialog.tsx @@ -1,14 +1,21 @@ import { FileTreeItemContextMenuStyledDialog } from '@/features/editor/components/fileTreeView/fileTreeItem'; -import { FileTreeViewItemProps } from '@/features/editor/types'; +import { useWorkspaceContext } from '@/features/editor/hooks'; +import { FileTreeItemContextMenuActions, FileTreeViewItemProps } from '@/features/editor/types'; +import { doesFileExist } from '@/features/editor/utils'; import { FileTypes } from '@/types'; import { Close as CloseIcon } from '@mui/icons-material'; import { + Box, Button, DialogActions, DialogContent, DialogTitle, + FormControl, Grid, IconButton, + MenuItem, + Select, + SelectChangeEvent, TextField, Typography, useTheme, @@ -17,37 +24,40 @@ import { useEffect, useState } from 'react'; export interface FileTreeItemContextMenuTextfieldDialogProps { open: boolean; + action: FileTreeItemContextMenuActions; title: string; label: string; - item?: FileTreeViewItemProps; + item: FileTreeViewItemProps; onClose: () => void; onSave: (label: string) => void; } /** - * `FileTreeItemContextMenuTextfieldDialog` component provides a dialog for editing the label of a file tree item. + * `FileTreeItemContextMenuTextfieldDialog` is a dialog component that allows users to input or edit the label of a file tree item. * - * @description This component displays a modal dialog for users to input or edit a label for a file or folder in the file tree. - * It includes validation to ensure the input is not empty and does not exceed 50 characters. The dialog features a title - * and a text field for input, with a save and cancel button. The dialog is styled with the application's theme for a consistent - * look and feel. + * @description This component provides a modal dialog for editing or creating a file or folder label within a file tree view. + * It supports different actions, such as renaming an existing file or folder, or creating a new file or folder. The dialog includes + * input validation to ensure the new label is valid, such as not being empty, not exceeding 50 characters, and not containing + * forbidden characters. For file creation, it allows selecting a file extension. * * @component * * @param {FileTreeItemContextMenuTextfieldDialogProps} props - The props for the component. - * @param {boolean} props.open - A boolean indicating whether the dialog is open or closed. 
- * @param {string} props.title - The title of the dialog. - * @param {string} props.label - The label for the text field. - * @param {FileTreeViewItemProps} [props.item] - Optional file tree item object that includes the current label and file type. + * @param {boolean} props.open - A boolean that determines whether the dialog is visible or hidden. + * @param {FileTreeItemContextMenuActions} props.action - The action to be performed, such as renaming or creating a new file. + * @param {string} props.title - The title displayed at the top of the dialog. + * @param {string} props.label - The label for the input field. + * @param {FileTreeViewItemProps} props.item - The file tree item object being edited, containing information such as the current label and file type. * @param {() => void} props.onClose - Callback function to be called when the dialog is closed. - * @param {(label: string) => void} props.onSave - Callback function to be called when the user saves the input. + * @param {(label: string) => void} props.onSave - Callback function to be called when the user saves the new label. * * @example * // Example usage of the FileTreeItemContextMenuTextfieldDialog component * setDialogOpen(false)} * onSave={(newLabel) => console.log('New label:', newLabel)} @@ -57,6 +67,7 @@ export interface FileTreeItemContextMenuTextfieldDialogProps { */ export const FileTreeItemContextMenuTextfieldDialog: React.FC = ({ open, + action, title, label, item, @@ -64,45 +75,161 @@ export const FileTreeItemContextMenuTextfieldDialog: React.FC { const Theme = useTheme(); - const [value, setValue] = useState(item?.label || ''); + const { fileTreeViewItems } = useWorkspaceContext(); + + const [value, setValue] = useState(() => { + switch (action) { + case FileTreeItemContextMenuActions.RENAME: + if (item.fileType !== FileTypes.FOLDER) return item.label.match(/^(.*)(?=\.[^.]+$)/)?.[0] || ''; + return item.label; + default: + return ''; + } + }); + + const [fileExtension, setFileExtension] = useState(() => { + switch (action) { + case FileTreeItemContextMenuActions.NEW_FILE: + return FileTypes.CSV; + case FileTreeItemContextMenuActions.NEW_FOLDER: + return ''; + case FileTreeItemContextMenuActions.RENAME: + if (item.fileType !== FileTypes.FOLDER) return item.label.match(/[^.]+$/)?.[0] || FileTypes.CSV; + return ''; + default: + return ''; + } + }); const [error, setError] = useState(null); useEffect(() => { if (open) { - setValue(item?.label || ''); + setValue(() => { + switch (action) { + case FileTreeItemContextMenuActions.RENAME: + if (item.fileType !== FileTypes.FOLDER) return item.label.match(/^(.*)(?=\.[^.]+$)/)?.[0] || ''; + return item.label; + default: + return ''; + } + }); + setFileExtension(() => { + switch (action) { + case FileTreeItemContextMenuActions.NEW_FILE: + return FileTypes.CSV; + case FileTreeItemContextMenuActions.NEW_FOLDER: + return ''; + case FileTreeItemContextMenuActions.RENAME: + if (item.fileType !== FileTypes.FOLDER) return item.label.match(/[^.]+$/)?.[0] || FileTypes.CSV; + return ''; + default: + return ''; + } + }); setError(null); } - }, [open, item?.label]); + }, [open, item, action]); + + const validateInput = (input: string, fileExtension: string) => { + const parentPath = item.id.match(/^(.*)(?=\/[^/]*$)/)?.[0] || ''; + const path = () => { + if (action === FileTreeItemContextMenuActions.RENAME) { + if (parentPath === '') return input; + return parentPath + '/' + input; + } else { + if (item.id === '') return input; + return item.id + '/' + input; + } + }; - const 
validateInput = (input: string) => { - if (!input.trim()) { + // Check if file already exists + switch (action) { + case FileTreeItemContextMenuActions.NEW_FILE: + if (doesFileExist(fileTreeViewItems || [], path() + '.' + fileExtension)) return 'This name already exists'; + break; + case FileTreeItemContextMenuActions.NEW_FOLDER: + if (doesFileExist(fileTreeViewItems || [], path())) return 'This name already exists'; + break; + case FileTreeItemContextMenuActions.RENAME: + if (fileExtension === '') { + if (doesFileExist(fileTreeViewItems || [], path())) return 'This name already exists'; + } else { + if (doesFileExist(fileTreeViewItems || [], path() + '.' + fileExtension)) return 'This name already exists'; + } + break; + default: + break; + } + + // Check if the input is empty + if (!input) { return 'Input cannot be empty'; } + // Check if the input exceeds the length limit if (input.length > 50) { return 'Input must be less than 50 characters'; } + // Check for forbidden characters + if (input.includes('\0')) { + return 'Input contains a forbidden null character'; + } + + if (input.includes('/')) { + return 'Input cannot contain a forward slash (/)'; + } + + if (/[*?[\]]/.test(input)) { + return 'Input contains forbidden glob characters (*, ?, [, ])'; + } + + // Check for special filename cases ('.' and '..') + if (input === '.' || input === '..') { + return 'Input cannot be "." or ".."'; + } + + // Check for leading or trailing dots + if (input.startsWith('.') || input.endsWith('.')) { + return 'Input cannot start or end with a dot'; + } + return null; // No error }; const handleSave = () => { - const validationError = validateInput(value); + const trimmedValue = value.trim(); + const validationError = validateInput(trimmedValue, fileExtension); if (validationError) { setError(validationError); } else { - onSave(value); + switch (action) { + case FileTreeItemContextMenuActions.NEW_FILE: + onSave(trimmedValue + '.' + fileExtension); + break; + case FileTreeItemContextMenuActions.NEW_FOLDER: + onSave(trimmedValue); + break; + case FileTreeItemContextMenuActions.RENAME: + if (fileExtension !== '') onSave(trimmedValue + '.' + fileExtension); + else onSave(trimmedValue); + break; + default: + break; + } } }; + const handleFileExtension = (event: SelectChangeEvent) => { + setFileExtension(event.target.value as FileTypes); + }; + return ( - - {title} {item ? (item.fileType === FileTypes.FOLDER ? 'Folder' : 'File') : ''} - + {title} - setValue(event.target.value)} - error={Boolean(error)} - helperText={error} - sx={{ - ':hover': { borderColor: Theme.palette.primary.main }, - backgroundColor: Theme.palette.background.paper, - justifyItems: 'center', - }} - /> + + setValue(event.target.value)} + error={Boolean(error)} + helperText={error} + sx={{ + ':hover': { borderColor: Theme.palette.primary.main }, + backgroundColor: Theme.palette.background.paper, + justifyItems: 'center', + }} + /> + {(action === FileTreeItemContextMenuActions.NEW_FILE || + (action === FileTreeItemContextMenuActions.RENAME && item.fileType !== FileTypes.FOLDER)) && ( + <> + + + + + )} + + + {filename === '' ? 
( + 'No file selected' + ) : ( + <> + Selected file: {filename} + + )} + + + + + + + + + ); +}; diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts index d7190c2..79fb0e7 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/index.ts @@ -3,6 +3,7 @@ export { FileTreeItemContextMenu } from './fileTreeItemContextMenu'; export type { FileTreeItemContextMenuProps } from './fileTreeItemContextMenu'; export { FileTreeItemContextMenuConfirmationDialog } from './fileTreeItemContextMenuConfirmationDialog'; export type { FileTreeItemContextMenuConfirmationDialogProps } from './fileTreeItemContextMenuConfirmationDialog'; +export { FileTreeItemContextMenuFileImportDialog } from './fileTreeItemContextMenuFileImportDialog'; export { FileTreeItemContextMenuStyledDialog } from './fileTreeItemContextMenuStyledDialog'; export { FileTreeItemContextMenuTextfieldDialog } from './fileTreeItemContextMenuTextfieldDialog'; export type { FileTreeItemContextMenuTextfieldDialogProps } from './fileTreeItemContextMenuTextfieldDialog'; diff --git a/app/front-end/src/stores/themeContextProvider.tsx b/app/front-end/src/stores/themeContextProvider.tsx index e265ac0..dc7770b 100644 --- a/app/front-end/src/stores/themeContextProvider.tsx +++ b/app/front-end/src/stores/themeContextProvider.tsx @@ -138,6 +138,14 @@ export const ThemeContextProvider: React.FC = ({ children }) => { }, }, }, + MuiButton: { + styleOverrides: { + root: { + fontFamily: 'Nunito', + textTransform: 'none', + }, + }, + }, }, }), [mode] From ac6b060064f0f66d090f3f4bd878bd691abc68e6 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Wed, 4 Sep 2024 02:41:25 +0300 Subject: [PATCH 36/72] MDE/PKFE-16 resolved back-end bug --- app/back-end/src/routes/workspace_aggregate_route.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/back-end/src/routes/workspace_aggregate_route.py b/app/back-end/src/routes/workspace_aggregate_route.py index af76e92..f60b8bd 100644 --- a/app/back-end/src/routes/workspace_aggregate_route.py +++ b/app/back-end/src/routes/workspace_aggregate_route.py @@ -252,11 +252,9 @@ def get_workspace_aggregate(relative_path): ), 404, ) - - header_index = header.index(field) - + for row in reader: - value = row[header_index] + value = row[header.index(field)] if action == "cnt": if value: result += float(1) From cf50ea3c86e6e73147a293ea5bcfa413ea466d15 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Wed, 4 Sep 2024 21:35:39 +0300 Subject: [PATCH 37/72] MDE/PKFE-16 final bug fixes --- .../src/routes/workspace_aggregate_route.py | 17 ++++++++++++++--- .../editor/components/editorView/editorView.tsx | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/app/back-end/src/routes/workspace_aggregate_route.py b/app/back-end/src/routes/workspace_aggregate_route.py index f60b8bd..87a245d 100644 --- a/app/back-end/src/routes/workspace_aggregate_route.py +++ b/app/back-end/src/routes/workspace_aggregate_route.py @@ -64,8 +64,14 @@ def get_workspace_aggregate_all(relative_path): if header: for row in reader: for field in columns_aggregation.keys(): + + header_index = header.index(field) + if header_index >= len(row): + skipped_counts[field] += 1 + continue + action = header_actions[field] - value = row[header.index(field)] + value = row[header_index] if action == "cnt": if 
value: header_values[field] += float(1) @@ -252,9 +258,14 @@ def get_workspace_aggregate(relative_path): ), 404, ) - + for row in reader: - value = row[header.index(field)] + header_index = header.index(field) + if header_index >= len(row): + skipped_count += 1 + continue + + value = row[header_index] if action == "cnt": if value: result += float(1) diff --git a/app/front-end/src/features/editor/components/editorView/editorView.tsx b/app/front-end/src/features/editor/components/editorView/editorView.tsx index 1ef85b5..1505885 100644 --- a/app/front-end/src/features/editor/components/editorView/editorView.tsx +++ b/app/front-end/src/features/editor/components/editorView/editorView.tsx @@ -115,7 +115,7 @@ export const EditorView: React.FC = () => { const getWorkspaceFile = useCallback(async () => { if (!file.id) { - fileStateReset(); + setFileContentResponse({ totalRows: 0, header: [], rows: [], page: 0 }); return; } From 2dcb1e5d05fdc5c8564a1922dde10cfae71e7190 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 5 Sep 2024 00:54:38 +0300 Subject: [PATCH 38/72] MDE/PKFE-32 implemented status context provider --- app/front-end/src/app/provider.tsx | 6 ++- app/front-end/src/hooks/index.ts | 1 + app/front-end/src/hooks/useStatusContext.ts | 4 ++ app/front-end/src/stores/index.ts | 2 + .../src/stores/statusContextProvider.tsx | 42 +++++++++++++++++++ 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 app/front-end/src/hooks/useStatusContext.ts create mode 100644 app/front-end/src/stores/statusContextProvider.tsx diff --git a/app/front-end/src/app/provider.tsx b/app/front-end/src/app/provider.tsx index 31c7e84..40e5c3a 100644 --- a/app/front-end/src/app/provider.tsx +++ b/app/front-end/src/app/provider.tsx @@ -1,4 +1,4 @@ -import { SessionContextProvider, ThemeContextProvider } from '@/stores'; +import { SessionContextProvider, StatusContextProvider, ThemeContextProvider } from '@/stores'; import { CircularProgress } from '@mui/material'; import React from 'react'; @@ -40,7 +40,9 @@ export const AppProvider = ({ children }: AppProviderProps) => { fallback={} > - {children} + + {children} + ); diff --git a/app/front-end/src/hooks/index.ts b/app/front-end/src/hooks/index.ts index 978411d..9bbe3c9 100644 --- a/app/front-end/src/hooks/index.ts +++ b/app/front-end/src/hooks/index.ts @@ -1,2 +1,3 @@ export { useSessionContext } from './useSessionContext'; +export { useStatusContext } from './useStatusContext'; export { useThemeContext } from './useThemeContext'; diff --git a/app/front-end/src/hooks/useStatusContext.ts b/app/front-end/src/hooks/useStatusContext.ts new file mode 100644 index 0000000..ccae9c5 --- /dev/null +++ b/app/front-end/src/hooks/useStatusContext.ts @@ -0,0 +1,4 @@ +import { StatusContext } from '@/stores'; +import { useContext } from 'react'; + +export const useStatusContext = () => useContext(StatusContext); diff --git a/app/front-end/src/stores/index.ts b/app/front-end/src/stores/index.ts index 59d0790..0d28f71 100644 --- a/app/front-end/src/stores/index.ts +++ b/app/front-end/src/stores/index.ts @@ -1,4 +1,6 @@ export { SessionContext, SessionContextProvider } from './sessionContextProvider'; export type { SessionContextProps } from './sessionContextProvider'; +export { StatusContext, StatusContextProvider } from './statusContextProvider'; +export type { StatusContextProps } from './statusContextProvider'; export { ThemeContext, ThemeContextProvider } from './themeContextProvider'; export type { ThemeContextProps } from './themeContextProvider'; diff 
--git a/app/front-end/src/stores/statusContextProvider.tsx b/app/front-end/src/stores/statusContextProvider.tsx new file mode 100644 index 0000000..02c4607 --- /dev/null +++ b/app/front-end/src/stores/statusContextProvider.tsx @@ -0,0 +1,42 @@ +import { useSessionContext } from '@/hooks'; +import React, { createContext, useEffect, useState } from 'react'; + +export interface StatusContextProps { + blocked: boolean; + blockedStateUpdate: (blocked: boolean) => void; +} + +export const StatusContext = createContext({ + blocked: false, + blockedStateUpdate: () => {}, +}); + +interface Props { + children?: React.ReactNode; +} + +export const StatusContextProvider: React.FC = ({ children }) => { + const [blocked, setBlocked] = useState(false); + + const blockedStateUpdate = (blocked: boolean) => { + setBlocked(blocked); + }; + + const { connected } = useSessionContext(); + + useEffect(() => { + if (!connected) { + setBlocked(true); + return; + } + + setBlocked(false); + }, [connected]); + + const StatusContextValue: StatusContextProps = { + blocked, + blockedStateUpdate, + }; + + return {children}; +}; From d3af32aa47bfbf08cf25a2022339fde0f1f7db39 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 5 Sep 2024 00:56:38 +0300 Subject: [PATCH 39/72] MDE/PKFE-32 restructured toolbar button data --- .../toolbarGroupButtons/applyGroupButtons.tsx | 26 ---- .../downloadGroupButtons.tsx | 36 ----- .../toolbarView/toolbarGroupButtons/index.ts | 3 - .../toolbarGroupButtons/mergeGroupButtons.tsx | 26 ---- .../toolbarView/toolbarGroupItem.tsx | 4 +- .../components/toolbarView/toolbarView.tsx | 135 ++++++++++++++++-- 6 files changed, 124 insertions(+), 106 deletions(-) delete mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx delete mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx delete mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts delete mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx deleted file mode 100644 index 3044051..0000000 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx +++ /dev/null @@ -1,26 +0,0 @@ -import { Deblur as DeblurIcon } from '@mui/icons-material'; - -import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; - -const applySpliceAiClick = () => { - console.log('Clicked Apply SpliceAI Button!'); -}; - -const applyCaddClick = () => { - console.log('Clicked Apply CADD Button!'); -}; - -export const ApplyGroupButtons: ToolbarGroupItemProps[] = [ - { - group: 'apply', - icon: DeblurIcon, - label: 'Apply SpliceAI', - onClick: applySpliceAiClick, - }, - { - group: 'apply', - icon: DeblurIcon, - label: 'Apply CADD', - onClick: applyCaddClick, - }, -]; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx deleted file mode 100644 index febfd84..0000000 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx +++ /dev/null @@ -1,36 +0,0 @@ -import { 
ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; - -import { Download as DownloadIcon } from '@mui/icons-material'; - -const handleDownloadLovdClick = () => { - console.log('Clicked Download Lovd Button!'); -}; - -const handleDownloadClinvarClick = () => { - console.log('Clicked Download Clinvar Button!'); -}; - -const handleDownloadGnomadClick = () => { - console.log('Clicked Download Gnomad Button!'); -}; - -export const DownloadGroupButtons: ToolbarGroupItemProps[] = [ - { - group: 'download', - icon: DownloadIcon, - label: 'LOVD', - onClick: handleDownloadLovdClick, - }, - { - group: 'download', - icon: DownloadIcon, - label: 'ClinVar', - onClick: handleDownloadClinvarClick, - }, - { - group: 'download', - icon: DownloadIcon, - label: 'gnomAD', - onClick: handleDownloadGnomadClick, - }, -]; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts deleted file mode 100644 index 243e3af..0000000 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export { ApplyGroupButtons } from './applyGroupButtons'; -export { DownloadGroupButtons } from './downloadGroupButtons'; -export { MergeGroupButtons } from './mergeGroupButtons'; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx deleted file mode 100644 index 73d2271..0000000 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx +++ /dev/null @@ -1,26 +0,0 @@ -import { MergeType as MergeTypeIcon } from '@mui/icons-material'; - -import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; - -const mergeLovdAndGnomadClick = () => { - console.log('Clicked Merge LOVD & gnomAD Button!'); -}; - -const mergeLovdAndClinvarClick = () => { - console.log('Clicked Merge LOVD & ClinVar Button!'); -}; - -export const MergeGroupButtons: ToolbarGroupItemProps[] = [ - { - group: 'merge', - icon: MergeTypeIcon, - label: 'Merge LOVD & gnomAD', - onClick: mergeLovdAndGnomadClick, - }, - { - group: 'merge', - icon: MergeTypeIcon, - label: 'Merge LOVD & ClinVar', - onClick: mergeLovdAndClinvarClick, - }, -]; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx index 707773b..789a501 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx @@ -5,6 +5,7 @@ export interface ToolbarGroupItemProps { group: string; icon: SvgIconComponent; label: string; + disabled: boolean; onClick: () => void; } @@ -34,13 +35,14 @@ export interface ToolbarGroupItemProps { * @param {Function} onClick - The function to be called when the button is clicked. * @returns {JSX.Element} The rendered Button component with an icon and label. 
*/ -export const ToolbarGroupItem: React.FC = ({ icon: Icon, label, onClick }) => { +export const ToolbarGroupItem: React.FC = ({ icon: Icon, label, disabled, onClick }) => { const Theme = useTheme(); return ( = (file) => { const Theme = useTheme(); const Workspace = useWorkspaceContext(); + const { blocked } = useStatusContext(); const { id, label } = file; @@ -39,9 +41,13 @@ export const FilebarGroupItem: React.FC = (file) => { borderRadius: '0rem', ':hover': { backgroundColor: - Workspace.file.id === id ? Theme.palette.background.default : alpha(Theme.palette.background.default, 0.5), + Workspace.file.id === id + ? Theme.palette.background.default + : blocked + ? Theme.palette.action.selected + : alpha(Theme.palette.background.default, 0.5), }, - cursor: 'pointer', + cursor: blocked ? 'default' : 'pointer', display: 'flex', flexDirection: 'row', alignItems: 'center', @@ -49,8 +55,10 @@ export const FilebarGroupItem: React.FC = (file) => { }} onClick={() => { // Update the workspace to the selected file - Workspace.fileStateUpdate(file); - Workspace.filesHistoryStateUpdate(file); + if (!blocked) { + Workspace.fileStateUpdate(file); + Workspace.filesHistoryStateUpdate(file); + } }} > = (file) => { { event.stopPropagation(); }} diff --git a/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx b/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx index 1eecacf..7a87392 100644 --- a/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx +++ b/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx @@ -1,5 +1,5 @@ import { FileContentModel, FileModel, FilePaginationModel, FileTypes } from '@/features/editor/types'; -import { useSessionContext } from '@/hooks'; +import { useSessionContext, useStatusContext } from '@/hooks'; import { axios, socket } from '@/lib'; import { Endpoints, Events } from '@/types'; import { TreeViewBaseItem } from '@mui/x-tree-view'; @@ -149,8 +149,11 @@ export const WorkspaceContextProvider: React.FC = ({ children }) => { const [fileTreeIsLoading, setFileTreeIsLoading] = useState(true); const [fileTree, setFileTree] = useState[]>([]); + const { blockedStateUpdate } = useStatusContext(); + const getWorkspace = useCallback(async () => { setFileTreeIsLoading(true); + blockedStateUpdate(true); try { const response = await axios.get(Endpoints.WORKSPACE); setFileTree(response.data); @@ -158,6 +161,7 @@ export const WorkspaceContextProvider: React.FC = ({ children }) => { console.error('Failed to fetch workspace data:', error); } finally { setFileTreeIsLoading(false); + blockedStateUpdate(false); } }, []); From 8d12237ac793227faa2bb84aab32f17443549e61 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 5 Sep 2024 01:04:43 +0300 Subject: [PATCH 41/72] MDE/PKFE-32 bug-fix --- app/back-end/src/routes/workspace_aggregate_route.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/app/back-end/src/routes/workspace_aggregate_route.py b/app/back-end/src/routes/workspace_aggregate_route.py index 87a245d..4db5cf6 100644 --- a/app/back-end/src/routes/workspace_aggregate_route.py +++ b/app/back-end/src/routes/workspace_aggregate_route.py @@ -104,11 +104,7 @@ def get_workspace_aggregate_all(relative_path): or header_values[field] == float("-inf") or ( header_values[field] == float(0) - and ( - header_actions[field] != "min" - or header_actions[field] != "max" - or header_actions[field] != "cnt" - ) + and header_actions[field] not in ["min", "max", "cnt"] ) else ( str(int(header_values[field])) 
From 1fb82da040b65b3a9ea2ad0fa13ede94028113e7 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Thu, 5 Sep 2024 16:12:42 +0300 Subject: [PATCH 42/72] MDE/PKFE-32 changes to toolbar --- .../toolbarGroupButtons/applyGroupButtons.tsx | 26 ++++ .../downloadGroupButtons.tsx | 36 ++++++ .../toolbarView/toolbarGroupButtons/index.ts | 3 + .../toolbarGroupButtons/mergeGroupButtons.tsx | 26 ++++ .../toolbarView/toolbarGroupItem.tsx | 7 +- .../components/toolbarView/toolbarView.tsx | 116 +----------------- 6 files changed, 100 insertions(+), 114 deletions(-) create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx new file mode 100644 index 0000000..3044051 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx @@ -0,0 +1,26 @@ +import { Deblur as DeblurIcon } from '@mui/icons-material'; + +import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; + +const applySpliceAiClick = () => { + console.log('Clicked Apply SpliceAI Button!'); +}; + +const applyCaddClick = () => { + console.log('Clicked Apply CADD Button!'); +}; + +export const ApplyGroupButtons: ToolbarGroupItemProps[] = [ + { + group: 'apply', + icon: DeblurIcon, + label: 'Apply SpliceAI', + onClick: applySpliceAiClick, + }, + { + group: 'apply', + icon: DeblurIcon, + label: 'Apply CADD', + onClick: applyCaddClick, + }, +]; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx new file mode 100644 index 0000000..febfd84 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx @@ -0,0 +1,36 @@ +import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; + +import { Download as DownloadIcon } from '@mui/icons-material'; + +const handleDownloadLovdClick = () => { + console.log('Clicked Download Lovd Button!'); +}; + +const handleDownloadClinvarClick = () => { + console.log('Clicked Download Clinvar Button!'); +}; + +const handleDownloadGnomadClick = () => { + console.log('Clicked Download Gnomad Button!'); +}; + +export const DownloadGroupButtons: ToolbarGroupItemProps[] = [ + { + group: 'download', + icon: DownloadIcon, + label: 'LOVD', + onClick: handleDownloadLovdClick, + }, + { + group: 'download', + icon: DownloadIcon, + label: 'ClinVar', + onClick: handleDownloadClinvarClick, + }, + { + group: 'download', + icon: DownloadIcon, + label: 'gnomAD', + onClick: handleDownloadGnomadClick, + }, +]; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts new file mode 100644 index 0000000..243e3af --- /dev/null +++ 
b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts @@ -0,0 +1,3 @@ +export { ApplyGroupButtons } from './applyGroupButtons'; +export { DownloadGroupButtons } from './downloadGroupButtons'; +export { MergeGroupButtons } from './mergeGroupButtons'; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx new file mode 100644 index 0000000..73d2271 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx @@ -0,0 +1,26 @@ +import { MergeType as MergeTypeIcon } from '@mui/icons-material'; + +import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; + +const mergeLovdAndGnomadClick = () => { + console.log('Clicked Merge LOVD & gnomAD Button!'); +}; + +const mergeLovdAndClinvarClick = () => { + console.log('Clicked Merge LOVD & ClinVar Button!'); +}; + +export const MergeGroupButtons: ToolbarGroupItemProps[] = [ + { + group: 'merge', + icon: MergeTypeIcon, + label: 'Merge LOVD & gnomAD', + onClick: mergeLovdAndGnomadClick, + }, + { + group: 'merge', + icon: MergeTypeIcon, + label: 'Merge LOVD & ClinVar', + onClick: mergeLovdAndClinvarClick, + }, +]; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx index 789a501..8a43dd8 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx @@ -1,3 +1,4 @@ +import { useStatusContext } from '@/hooks'; import { SvgIconComponent } from '@mui/icons-material'; import { alpha, Box, Button, useTheme } from '@mui/material'; @@ -5,7 +6,6 @@ export interface ToolbarGroupItemProps { group: string; icon: SvgIconComponent; label: string; - disabled: boolean; onClick: () => void; } @@ -35,14 +35,15 @@ export interface ToolbarGroupItemProps { * @param {Function} onClick - The function to be called when the button is clicked. * @returns {JSX.Element} The rendered Button component with an icon and label. */ -export const ToolbarGroupItem: React.FC = ({ icon: Icon, label, disabled, onClick }) => { +export const ToolbarGroupItem: React.FC = ({ icon: Icon, label, onClick }) => { const Theme = useTheme(); + const { blocked } = useStatusContext(); return ( - - {filename === '' ? ( - 'No file selected' - ) : ( + + {filename !== '' && ( <> - Selected file: {filename} + Selected file: "{filename}" )} + {!isIncorrectFileType && newInfoFileName !== '' && ( + + File will be saved as: "{newInfoFileName}". + + )} + {isIncorrectFileType && ( + + Incorrect file extension! +
Accepted file extensions: '.csv', '.txt' +
+ )} diff --git a/app/front-end/src/features/editor/utils/helpers.tsx b/app/front-end/src/features/editor/utils/helpers.tsx index 11c55d7..2dea027 100644 --- a/app/front-end/src/features/editor/utils/helpers.tsx +++ b/app/front-end/src/features/editor/utils/helpers.tsx @@ -39,3 +39,30 @@ export const doesFileExist = (fileTreeView: TreeViewBaseItem[], path: string): string => { + const dotIndex = path.lastIndexOf('.'); + const filePath = path.substring(0, dotIndex); + const fileExtension = path.substring(dotIndex + 1); + + let newFilePath = filePath; + let newFullPath = `${newFilePath}.${fileExtension}`; + + let i = 1; + + while (doesFileExist(fileTreeView, newFullPath)) { + newFilePath = `${filePath} (${i})`; + newFullPath = `${newFilePath}.${fileExtension}`; + i++; + } + + const lastSlashIndex = newFullPath.lastIndexOf('/'); + const newFileName = newFullPath.substring(lastSlashIndex + 1); + + return newFileName; +}; + +export const getFileExtension = (filename: string): string => { + const dotIndex = filename.lastIndexOf('.'); + return dotIndex !== -1 ? filename.substring(dotIndex + 1).toLowerCase() : ''; +}; diff --git a/app/front-end/src/types/constants/endpoints.ts b/app/front-end/src/types/constants/endpoints.ts index 6805ff7..b1accc2 100644 --- a/app/front-end/src/types/constants/endpoints.ts +++ b/app/front-end/src/types/constants/endpoints.ts @@ -38,4 +38,5 @@ export const Endpoints = { WORKSPACE_RENAME: `/workspace/rename`, WORKSPACE_DELETE: `/workspace/delete`, WORKSPACE_AGGREGATE: `/workspace/aggregate`, + WORKSPACE_IMPORT: `/workspace/import`, }; From 84012bfbe47e2573887bb62a516a8f4c70c8b147 Mon Sep 17 00:00:00 2001 From: Justinas <156369263+justinnas@users.noreply.github.com> Date: Fri, 6 Sep 2024 19:27:36 +0300 Subject: [PATCH 44/72] Small bugfixes --- .../fileTreeItemContextMenuConfirmationDialog.tsx | 6 ++++-- .../fileTreeItemContextMenuFileImportDialog.tsx | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx index e4fd98c..2a7861b 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuConfirmationDialog.tsx @@ -79,7 +79,7 @@ export const FileTreeItemContextMenuConfirmationDialog: React.FC - + {content.text} {content.boldText}? 
@@ -99,7 +99,9 @@ export const FileTreeItemContextMenuConfirmationDialog: React.FC - {action} + + {action.charAt(0).toUpperCase() + action.slice(1).toLowerCase()} + diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx index b3c46ba..f837774 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx @@ -66,7 +66,6 @@ export const FileTreeItemContextMenuFileImportDialog: React.FC Date: Fri, 6 Sep 2024 22:10:22 +0300 Subject: [PATCH 45/72] Small code fixes --- .../fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx | 3 +-- app/front-end/src/features/editor/utils/index.ts | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx index f837774..1f3ada0 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx +++ b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx @@ -1,8 +1,7 @@ import { FileTreeItemContextMenuStyledDialog } from '@/features/editor/components/fileTreeView/fileTreeItem'; import { useWorkspaceContext } from '@/features/editor/hooks'; import { FileTreeViewItemProps } from '@/features/editor/types'; -import { doesFileExist } from '@/features/editor/utils'; -import { findUniqueFileName, getFileExtension } from '@/features/editor/utils/helpers'; +import { doesFileExist, findUniqueFileName, getFileExtension } from '@/features/editor/utils'; import { axios } from '@/lib'; import { Endpoints } from '@/types'; import { Close as CloseIcon, UploadFile as UploadFileIcon } from '@mui/icons-material'; diff --git a/app/front-end/src/features/editor/utils/index.ts b/app/front-end/src/features/editor/utils/index.ts index 18e5ab4..76178c3 100644 --- a/app/front-end/src/features/editor/utils/index.ts +++ b/app/front-end/src/features/editor/utils/index.ts @@ -1 +1 @@ -export { doesFileExist, getIconFromFileType, isExpandable } from './helpers'; +export { doesFileExist, findUniqueFileName, getFileExtension, getIconFromFileType, isExpandable } from './helpers'; From 1893e27fcfd14adda7779d9cb87e446685313f2a Mon Sep 17 00:00:00 2001 From: Justinas <156369263+justinnas@users.noreply.github.com> Date: Fri, 6 Sep 2024 22:20:58 +0300 Subject: [PATCH 46/72] Additional small changes --- .../fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx | 5 +---- app/front-end/src/features/editor/utils/helpers.tsx | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx index 1f3ada0..fd48aec 100644 --- a/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx +++ 
b/app/front-end/src/features/editor/components/fileTreeView/fileTreeItem/fileTreeItemContextMenuFileImportDialog.tsx @@ -79,10 +79,7 @@ export const FileTreeItemContextMenuFileImportDialog: React.FC Date: Sun, 8 Sep 2024 00:37:11 +0300 Subject: [PATCH 47/72] MDE/PKFE-31 bug-fix --- .../src/features/editor/components/editorView/editorView.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/front-end/src/features/editor/components/editorView/editorView.tsx b/app/front-end/src/features/editor/components/editorView/editorView.tsx index edcd860..368ad4e 100644 --- a/app/front-end/src/features/editor/components/editorView/editorView.tsx +++ b/app/front-end/src/features/editor/components/editorView/editorView.tsx @@ -165,7 +165,7 @@ export const EditorView: React.FC = () => { const { totalRows, header, rows } = fileContentResponse; if (!header) { - fileStateUpdate(undefined, { columns: [], rows: [], aggregations: {} }, undefined); + fileStateUpdate(undefined, { columns: [], rows: [], aggregations: fileContent.aggregations }, undefined); return; } From be9dc76940bfb6300dc9420e67f1879f4bb75f68 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:38:45 +0300 Subject: [PATCH 48/72] MDE/PKFE-31 updated workspace provider for file tree array --- .../stores/workspaceContextProvider.tsx | 10 +++++-- .../src/features/editor/utils/helpers.tsx | 26 ++++++++++++++++++- .../src/features/editor/utils/index.ts | 2 +- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx b/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx index 7a87392..16de5d8 100644 --- a/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx +++ b/app/front-end/src/features/editor/stores/workspaceContextProvider.tsx @@ -1,4 +1,5 @@ import { FileContentModel, FileModel, FilePaginationModel, FileTypes } from '@/features/editor/types'; +import { getWorkspaceArray } from '@/features/editor/utils'; import { useSessionContext, useStatusContext } from '@/hooks'; import { axios, socket } from '@/lib'; import { Endpoints, Events } from '@/types'; @@ -20,6 +21,7 @@ export interface WorkspaceContextProps { // File tree state properties fileTreeIsLoading: boolean; fileTree: TreeViewBaseItem[]; + fileTreeArray: FileModel[]; } export const WorkspaceContext = createContext({ @@ -37,6 +39,7 @@ export const WorkspaceContext = createContext({ // File tree state defaults fileTreeIsLoading: true, fileTree: [], + fileTreeArray: [], }); interface Props { @@ -148,6 +151,7 @@ export const WorkspaceContextProvider: React.FC = ({ children }) => { // File tree state const [fileTreeIsLoading, setFileTreeIsLoading] = useState(true); const [fileTree, setFileTree] = useState[]>([]); + const [fileTreeArray, setFileTreeArray] = useState([]); const { blockedStateUpdate } = useStatusContext(); @@ -157,6 +161,7 @@ export const WorkspaceContextProvider: React.FC = ({ children }) => { try { const response = await axios.get(Endpoints.WORKSPACE); setFileTree(response.data); + setFileTreeArray(getWorkspaceArray(response.data)); } catch (error) { console.error('Failed to fetch workspace data:', error); } finally { @@ -192,8 +197,9 @@ export const WorkspaceContextProvider: React.FC = ({ children }) => { filesHistory, filesHistoryStateUpdate, - fileTreeIsLoading: fileTreeIsLoading, - fileTree: fileTree, + fileTreeIsLoading, + fileTree, + fileTreeArray, }; return {children}; diff --git 
a/app/front-end/src/features/editor/utils/helpers.tsx b/app/front-end/src/features/editor/utils/helpers.tsx index 6b4db62..c0263bb 100644 --- a/app/front-end/src/features/editor/utils/helpers.tsx +++ b/app/front-end/src/features/editor/utils/helpers.tsx @@ -1,6 +1,6 @@ +import { FileModel, FileTreeViewItemProps, FileTypes } from '@/features/editor/types'; import { Article as ArticleIcon, FolderRounded, InsertDriveFile as InsertDriveFileIcon } from '@mui/icons-material'; import { TreeViewBaseItem } from '@mui/x-tree-view'; -import { FileTreeViewItemProps, FileTypes } from '../types'; export const isExpandable = (reactChildren: React.ReactNode) => { if (Array.isArray(reactChildren)) { @@ -66,3 +66,27 @@ export const getFileExtension = (filename: string): string => { const dotIndex = filename.lastIndexOf('.'); return dotIndex !== -1 ? filename.substring(dotIndex + 1).toLowerCase() : ''; }; + +export const getWorkspaceArray = (fileTreeView: TreeViewBaseItem[]): FileModel[] => { + const workspaceArray: FileModel[] = []; + fileTreeView.sort((a, b) => { + if (a.fileType === FileTypes.FOLDER || b.fileType === FileTypes.FOLDER) { + if (a.fileType === b.fileType) { + return a.id.localeCompare(b.id); + } + + return a.fileType === FileTypes.FOLDER ? 1 : -1; + } + + return a.id.localeCompare(b.id); + }); + + for (const item of fileTreeView) { + workspaceArray.push({ id: item.id, label: item.label, type: item.fileType }); + if (item.children && item.children.length !== 0) { + workspaceArray.push(...getWorkspaceArray(item.children)); + } + } + + return workspaceArray; +}; diff --git a/app/front-end/src/features/editor/utils/index.ts b/app/front-end/src/features/editor/utils/index.ts index 76178c3..acd3fe7 100644 --- a/app/front-end/src/features/editor/utils/index.ts +++ b/app/front-end/src/features/editor/utils/index.ts @@ -1 +1 @@ -export { doesFileExist, findUniqueFileName, getFileExtension, getIconFromFileType, isExpandable } from './helpers'; +export { doesFileExist, findUniqueFileName, getFileExtension, getIconFromFileType, isExpandable, getWorkspaceArray } from './helpers'; From fc8d0679b335044b417add2acff616299aed1cf6 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:40:05 +0300 Subject: [PATCH 49/72] MDE/PKFE-31 toolbar resizing --- .../editor/components/toolbarView/toolbarGroupsSelector.tsx | 2 +- app/front-end/src/features/editor/index.tsx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupsSelector.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupsSelector.tsx index 15cc20f..dcea7bc 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupsSelector.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupsSelector.tsx @@ -26,5 +26,5 @@ export interface ToolbarGroupsSelectorProps { * @returns {JSX.Element} The rendered List component. 
*/ export const ToolbarGroupsSelector: React.FC = ({ children }) => { - return {children}; + return {children}; }; diff --git a/app/front-end/src/features/editor/index.tsx b/app/front-end/src/features/editor/index.tsx index fc54172..de4c293 100644 --- a/app/front-end/src/features/editor/index.tsx +++ b/app/front-end/src/features/editor/index.tsx @@ -54,7 +54,7 @@ export const Editor = () => { { Date: Sun, 8 Sep 2024 00:41:14 +0300 Subject: [PATCH 50/72] MDE/PKFE-31 implemented toolbar context provider --- .../src/features/editor/hooks/index.ts | 1 + .../editor/hooks/useToolbarContext.ts | 4 + .../src/features/editor/stores/index.ts | 2 + .../editor/stores/toolbarContextProvider.tsx | 197 ++++++++++++++++++ .../features/editor/types/enums/genesEnum.tsx | 3 + .../src/features/editor/types/index.ts | 1 + 6 files changed, 208 insertions(+) create mode 100644 app/front-end/src/features/editor/hooks/useToolbarContext.ts create mode 100644 app/front-end/src/features/editor/stores/toolbarContextProvider.tsx create mode 100644 app/front-end/src/features/editor/types/enums/genesEnum.tsx diff --git a/app/front-end/src/features/editor/hooks/index.ts b/app/front-end/src/features/editor/hooks/index.ts index 5ff1d0d..2cbaa08 100644 --- a/app/front-end/src/features/editor/hooks/index.ts +++ b/app/front-end/src/features/editor/hooks/index.ts @@ -1 +1,2 @@ +export { useToolbarContext } from './useToolbarContext'; export { useWorkspaceContext } from './useWorkspaceContext'; diff --git a/app/front-end/src/features/editor/hooks/useToolbarContext.ts b/app/front-end/src/features/editor/hooks/useToolbarContext.ts new file mode 100644 index 0000000..7a25802 --- /dev/null +++ b/app/front-end/src/features/editor/hooks/useToolbarContext.ts @@ -0,0 +1,4 @@ +import { ToolbarContext } from '@/features/editor/stores'; +import { useContext } from 'react'; + +export const useToolbarContext = () => useContext(ToolbarContext); diff --git a/app/front-end/src/features/editor/stores/index.ts b/app/front-end/src/features/editor/stores/index.ts index 790e4c1..45278ee 100644 --- a/app/front-end/src/features/editor/stores/index.ts +++ b/app/front-end/src/features/editor/stores/index.ts @@ -1,2 +1,4 @@ +export { ToolbarContext, ToolbarContextProvider } from './toolbarContextProvider'; +export type { ToolbarContextProps } from './toolbarContextProvider'; export { WorkspaceContext, WorkspaceContextProvider } from './workspaceContextProvider'; export type { WorkspaceContextProps } from './workspaceContextProvider'; diff --git a/app/front-end/src/features/editor/stores/toolbarContextProvider.tsx b/app/front-end/src/features/editor/stores/toolbarContextProvider.tsx new file mode 100644 index 0000000..5e4339d --- /dev/null +++ b/app/front-end/src/features/editor/stores/toolbarContextProvider.tsx @@ -0,0 +1,197 @@ +import { GenesEnum } from '@/features/editor/types'; +import React, { createContext, useState } from 'react'; + +export interface ToolbarContextProps { + // + // Universal state properties + // + saveTo: string; + override: boolean; + saveToStateUpdate: (saveTo: string, override?: boolean) => void; + + // + // Download state properties + // + gene: GenesEnum; + geneStateUpdate: (gene: GenesEnum) => void; + + // + // Merge state properties + // + lovdFile: string; + clinvarFile: string; + gnomadFile: string; + mergeStateUpdate: (lovdFile: string, clinvarFile: string, gnomadFile: string) => void; + + lovdError: string; + lovdErrorStateUpdate: (lovdFileError: string) => void; + clinvarError: string; + clinvarErrorStateUpdate: 
(clinvarFileError: string) => void; + gnomadError: string; + gnomadErrorStateUpdate: (gnomadFileError: string) => void; + + // + // Apply state properties + // + applyTo: string; + applyToStateUpdate: (applyTo: string) => void; + + applyError: string; + applyErrorStateUpdate: (applyError: string) => void; +} + +export const ToolbarContext = createContext({ + // + // Universal state defaults + // + saveTo: '/', // Root directory (new file) + override: false, + saveToStateUpdate: () => {}, + + // + // Download state defaults + // + gene: GenesEnum.EYS, + geneStateUpdate: () => {}, + + // + // Merge state defaults + // + lovdFile: '', + clinvarFile: '', + gnomadFile: '', + mergeStateUpdate: () => {}, + + lovdError: '', + lovdErrorStateUpdate: () => {}, + clinvarError: '', + clinvarErrorStateUpdate: () => {}, + gnomadError: '', + gnomadErrorStateUpdate: () => {}, + + // + // Apply state defaults + // + applyTo: '', + applyToStateUpdate: () => {}, + + applyError: '', + applyErrorStateUpdate: () => {}, +}); + +interface Props { + children?: React.ReactNode; +} + +export const ToolbarContextProvider: React.FC = ({ children }) => { + /*************** + State management + ***************/ + + // + // Universal state + // + const [saveTo, setSaveTo] = useState('/'); + const [override, setOverride] = useState(false); + + const saveToStateUpdate = (saveTo: string, override?: boolean) => { + setSaveTo(saveTo); + if (override !== undefined) setOverride(override); + }; + + // + // Download state + // + const [gene, setGene] = useState(GenesEnum.EYS); + + const geneStateUpdate = (gene: GenesEnum) => { + setGene(gene); + }; + + // + // Merge state + // + const [lovdFile, setLovdFile] = useState(''); + const [clinvarFile, setClinvarFile] = useState(''); + const [gnomadFile, setGnomadFile] = useState(''); + + const mergeStateUpdate = (lovdFile: string, clinvarFile: string, gnomadFile: string) => { + setLovdFile(lovdFile); + setClinvarFile(clinvarFile); + setGnomadFile(gnomadFile); + }; + + const [lovdError, setLovdError] = useState(''); + + const lovdErrorStateUpdate = (lovdFileError: string) => { + setLovdError(lovdFileError); + }; + + const [clinvarError, setClinvarError] = useState(''); + + const clinvarErrorStateUpdate = (clinvarFileError: string) => { + setClinvarError(clinvarFileError); + }; + + const [gnomadError, setGnomadError] = useState(''); + + const gnomadErrorStateUpdate = (gnomadFileError: string) => { + setGnomadError(gnomadFileError); + }; + + // + // Apply state + // + const [applyTo, setApplyTo] = useState(''); + + const applyToStateUpdate = (applyTo: string) => { + setApplyTo(applyTo); + }; + + const [applyError, setApplyError] = useState(''); + + const applyErrorStateUpdate = (applyError: string) => { + setApplyError(applyError); + }; + + const ToolbarContextValue: ToolbarContextProps = { + // + // Universal state + // + saveTo, + override, + saveToStateUpdate, + + // + // Download state + // + gene, + geneStateUpdate, + + // + // Merge state + // + lovdFile, + clinvarFile, + gnomadFile, + mergeStateUpdate, + + lovdError, + lovdErrorStateUpdate, + clinvarError, + clinvarErrorStateUpdate, + gnomadError, + gnomadErrorStateUpdate, + + // + // Apply state + // + applyTo, + applyToStateUpdate, + + applyError, + applyErrorStateUpdate, + }; + + return {children}; +}; diff --git a/app/front-end/src/features/editor/types/enums/genesEnum.tsx b/app/front-end/src/features/editor/types/enums/genesEnum.tsx new file mode 100644 index 0000000..c73fac4 --- /dev/null +++ 
b/app/front-end/src/features/editor/types/enums/genesEnum.tsx @@ -0,0 +1,3 @@ +export enum GenesEnum { + EYS = 'eys', +} diff --git a/app/front-end/src/features/editor/types/index.ts b/app/front-end/src/features/editor/types/index.ts index 8abb6be..d23c13e 100644 --- a/app/front-end/src/features/editor/types/index.ts +++ b/app/front-end/src/features/editor/types/index.ts @@ -1,5 +1,6 @@ export { ConsoleFeedbackTypes } from './consoleFeedback'; export type { ConsoleFeedback } from './consoleFeedback'; +export { GenesEnum } from './enums/genesEnum'; export type { FileDataRequestDTO, FileDataResponseDTO } from './fileData'; export { FileTreeItemContextMenuActions } from './fileTreeItemContextMenuActions'; export type { FileTreeViewItemProps } from './fileTreeViewItemProps'; From 456a4f721116dfe4b07cc7bf678063a516caa9b4 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:43:17 +0300 Subject: [PATCH 51/72] MDE/PKFE-31 overhauled toolbar buttons logic for hook support --- .../toolbarGroupButtons/applyGroupButtons.tsx | 106 +++++++++++--- .../downloadGroupButtons.tsx | 133 ++++++++++++++---- .../toolbarView/toolbarGroupButtons/index.ts | 3 + .../toolbarGroupButtons/mergeGroupButtons.tsx | 133 +++++++++++++++--- 4 files changed, 302 insertions(+), 73 deletions(-) diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx index 3044051..c099c10 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/applyGroupButtons.tsx @@ -1,26 +1,90 @@ +import { ToolbarGroupItem, ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; +import { useToolbarContext } from '@/features/editor/hooks'; +import { useStatusContext } from '@/hooks'; import { Deblur as DeblurIcon } from '@mui/icons-material'; +import { useCallback, useMemo } from 'react'; -import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; +export interface ApplyGroupButtonsProps {} -const applySpliceAiClick = () => { - console.log('Clicked Apply SpliceAI Button!'); -}; +export const ApplyGroupButtons: React.FC = () => { + const { blockedStateUpdate } = useStatusContext(); + const { saveTo, override, applyTo, applyErrorStateUpdate } = useToolbarContext(); -const applyCaddClick = () => { - console.log('Clicked Apply CADD Button!'); -}; + const applySpliceAiClick = useCallback(async () => { + if (!applyTo) { + applyErrorStateUpdate('Please select a file'); + return; + } + + blockedStateUpdate(true); + + try { + console.log( + 'Clicked Apply SpliceAI Button! 
Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n applyTo:', + applyTo + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error applying SpliceAI:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, applyTo]); + + const applyCaddClick = useCallback(async () => { + if (!applyTo) { + applyErrorStateUpdate('Please select a file'); + return; + } -export const ApplyGroupButtons: ToolbarGroupItemProps[] = [ - { - group: 'apply', - icon: DeblurIcon, - label: 'Apply SpliceAI', - onClick: applySpliceAiClick, - }, - { - group: 'apply', - icon: DeblurIcon, - label: 'Apply CADD', - onClick: applyCaddClick, - }, -]; + blockedStateUpdate(true); + + try { + console.log( + 'Clicked Merge LOVD & ClinVar Button! Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n applyTo:', + applyTo + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error applying CADD:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, applyTo]); + + const buttons: ToolbarGroupItemProps[] = useMemo( + () => [ + { + group: 'apply', + icon: DeblurIcon, + label: 'Apply SpliceAI', + onClick: applySpliceAiClick, + }, + { + group: 'apply', + icon: DeblurIcon, + label: 'Apply CADD', + onClick: applyCaddClick, + }, + ], + [applySpliceAiClick, applyCaddClick] + ); + + return ( + <> + {buttons.map((button, index) => ( + + ))} + + ); +}; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx index febfd84..39a4133 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/downloadGroupButtons.tsx @@ -1,36 +1,107 @@ -import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; - +import { ToolbarGroupItem, ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; +import { useToolbarContext } from '@/features/editor/hooks'; +import { useStatusContext } from '@/hooks'; import { Download as DownloadIcon } from '@mui/icons-material'; +import { useCallback, useMemo } from 'react'; -const handleDownloadLovdClick = () => { - console.log('Clicked Download Lovd Button!'); -}; +export interface DownloadGroupButtonsProps {} -const handleDownloadClinvarClick = () => { - console.log('Clicked Download Clinvar Button!'); -}; +export const DownloadGroupButtons: React.FC = () => { + const { blockedStateUpdate } = useStatusContext(); + const { saveTo, override, gene } = useToolbarContext(); -const handleDownloadGnomadClick = () => { - console.log('Clicked Download Gnomad Button!'); -}; + const handleDownloadLovdClick = useCallback(async () => { + blockedStateUpdate(true); + + try { + console.log( + 'Clicked Download Lovd Button! 
Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n gene:', + gene + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error downloading LOVD file:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, gene]); + + const handleDownloadClinvarClick = useCallback(async () => { + blockedStateUpdate(true); -export const DownloadGroupButtons: ToolbarGroupItemProps[] = [ - { - group: 'download', - icon: DownloadIcon, - label: 'LOVD', - onClick: handleDownloadLovdClick, - }, - { - group: 'download', - icon: DownloadIcon, - label: 'ClinVar', - onClick: handleDownloadClinvarClick, - }, - { - group: 'download', - icon: DownloadIcon, - label: 'gnomAD', - onClick: handleDownloadGnomadClick, - }, -]; + try { + console.log( + 'Clicked Download Clinvar Button! Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n gene:', + gene + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error downloading ClinVar file:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, gene]); + + const handleDownloadGnomadClick = useCallback(async () => { + blockedStateUpdate(true); + + try { + console.log( + 'Clicked Download Gnomad Button! Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n gene:', + gene + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error downloading gnomAD file:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, gene]); + + const buttons: ToolbarGroupItemProps[] = useMemo( + () => [ + { + group: 'download', + icon: DownloadIcon, + label: 'LOVD', + onClick: handleDownloadLovdClick, + }, + { + group: 'download', + icon: DownloadIcon, + label: 'ClinVar', + onClick: handleDownloadClinvarClick, + }, + { + group: 'download', + icon: DownloadIcon, + label: 'gnomAD', + onClick: handleDownloadGnomadClick, + }, + ], + [handleDownloadLovdClick, handleDownloadClinvarClick, handleDownloadGnomadClick] + ); + + return ( + <> + {buttons.map((button, index) => ( + + ))} + + ); +}; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts index 243e3af..5e8bc3b 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/index.ts @@ -1,3 +1,6 @@ export { ApplyGroupButtons } from './applyGroupButtons'; +export type { ApplyGroupButtonsProps } from './applyGroupButtons'; export { DownloadGroupButtons } from './downloadGroupButtons'; +export type { DownloadGroupButtonsProps } from './downloadGroupButtons'; export { MergeGroupButtons } from './mergeGroupButtons'; +export type { MergeGroupButtonsProps } from './mergeGroupButtons'; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx index 73d2271..6e1a22c 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupButtons/mergeGroupButtons.tsx @@ -1,26 +1,117 @@ +import { ToolbarGroupItem, 
ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; +import { useToolbarContext } from '@/features/editor/hooks'; +import { useStatusContext } from '@/hooks'; import { MergeType as MergeTypeIcon } from '@mui/icons-material'; +import { useCallback, useMemo } from 'react'; -import { ToolbarGroupItemProps } from '@/features/editor/components/toolbarView'; +export interface MergeGroupButtonsProps {} -const mergeLovdAndGnomadClick = () => { - console.log('Clicked Merge LOVD & gnomAD Button!'); -}; +export const MergeGroupButtons: React.FC = () => { + const { blockedStateUpdate } = useStatusContext(); + const { + saveTo, + override, + lovdFile, + clinvarFile, + gnomadFile, + lovdErrorStateUpdate, + clinvarErrorStateUpdate, + gnomadErrorStateUpdate, + } = useToolbarContext(); -const mergeLovdAndClinvarClick = () => { - console.log('Clicked Merge LOVD & ClinVar Button!'); -}; + const mergeLovdAndGnomadClick = useCallback(async () => { + clinvarErrorStateUpdate(''); + + if (!lovdFile) { + lovdErrorStateUpdate('Please select a LOVD file'); + return; + } + + if (!gnomadFile) { + gnomadErrorStateUpdate('Please select a gnomAD file'); + return; + } + + blockedStateUpdate(true); + + try { + console.log( + 'Clicked Merge LOVD & gnomAD Button! Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n lovd:', + lovdFile, + '\n gnomad:', + gnomadFile + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error merging LOVD & gnomAD files:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, lovdFile, gnomadFile]); + + const mergeLovdAndClinvarClick = useCallback(async () => { + gnomadErrorStateUpdate(''); -export const MergeGroupButtons: ToolbarGroupItemProps[] = [ - { - group: 'merge', - icon: MergeTypeIcon, - label: 'Merge LOVD & gnomAD', - onClick: mergeLovdAndGnomadClick, - }, - { - group: 'merge', - icon: MergeTypeIcon, - label: 'Merge LOVD & ClinVar', - onClick: mergeLovdAndClinvarClick, - }, -]; + if (!lovdFile) { + lovdErrorStateUpdate('Please select a LOVD file'); + return; + } + + if (!clinvarFile) { + clinvarErrorStateUpdate('Please select a ClinVar file'); + return; + } + + blockedStateUpdate(true); + + try { + console.log( + 'Clicked Merge LOVD & ClinVar Button! 
Params:\n saveTo:', + saveTo, + '\n override:', + override, + '\n lovd:', + lovdFile, + '\n clinvar:', + clinvarFile + ); + + await new Promise((resolve) => setTimeout(resolve, 1000)); // TODO: remove this line + } catch (error) { + console.error('Error merging LOVD & ClinVar files:', error); + } finally { + blockedStateUpdate(false); + } + }, [saveTo, override, lovdFile, clinvarFile]); + + const buttons: ToolbarGroupItemProps[] = useMemo( + () => [ + { + group: 'merge', + icon: MergeTypeIcon, + label: 'Merge LOVD & gnomAD', + onClick: mergeLovdAndGnomadClick, + }, + { + group: 'merge', + icon: MergeTypeIcon, + label: 'Merge LOVD & ClinVar', + onClick: mergeLovdAndClinvarClick, + }, + ], + [mergeLovdAndGnomadClick, mergeLovdAndClinvarClick] + ); + + return ( + <> + {buttons.map((button, index) => ( + + ))} + + ); +}; From 26f842dd106d33adfaa7c3b839488d62997ceb3d Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:43:46 +0300 Subject: [PATCH 52/72] MDE/PKFE-31 styled core components for param fields --- .../styledGroupParamsComponents.tsx | 37 +++++++++++++++++++ .../src/stores/themeContextProvider.tsx | 2 + 2 files changed, 39 insertions(+) create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/styledGroupParamsComponents.tsx diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/styledGroupParamsComponents.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/styledGroupParamsComponents.tsx new file mode 100644 index 0000000..6722d9c --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/styledGroupParamsComponents.tsx @@ -0,0 +1,37 @@ +import { InputLabel, ListSubheader, MenuItem, Select, Typography, styled } from '@mui/material'; + +export const StyledGroupParamsTypography = styled(Typography)({ + fontSize: '0.875rem', + transition: 'color 0.3s ease', +}); + +export const StyledGroupParamsInputLabel = styled(InputLabel)({ + transition: 'color 0.3s ease', +}); + +export const StyledGroupParamsSelect = styled(Select)({ + height: '3rem', + transition: 'color 0.3s ease, border-color 0.3s ease', +}); + +export const StyledGroupParamsMenuItem = styled(MenuItem)({ + whiteSpace: 'normal', +}); + +export const StyledGroupParamsListSubheader = styled(ListSubheader)(({ theme }) => ({ + backgroundColor: theme.palette.background.default, + whiteSpace: 'normal', +})); + +export const StyledGroupParamsMenuItemTypography = styled(Typography)({ + width: '100%', + fontSize: '1rem', + textAlign: 'right', + overflowWrap: 'break-word', +}); + +export const StyledGroupParamsMenuItemTypographyBold = styled(Typography)({ + fontSize: '0.875rem', + fontWeight: 'bold', + overflowWrap: 'break-word', +}); diff --git a/app/front-end/src/stores/themeContextProvider.tsx b/app/front-end/src/stores/themeContextProvider.tsx index dc7770b..33295b3 100644 --- a/app/front-end/src/stores/themeContextProvider.tsx +++ b/app/front-end/src/stores/themeContextProvider.tsx @@ -99,6 +99,8 @@ export const ThemeContextProvider: React.FC = ({ children }) => { fontSize: '20px', color: mode === 'light' ? 
Colors.textPrimaryLight : Colors.textPrimaryDark, fontWeight: '400', + overflow: 'hidden', + textOverflow: 'ellipsis', }, }, }, From 36af934f7641e156908ff606ca36c08421ae35a5 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:44:45 +0300 Subject: [PATCH 53/72] MDE/PKFE-31 implemented different groups params fields --- .../toolbarGroupParams/applyGroupParams.tsx | 190 +++++++++++ .../downloadGroupParams.tsx | 263 +++++++++++++++ .../toolbarView/toolbarGroupParams/index.ts | 15 + .../toolbarGroupParams/mergeGroupParams.tsx | 316 ++++++++++++++++++ 4 files changed, 784 insertions(+) create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/applyGroupParams.tsx create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/downloadGroupParams.tsx create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/index.ts create mode 100644 app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/mergeGroupParams.tsx diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/applyGroupParams.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/applyGroupParams.tsx new file mode 100644 index 0000000..cf80bf9 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/applyGroupParams.tsx @@ -0,0 +1,190 @@ +import { + StyledGroupParamsInputLabel, + StyledGroupParamsListSubheader, + StyledGroupParamsMenuItem, + StyledGroupParamsMenuItemTypography, + StyledGroupParamsMenuItemTypographyBold, + StyledGroupParamsSelect, + StyledGroupParamsTypography, +} from '@/features/editor/components/toolbarView/toolbarGroupParams'; +import { useToolbarContext, useWorkspaceContext } from '@/features/editor/hooks'; +import { FileModel, FileTypes } from '@/features/editor/types'; +import { getWorkspaceArray } from '@/features/editor/utils'; +import { useStatusContext } from '@/hooks'; +import { Box, Checkbox, FormControl, FormControlLabel, SelectChangeEvent, useTheme } from '@mui/material'; +import { useEffect, useState } from 'react'; + +export interface ApplyGroupParamsProps {} + +export const ApplyGroupParams: React.FC = () => { + const { blocked } = useStatusContext(); + const Theme = useTheme(); + const { fileTree, fileTreeArray } = useWorkspaceContext(); + const { saveTo, saveToStateUpdate, applyTo, applyToStateUpdate, applyError, applyErrorStateUpdate } = + useToolbarContext(); + + // + // Apply state + // + const [applyToValue, setApplyToValue] = useState(applyTo); + + const handleApplyToChange = (event: SelectChangeEvent) => { + setApplyToValue(event.target.value); + applyToStateUpdate(event.target.value); + applyErrorStateUpdate(''); + }; + + // + // Save To state + // + const [fileArray, setFileArray] = useState(fileTreeArray); + const [saveToValue, setSaveToValue] = useState(saveTo); + const [overrideValue, setOverrideValue] = useState(false); + + const handleSaveToChange = (event: SelectChangeEvent) => { + setSaveToValue(event.target.value); + saveToStateUpdate(event.target.value, false); + setOverrideValue(false); + }; + + const handleOverrideChange = (event: React.ChangeEvent) => { + setOverrideValue(event.target.checked); + saveToStateUpdate(saveToValue, event.target.checked); + }; + + // + // Effects + // + useEffect(() => { + setFileArray(getWorkspaceArray(fileTree)); + }, [fileTree]); + + return ( + + + + + {applyError ? 
applyError : 'Apply To'} + + + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + + + + Save To + + + + New file... + + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + } + label={ + + Override + + } + labelPlacement='start' + /> + + + ); +}; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/downloadGroupParams.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/downloadGroupParams.tsx new file mode 100644 index 0000000..cce8d65 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/downloadGroupParams.tsx @@ -0,0 +1,263 @@ +import { + StyledGroupParamsInputLabel, + StyledGroupParamsListSubheader, + StyledGroupParamsMenuItem, + StyledGroupParamsMenuItemTypography, + StyledGroupParamsMenuItemTypographyBold, + StyledGroupParamsSelect, + StyledGroupParamsTypography, +} from '@/features/editor/components/toolbarView/toolbarGroupParams'; +import { useToolbarContext, useWorkspaceContext } from '@/features/editor/hooks'; +import { FileModel, FileTypes, GenesEnum } from '@/features/editor/types'; +import { getWorkspaceArray } from '@/features/editor/utils'; +import { useStatusContext } from '@/hooks'; +import { Box, Checkbox, FormControl, FormControlLabel, SelectChangeEvent, useTheme } from '@mui/material'; +import { useEffect, useState } from 'react'; + +export interface DownloadGroupParamsProps {} + +export const DownloadGroupParams: React.FC = () => { + const { blocked } = useStatusContext(); + const Theme = useTheme(); + const { fileTree, fileTreeArray } = useWorkspaceContext(); + const { saveTo, saveToStateUpdate, gene, geneStateUpdate } = useToolbarContext(); + + // + // Gene state + // + const [geneValue, setGeneValue] = useState(gene); + + const handleGeneChange = (event: SelectChangeEvent) => { + setGeneValue(event.target.value); + geneStateUpdate(event.target.value); + }; + + // + // Save To state + // + const [fileArray, setFileArray] = useState(fileTreeArray); + const [saveToValue, setSaveToValue] = useState(saveTo); + const [overrideValue, setOverrideValue] = useState(false); + + const handleSaveToChange = (event: SelectChangeEvent) => { + setSaveToValue(event.target.value); + saveToStateUpdate(event.target.value, false); + setOverrideValue(false); + }; + + const handleOverrideChange = (event: React.ChangeEvent) => { + setOverrideValue(event.target.checked); + saveToStateUpdate(saveToValue, event.target.checked); + }; + + // + // Effects + // + useEffect(() => { + setFileArray(getWorkspaceArray(fileTree)); + }, [fileTree]); + + return ( + + + + + Gene + + + + {GenesEnum.EYS.toUpperCase()} + + + + + + + + Save To + + + + New file... 
+ + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + } + label={ + + Override + + } + labelPlacement='start' + /> + + + ); +}; + +{ + /* Gene + */ +} + +{ + /* Save To + */ +} diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/index.ts b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/index.ts new file mode 100644 index 0000000..e432661 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/index.ts @@ -0,0 +1,15 @@ +export { ApplyGroupParams } from './applyGroupParams'; +export type { ApplyGroupParamsProps } from './applyGroupParams'; +export { DownloadGroupParams } from './downloadGroupParams'; +export type { DownloadGroupParamsProps } from './downloadGroupParams'; +export { MergeGroupParams } from './mergeGroupParams'; +export type { MergeGroupParamsProps } from './mergeGroupParams'; +export { + StyledGroupParamsInputLabel, + StyledGroupParamsListSubheader, + StyledGroupParamsMenuItem, + StyledGroupParamsMenuItemTypography, + StyledGroupParamsMenuItemTypographyBold, + StyledGroupParamsSelect, + StyledGroupParamsTypography, +} from './styledGroupParamsComponents'; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/mergeGroupParams.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/mergeGroupParams.tsx new file mode 100644 index 0000000..ff26f50 --- /dev/null +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupParams/mergeGroupParams.tsx @@ -0,0 +1,316 @@ +import { + StyledGroupParamsInputLabel, + StyledGroupParamsListSubheader, + StyledGroupParamsMenuItem, + StyledGroupParamsMenuItemTypography, + StyledGroupParamsMenuItemTypographyBold, + StyledGroupParamsSelect, + StyledGroupParamsTypography, +} from '@/features/editor/components/toolbarView/toolbarGroupParams'; +import { useToolbarContext, useWorkspaceContext } from '@/features/editor/hooks'; +import { FileModel, FileTypes } from '@/features/editor/types'; +import { getWorkspaceArray } from '@/features/editor/utils'; +import { useStatusContext } from '@/hooks'; +import { Box, Checkbox, FormControl, FormControlLabel, SelectChangeEvent, useTheme } from '@mui/material'; +import { useEffect, useState } from 'react'; + +export interface MergeGroupParamsProps {} + +export const MergeGroupParams: React.FC = () => { + const { blocked } = useStatusContext(); + const Theme = useTheme(); + const { fileTree, fileTreeArray } = useWorkspaceContext(); + const { + saveTo, + saveToStateUpdate, + lovdFile, + clinvarFile, + gnomadFile, + mergeStateUpdate, + lovdError, + clinvarError, + gnomadError, + lovdErrorStateUpdate, + clinvarErrorStateUpdate, + gnomadErrorStateUpdate, + } = useToolbarContext(); + + // + // Merge state + // + const [lovdFileValue, setLovdFileValue] = useState(lovdFile); + const [clinvarFileValue, setClinvarFileValue] = useState(clinvarFile); + const [gnomadFileValue, setGnomadFileValue] = useState(gnomadFile); + + const handleLovdFileChange = (event: SelectChangeEvent) => { + setLovdFileValue(event.target.value); + mergeStateUpdate(event.target.value, clinvarFileValue, gnomadFileValue); + lovdErrorStateUpdate(''); + }; + + const handleClinvarFileChange = (event: SelectChangeEvent) => { + setClinvarFileValue(event.target.value); + mergeStateUpdate(lovdFileValue, event.target.value, gnomadFileValue); + 
clinvarErrorStateUpdate(''); + }; + + const handleGnomadFileChange = (event: SelectChangeEvent) => { + setGnomadFileValue(event.target.value); + mergeStateUpdate(lovdFileValue, clinvarFileValue, event.target.value); + gnomadErrorStateUpdate(''); + }; + + // + // Save To state + // + const [fileArray, setFileArray] = useState(fileTreeArray); + const [saveToValue, setSaveToValue] = useState(saveTo); + const [overrideValue, setOverrideValue] = useState(false); + + const handleSaveToChange = (event: SelectChangeEvent) => { + setSaveToValue(event.target.value); + saveToStateUpdate(event.target.value, false); + setOverrideValue(false); + }; + + const handleOverrideChange = (event: React.ChangeEvent) => { + setOverrideValue(event.target.checked); + saveToStateUpdate(saveToValue, event.target.checked); + }; + + // + // Effects + // + useEffect(() => { + setFileArray(getWorkspaceArray(fileTree)); + }, [fileTree]); + + return ( + + + + + {lovdError ? lovdError : 'Lovd File'} + + + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + + {clinvarError ? clinvarError : 'Clinvar File'} + + + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + + {gnomadError ? gnomadError : 'Gnomad File'} + + + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + + + + Save To + + + + New file... + + + root: + + {fileArray.map((file) => { + if (file.type === FileTypes.FOLDER) { + return ( + + {file.id}: + + ); + } + + return ( + + {file.label} + + ); + })} + + + + } + label={ + + Override + + } + labelPlacement='start' + /> + + + ); +}; From 523657e534c4e7b46bfb97ae6201e7a1a54722f2 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:45:07 +0300 Subject: [PATCH 54/72] MDE/PKFE-31 updated toolbar view --- .../components/toolbarView/toolbarGroup.tsx | 38 ++++++++++++------- .../components/toolbarView/toolbarView.tsx | 36 ++++++++++++------ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroup.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroup.tsx index 031a267..1d62db3 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroup.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroup.tsx @@ -1,7 +1,8 @@ -import { List, useTheme } from '@mui/material'; +import { Box, List, useTheme } from '@mui/material'; export interface ToolbarGroupProps { - children: React.ReactNode; + params: React.ReactNode; + buttons: React.ReactNode; } /** @@ -25,24 +26,33 @@ export interface ToolbarGroupProps { * @param {React.ReactNode} children - The child elements to be displayed inside the list. * @returns {JSX.Element} The rendered List component. 
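+ * @param {React.ReactNode} params - Parameter controls for the currently selected toolbar group.
+ * @param {React.ReactNode} buttons - Action buttons for the currently selected toolbar group.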
*/ -export const ToolbarGroup: React.FC = ({ children }) => { +export const ToolbarGroup: React.FC = ({ params, buttons }) => { const Theme = useTheme(); return ( - - {children} - + {params} + + {buttons} + + ); }; diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarView.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarView.tsx index 835a492..6768e78 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarView.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarView.tsx @@ -1,7 +1,5 @@ import { ToolbarGroup, - ToolbarGroupItem, - ToolbarGroupItemProps, ToolbarGroupsSelector, ToolbarGroupsSelectorItem, ToolbarGroupsSelectorItemProps, @@ -11,7 +9,13 @@ import { DownloadGroupButtons, MergeGroupButtons, } from '@/features/editor/components/toolbarView/toolbarGroupButtons'; -import { useMemo, useState } from 'react'; +import { + ApplyGroupParams, + DownloadGroupParams, + MergeGroupParams, +} from '@/features/editor/components/toolbarView/toolbarGroupParams'; +import { ToolbarContextProvider } from '@/features/editor/stores'; +import React, { useMemo, useState } from 'react'; /** * ToolbarView component manages and displays a set of toolbar groups and items. @@ -54,26 +58,34 @@ export const ToolbarView: React.FC = () => { }, ]; + // Combine the params groups into a dictionary for easy access + const ToolbarGroupsParams: Record = useMemo( + () => ({ + download: , + merge: , + apply: , + }), + [] + ); + // Combine the button groups into a dictionary for easy access - const ToolbarGroupsButtons: Record = useMemo( + const ToolbarGroupsButtons: Record = useMemo( () => ({ - download: DownloadGroupButtons, - merge: MergeGroupButtons, - apply: ApplyGroupButtons, + download: , + merge: , + apply: , }), [DownloadGroupButtons, MergeGroupButtons, ApplyGroupButtons] ); return ( - <> + {ToolbarGroups.map((group, index) => ( ))} - - {ToolbarGroupsButtons[selectedGroup]?.map((button, index) => )} - - + + ); }; From 33d500cdb4174a747520e46e2ac82234477cd7c4 Mon Sep 17 00:00:00 2001 From: Mantvydas Deltuva Date: Sun, 8 Sep 2024 00:54:43 +0300 Subject: [PATCH 55/72] MDE/PKFE-31 update button icon color on disabled --- .../features/editor/components/toolbarView/toolbarGroupItem.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx index 8a43dd8..1a24c20 100644 --- a/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx +++ b/app/front-end/src/features/editor/components/toolbarView/toolbarGroupItem.tsx @@ -42,7 +42,7 @@ export const ToolbarGroupItem: React.FC = ({ icon: Icon, return (
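The group buttons and param fields in these patches all read shared state through useToolbarContext, but the store itself is not part of this patch range. Below is a minimal TypeScript sketch, assuming plain React context plus local state, of what such a provider could provide; the field and updater names are taken from the call sites above, while the initial values and the shape of the real implementation in '@/features/editor/stores' are assumptions and may differ.

// Hypothetical sketch only - mirrors the API consumed by the toolbar group components above.
import React, { createContext, useContext, useState } from 'react';

export interface ToolbarState {
  saveTo: string;       // target path for generated files
  override: boolean;    // whether an existing file at saveTo may be overwritten
  gene: string;         // gene selected in the download params
  applyTo: string;      // file selected in the apply params
  applyError: string;
  lovdFile: string;     // inputs selected in the merge params
  clinvarFile: string;
  gnomadFile: string;
  lovdError: string;    // per-field validation messages
  clinvarError: string;
  gnomadError: string;
  saveToStateUpdate: (saveTo: string, override: boolean) => void;
  geneStateUpdate: (gene: string) => void;
  applyToStateUpdate: (applyTo: string) => void;
  applyErrorStateUpdate: (message: string) => void;
  mergeStateUpdate: (lovdFile: string, clinvarFile: string, gnomadFile: string) => void;
  lovdErrorStateUpdate: (message: string) => void;
  clinvarErrorStateUpdate: (message: string) => void;
  gnomadErrorStateUpdate: (message: string) => void;
}

const ToolbarContext = createContext<ToolbarState | undefined>(undefined);

export const useToolbarContext = (): ToolbarState => {
  const context = useContext(ToolbarContext);
  if (!context) throw new Error('useToolbarContext must be used within ToolbarContextProvider');
  return context;
};

export const ToolbarContextProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => {
  const [fields, setFields] = useState({
    saveTo: '/', override: false, gene: 'eys', applyTo: '', applyError: '',
    lovdFile: '', clinvarFile: '', gnomadFile: '',
    lovdError: '', clinvarError: '', gnomadError: '',
  });

  // Each updater patches only its own slice of the shared toolbar state.
  const value: ToolbarState = {
    ...fields,
    saveToStateUpdate: (saveTo, override) => setFields((s) => ({ ...s, saveTo, override })),
    geneStateUpdate: (gene) => setFields((s) => ({ ...s, gene })),
    applyToStateUpdate: (applyTo) => setFields((s) => ({ ...s, applyTo })),
    applyErrorStateUpdate: (applyError) => setFields((s) => ({ ...s, applyError })),
    mergeStateUpdate: (lovdFile, clinvarFile, gnomadFile) =>
      setFields((s) => ({ ...s, lovdFile, clinvarFile, gnomadFile })),
    lovdErrorStateUpdate: (lovdError) => setFields((s) => ({ ...s, lovdError })),
    clinvarErrorStateUpdate: (clinvarError) => setFields((s) => ({ ...s, clinvarError })),
    gnomadErrorStateUpdate: (gnomadError) => setFields((s) => ({ ...s, gnomadError })),
  };

  return <ToolbarContext.Provider value={value}>{children}</ToolbarContext.Provider>;
};

Keeping these fields in a single context rather than inside each button group lets the download, merge and apply groups share one saveTo/override selection, which matches the ToolbarContextProvider wrapper imported into the updated ToolbarView above.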