diff --git a/api/__init__.py b/api/__init__.py index b407f4e..8de60d4 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -46,7 +46,7 @@ download_database_for_eys_gene, # Functions for storing databases - store_database_for_eys_gene + download_selected_database_for_eys_gene ) # DATA REFACTORING IMPORT diff --git a/api/data/__init__.py b/api/data/__init__.py index 7bfdfbe..b2bc9c1 100644 --- a/api/data/__init__.py +++ b/api/data/__init__.py @@ -34,7 +34,7 @@ ) # DATA COLLECTION IMPORT -from .collection import ( +from .downloading import ( # Custom exceptions BadResponseException, DownloadError, @@ -49,7 +49,7 @@ download_data_from_gnomad_eys, # Functions for storing databases - store_database_for_eys_gene + download_selected_database_for_eys_gene ) diff --git a/api/data/constants.py b/api/data/constants.py index 757074c..fa0060f 100644 --- a/api/data/constants.py +++ b/api/data/constants.py @@ -8,6 +8,7 @@ LOVD_FILE_URL = "https://databases.lovd.nl/shared/download/all/gene/" LOVD_FILE_URL_EYS = LOVD_FILE_URL + "EYS" STORE_AS_LOVD = "../data/lovd/lovd_data.txt" +STORE_AS_GNOMAD = "../data/gnomad/gnomad_data.csv" GNOMAD_URL = "https://gnomad.broadinstitute.org/gene" GNOMAD_URL_EYS = "https://gnomad.broadinstitute.org/gene/ENSG00000188107?dataset=gnomad_r4" diff --git a/api/data/downloading.py b/api/data/downloading.py index 5fa786d..c253f14 100644 --- a/api/data/downloading.py +++ b/api/data/downloading.py @@ -18,7 +18,8 @@ LOVD_PATH, DATABASES_DOWNLOAD_PATHS, LOVD_FILE_URL_EYS, - STORE_AS_LOVD) + STORE_AS_LOVD, + STORE_AS_GNOMAD) # EXCEPTIONS @@ -176,21 +177,36 @@ def download_database_for_eys_gene(database_name, override=False): os.rename(latest_file, os_path) -def store_database_for_eys_gene(database_name, override=False): +def download_selected_database_for_eys_gene(database_name, save_path="", override=False): """ Calls a function to download a database. + :param database_name: the name of the database that should be downloaded + :param save_path: path to save the data :param override: should be already existing file be overwritten """ + if not isinstance(database_name, str): + raise TypeError("Database name should be a string") + database_name = database_name.lower() + + # if save_path is not provided, save to default location + if database_name == "lovd" and save_path == "": + save_path = STORE_AS_LOVD + elif database_name == "gnomad" and save_path == "": + save_path = STORE_AS_GNOMAD + + # check if database_name is supported if database_name not in DATABASES_DOWNLOAD_PATHS: - raise IndexError(f"Requested {database_name} database is not supported") + raise IndexError(f"Requested for {database_name} database is not supported") + + # download the database if database_name == "lovd": - download_lovd_database_for_eys_gene(database_name, override) + download_lovd_database_for_eys_gene(save_path, override) elif database_name == "gnomad": - download_data_from_gnomad_eys(database_name, override) + download_data_from_gnomad_eys(save_path, override) else: - download_database_for_eys_gene(database_name, override) + raise IndexError(f"Requested for {database_name} is not yet supported") def prepare_popmax_calculation(df, pop_data, name, pop_ids, index): @@ -215,7 +231,7 @@ def prepare_popmax_calculation(df, pop_data, name, pop_ids, index): df.loc[index, f'{name}_an_{variant_id}'] = pop['an'] -def download_data_from_gnomad_eys(path, override=False): +def download_data_from_gnomad_eys(path=STORE_AS_GNOMAD, override=False): """ Requests gnomAD API for data about a specific gene containing: - variant_id @@ -226,10 +242,8 @@ def download_data_from_gnomad_eys(path, override=False): - popmax - popmax population - :param str gene_name: name of gene - :param bool to_file: if True, saves data to variants.csv - :returns: DataFrame from gnomAD API - :rtype: DataFrame + :param str path: path to save the data (default: 'data/gnomad/gnomad_eys.csv') + :param bool override: should an existing file be overriden with a new one """ if os.path.exists(path) and not override: @@ -347,6 +361,4 @@ def download_data_from_gnomad_eys(path, override=False): df = df.filter(not_to_drop, axis="columns") if not os.path.isfile(path) or override: - df.to_csv(path, index=False) - - return df \ No newline at end of file + df.to_csv(path, index=False) \ No newline at end of file diff --git a/tests/pipeline2.ipynb b/tests/pipeline2.ipynb new file mode 100644 index 0000000..4e459ea --- /dev/null +++ b/tests/pipeline2.ipynb @@ -0,0 +1,4089 @@ +{ + "cells": [ + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "ExecuteTime": { + "end_time": "2024-09-26T20:31:26.514394Z", + "start_time": "2024-09-26T20:31:25.176141Z" + } + }, + "source": [ + "import pandas as pd\n", + "\n", + "from api.data import (download_selected_database_for_eys_gene,\n", + " parse_lovd,\n", + " LOVD_PATH,\n", + " set_lovd_dtypes)\n", + "from api.data import save_lovd_as_vcf\n", + "pd.options.display.max_columns = 0" + ], + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "code", + "id": "f49f7691a27aa7b4", + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-09-26T20:35:18.191483Z", + "start_time": "2024-09-26T20:32:53.778757Z" + } + }, + "source": "download_selected_database_for_eys_gene(\"gnomad\", save_path=\"../data/gnomad/gnomad_data_text.csv\", override=False)", + "outputs": [], + "execution_count": 3 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-09-26T20:36:09.300943Z", + "start_time": "2024-09-26T20:36:08.947865Z" + } + }, + "cell_type": "code", + "source": "download_selected_database_for_eys_gene(\"lovd\", save_path=\"../data/lovd/lovd_data_text.txt\", override=False)", + "id": "75f1962c219b6348", + "outputs": [], + "execution_count": 5 + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cf5c45c0f7b9de0f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-13T15:38:24.591752Z", + "start_time": "2024-05-13T15:38:19.498594Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8a089e29bfc8c119", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-13T15:12:07.510712Z", + "start_time": "2024-05-13T15:12:07.366319Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Genes\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamechromosomechrom_bandimprintingrefseq_genomicrefseq_UDreferenceurl_homepageurl_externalallow_downloadid_hgncid_entrezid_omimshow_hgmdshow_genecardsshow_genetestsshow_orphanetnote_indexnote_listingrefseqrefseq_urldisclaimerdisclaimer_textheaderheader_alignfooterfooter_aligncreated_bycreated_dateedited_byedited_dateupdated_byupdated_date
0EYSeyes shut homolog (Drosophila)6q12unknownNG_023443.2UD_132085377375http://www.LOVD.nl/EYSNaN21555346007612424NaNNaNNaNNaN<font color=\\\"#FF0000\\\">This database is one o...ghttp://databases.lovd.nl/shared/refseq/EYS_NM_...NaN<font color=\\\"#FF0000\\\">This database is one o...-1-112012-02-1362023-08-30 13:08:1902024-04-19 20:27:30
\n", + "
" + ], + "text/plain": [ + " id name ... updated_by updated_date\n", + "0 EYS eyes shut homolog (Drosophila) ... 0 2024-04-19 20:27:30\n", + "\n", + "[1 rows x 34 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transcripts\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgeneidnameid_mutalyzerid_ncbiid_ensemblid_protein_ncbiid_protein_ensemblid_protein_uniprotremarksposition_c_mrna_startposition_c_mrna_endposition_c_cds_endposition_g_mrna_startposition_g_mrna_endcreated_bycreated_dateedited_byedited_date
07329EYStranscript variant 11NM_001142800.1NP_001136272.1-5381005194356641711864429876<NA>NaT<NA>NaT
\n", + "
" + ], + "text/plain": [ + " id geneid name ... created_date edited_by edited_date\n", + "0 7329 EYS transcript variant 1 ... NaT NaT\n", + "\n", + "[1 rows x 19 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Diseases\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsymbolnameinheritanceid_omimtissuesfeaturesremarkscreated_bycreated_dateedited_byedited_date
012PSORSpsoriasis, pustular, generalized (PSORS)<NA>62012-07-06 21:50:3262019-08-12 13:38:21
158CORDdystrophy, cone-rod (CORD)<NA>62012-09-22 11:31:2562020-08-30 09:43:59
2112RPretinitis pigmentosa (RP)26800012013-02-21 17:12:3662021-01-18 09:53:26
3139IDintellectual disability (ID)<NA>842013-06-04 18:18:0762015-02-09 10:02:49
4173SLOSSmith-Lemli-Opitz syndrome (SLOS)AR27040062013-08-01 11:16:1462021-12-10 21:51:32
5198?unclassified / mixed<NA>62013-09-13 14:21:4762016-10-22 17:54:40
62156-retinitis pigmentosa, X-linked, and sinorespir...30045562014-09-25 23:29:4062021-12-10 21:51:32
72440RP25retinitis pigmentosa, type 25 (RP25)AR60277262014-09-25 23:29:4062021-12-10 21:51:32
84211RParretinitis pigmentosa, autosomal recessive (RPar)<NA>62015-02-27 18:58:57<NA>NaT
94214-retinal disease<NA>62015-02-27 19:48:0712023-03-09 14:26:26
104249macular dystrophydystrophy, macular<NA>62015-05-04 22:10:5862024-02-15 21:18:39
115086HLhearing loss (HL)<NA>62015-10-23 11:41:0562015-10-23 11:43:00
125415USHUsher syndrome (USH)<NA>62018-04-02 16:40:44<NA>NaT
135468uveitisuveitis<NA>62018-08-22 09:47:04<NA>NaT
146906DEEencephalopathy, developmental and epileptic<NA>62022-04-07 09:24:23<NA>NaT
\n", + "
" + ], + "text/plain": [ + " id symbol ... edited_by edited_date\n", + "0 12 PSORS ... 6 2019-08-12 13:38:21\n", + "1 58 CORD ... 6 2020-08-30 09:43:59\n", + "2 112 RP ... 6 2021-01-18 09:53:26\n", + "3 139 ID ... 6 2015-02-09 10:02:49\n", + "4 173 SLOS ... 6 2021-12-10 21:51:32\n", + "5 198 ? ... 6 2016-10-22 17:54:40\n", + "6 2156 - ... 6 2021-12-10 21:51:32\n", + "7 2440 RP25 ... 6 2021-12-10 21:51:32\n", + "8 4211 RPar ... NaT\n", + "9 4214 - ... 1 2023-03-09 14:26:26\n", + "10 4249 macular dystrophy ... 6 2024-02-15 21:18:39\n", + "11 5086 HL ... 6 2015-10-23 11:43:00\n", + "12 5415 USH ... NaT\n", + "13 5468 uveitis ... NaT\n", + "14 6906 DEE ... NaT\n", + "\n", + "[15 rows x 12 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Genes_To_Diseases\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geneiddiseaseid
0EYS112
1EYS2440
\n", + "
" + ], + "text/plain": [ + " geneid diseaseid\n", + "0 EYS 112\n", + "1 EYS 2440" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Individuals\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfatheridmotheridpanelidpanel_sizelicenseowned_byIndividual/ReferenceIndividual/RemarksIndividual/GenderIndividual/ConsanguinityIndividual/Origin/GeographicIndividual/Age_of_deathIndividual/VIPIndividual/Data_avIndividual/TreatmentIndividual/Origin/PopulationIndividual/Individual_ID
0135<NA>36{PMID:Marrakchi 2011:21848462}5-generation family, 3 affecteds (M)MyesTunisia
1210<NA>139{PMID:Abu-Safieh-2013:23105016}(Saudi Arabia)
21962<NA>125M?Germanywhite
316605<NA>15520
433096<NA>1229{PMID:Neveling 2012:22334370}Mno0
.........................................................
1445447702<NA>16{PMID:Weisschuh 2024:37734845}patient, no family historyFGermany0SRP-1105
1446447707<NA>16{PMID:Weisschuh 2024:37734845}patient, no family historyMGermany0SRP-1167
1447447716<NA>16{PMID:Weisschuh 2024:37734845}patient, no family historyFGermany0SRP-1249
1448447718<NA>16{PMID:Weisschuh 2024:37734845}patient, no family historyMGermany0SRP-1274
1449447720<NA>16{PMID:Weisschuh 2024:37734845}patientMGermany0SRP-1299
\n", + "

1450 rows × 18 columns

\n", + "
" + ], + "text/plain": [ + " id fatherid ... Individual/Origin/Population Individual/Individual_ID\n", + "0 135 ... \n", + "1 210 ... \n", + "2 1962 ... white \n", + "3 16605 ... \n", + "4 33096 ... \n", + "... ... ... ... ... ...\n", + "1445 447702 ... SRP-1105\n", + "1446 447707 ... SRP-1167\n", + "1447 447716 ... SRP-1249\n", + "1448 447718 ... SRP-1274\n", + "1449 447720 ... SRP-1299\n", + "\n", + "[1450 rows x 18 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Individuals_To_Diseases\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
individualiddiseaseid
013512
121058
21962173
3330964214
4331094214
.........
1444447702198
1445447707198
1446447716198
1447447718198
1448447720198
\n", + "

1449 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " individualid diseaseid\n", + "0 135 12\n", + "1 210 58\n", + "2 1962 173\n", + "3 33096 4214\n", + "4 33109 4214\n", + "... ... ...\n", + "1444 447702 198\n", + "1445 447707 198\n", + "1446 447716 198\n", + "1447 447718 198\n", + "1448 447720 198\n", + "\n", + "[1449 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Phenotypes\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddiseaseidindividualidowned_byPhenotype/InheritancePhenotype/AgePhenotype/AdditionalPhenotype/Biochem_paramPhenotype/Age/OnsetPhenotype/Age/DiagnosisPhenotype/Severity_scorePhenotype/OnsetPhenotype/ProteinPhenotype/Tumor/MSIPhenotype/Enzyme/CPKPhenotype/Heart/MyocardiumPhenotype/LungPhenotype/Diagnosis/DefinitePhenotype/Diagnosis/InitialPhenotype/Diagnosis/Criteria
08121356Familial, autosomal recessive
1265821039Familial, autosomal recessive
2941173196225Familial2-3 toe syndactyly5
326525421433096229Unknownretinitis pigmentosa
426538421433109229Unknownretinitis pigmentosa
...............................................................
12663369011984477026Unknownretinitis pigmentosa, simplex
12673369061984477076Unknownretinitis pigmentosa, simplex
12683369151984477166Unknownretinitis pigmentosa, simplex
12693369171984477186Unknownretinitis pigmentosa, simplex
12703369191984477206Unknownretinitis pigmentosa, simplex
\n", + "

1271 rows × 20 columns

\n", + "
" + ], + "text/plain": [ + " id ... Phenotype/Diagnosis/Criteria\n", + "0 8 ... \n", + "1 26 ... \n", + "2 941 ... \n", + "3 26525 ... \n", + "4 26538 ... \n", + "... ... ... ...\n", + "1266 336901 ... \n", + "1267 336906 ... \n", + "1268 336915 ... \n", + "1269 336917 ... \n", + "1270 336919 ... \n", + "\n", + "[1271 rows x 20 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Screenings\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idindividualidvariants_foundowned_bycreated_bycreated_dateedited_byedited_dateScreening/TechniqueScreening/TemplateScreening/TissueScreening/Remarks
01261351662012-07-07 19:04:1962012-07-07 19:12:08RT-PCR;SEQDNA;RNA
121121013962012-09-22 11:36:24<NA>NaTSEQDNA
21640196212562010-03-11 16:36:41252012-04-13 15:18:00SEQDNA
3165571660515525522014-05-23 13:12:43<NA>NaTSEQ-NG-IDNA
4331643309612292292012-02-04 15:20:0162012-05-18 13:59:33SEQ;SEQ-NG-SDNA
.......................................
14454492794477021662024-01-26 10:23:59<NA>NaTSEQ-NGDNAWGS
14464492844477071662024-01-26 10:23:59<NA>NaTSEQ-NGDNAWGS
14474492934477161662024-01-26 10:23:59<NA>NaTSEQ-NGDNAWGS
14484492954477181662024-01-26 10:23:59<NA>NaTSEQ-NGDNAWGS
14494492974477201662024-01-26 10:23:59<NA>NaTSEQ-NGDNAWGS
\n", + "

1450 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " id individualid ... Screening/Tissue Screening/Remarks\n", + "0 126 135 ... \n", + "1 211 210 ... \n", + "2 1640 1962 ... \n", + "3 16557 16605 ... \n", + "4 33164 33096 ... \n", + "... ... ... ... ... ...\n", + "1445 449279 447702 ... WGS\n", + "1446 449284 447707 ... WGS\n", + "1447 449293 447716 ... WGS\n", + "1448 449295 447718 ... WGS\n", + "1449 449297 447720 ... WGS\n", + "\n", + "[1450 rows x 12 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Screenings_To_Genes\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
screeningidgeneid
0126IL36RN
1211MKS1
21640DHCR7
333164AHI1
433164EYS
.........
1311437646EYS
1312437902EYS
1313437922EYS
1314443144EYS
1315443145EYS
\n", + "

1316 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " screeningid geneid\n", + "0 126 IL36RN\n", + "1 211 MKS1\n", + "2 1640 DHCR7\n", + "3 33164 AHI1\n", + "4 33164 EYS\n", + "... ... ...\n", + "1311 437646 EYS\n", + "1312 437902 EYS\n", + "1313 437922 EYS\n", + "1314 443144 EYS\n", + "1315 443145 EYS\n", + "\n", + "[1316 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variants_On_Genome\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idalleleeffectidchromosomeposition_g_startposition_g_endtypeaverage_frequencyowned_byVariantOnGenome/DBIDVariantOnGenome/DNAVariantOnGenome/FrequencyVariantOnGenome/ReferenceVariantOnGenome/Restriction_siteVariantOnGenome/Published_asVariantOnGenome/RemarksVariantOnGenome/Genetic_originVariantOnGenome/SegregationVariantOnGenome/dbSNPVariantOnGenome/VIPVariantOnGenome/MethylationVariantOnGenome/ISCNVariantOnGenome/DNA/hg38VariantOnGenome/ClinVarVariantOnGenome/ClinicalClassificationVariantOnGenome/ClinicalClassification/Method
03642635066449897164498971subst0.000743552EYS_000007g.64498971A>GGermline0g.63789078A>GVUS
15988135566565575865655758subst0.001153229EYS_000001g.65655758T>GExAC: 60, 19750, 0, 0.003038{PMID:Neveling 2012:22334370}Q770PGermlineyes0g.64945865T>GVUS
25988311166533614365336143subst0.224189229EYS_000002g.65336143G>AExAC: 3936, 19366, 441, 0.2032{PMID:Neveling 2012:22334370}p.?unaffected brother also this variant homozygousGermlineno0g.64626250G>Abenign
35988411566530086965300869subst0.000838229EYS_000003g.65300869G>AExAC: 12, 19406, 0, 0.0006184{PMID:Neveling 2012:22334370}(P1631S)predicted benign, disease-related variant in o...Germline0g.64590976G>Abenign
45988511166501699865016999del0.000000229EYS_000004g.65016998_65016999delExAC: 9866, 18292, 921, 0.5394{PMID:Neveling 2012:22334370}6045-4_6045-3delpredicted benignGermlineyes0g.64307105_64307106delbenign
.................................................................................
253696421103066576763465767634subst0.2430222330EYS_000248g.65767634G>AEYS(NM_001292009.2):c.2024-15_2024-14delTCinsTTVKGL data sharing initiative NederlandCLASSIFICATION recordlikely benign
253796421203066576764365767643del0.0000002330EYS_000926g.65767643delEYS(NM_001292009.2):c.2024-15delTVKGL data sharing initiative NederlandCLASSIFICATION recordlikely benign
253896421505066600592766005927subst0.0001122327EYS_000253g.66005927C>TEYS(NM_001142800.1):c.1852G>A (p.G618S), EYS(N...VKGL data sharing initiative NederlandCLASSIFICATION recordVUS
253996421605066604487466044874subst0.0000822327EYS_000256g.66044874T>CEYS(NM_001292009.2):c.1765A>G (p.R589G)VKGL data sharing initiative NederlandCLASSIFICATION recordVUS
254097731409066443094364430943subst0.0000071804EYS_000060g.64430943A>TEYS(NM_001142800.2):c.8984T>A (p.(Ile2995Asn))...VKGL data sharing initiative NederlandCLASSIFICATION recordpathogenic
\n", + "

2541 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " id ... VariantOnGenome/ClinicalClassification/Method\n", + "0 36426 ... \n", + "1 59881 ... \n", + "2 59883 ... \n", + "3 59884 ... \n", + "4 59885 ... \n", + "... ... ... ...\n", + "2536 964211 ... \n", + "2537 964212 ... \n", + "2538 964215 ... \n", + "2539 964216 ... \n", + "2540 977314 ... \n", + "\n", + "[2541 rows x 26 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variants_On_Transcripts\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtranscriptideffectidposition_c_startposition_c_start_intronposition_c_endposition_c_end_intronVariantOnTranscript/DNAVariantOnTranscript/RNAVariantOnTranscript/ProteinVariantOnTranscript/Exon
0364267329507558075580c.7558T>Cr.(?)p.(Phe2520Leu)38
1598817329552309023090c.2309A>Cr.(?)p.(Gln770Pro)15
2598837329113444-53444-5c.3444-5C>Tr.(?)p.(=)22i
3598847329154891048910c.4891C>Tr.(?)p.(Pro1631Ser)26
4598857329116079-46079-3c.6079-4_6079-3delr.(?)p.(=)29i
....................................
25369642117329302024-142024-14c.2024-14C>Tr.(=)p.(=)
25379642127329302024-152024-15c.2024-15delr.(=)p.(=)
25389642157329501852018520c.1852G>Ar.(?)p.(Gly618Ser)
25399642167329501765017650c.1765A>Gr.(?)p.(Arg589Gly)
25409773147329908984089840c.8984T>Ar.(?)p.(Ile2995Asn)
\n", + "

2541 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " id transcriptid ... VariantOnTranscript/Protein VariantOnTranscript/Exon\n", + "0 36426 7329 ... p.(Phe2520Leu) 38\n", + "1 59881 7329 ... p.(Gln770Pro) 15\n", + "2 59883 7329 ... p.(=) 22i\n", + "3 59884 7329 ... p.(Pro1631Ser) 26\n", + "4 59885 7329 ... p.(=) 29i\n", + "... ... ... ... ... ...\n", + "2536 964211 7329 ... p.(=) \n", + "2537 964212 7329 ... p.(=) \n", + "2538 964215 7329 ... p.(Gly618Ser) \n", + "2539 964216 7329 ... p.(Arg589Gly) \n", + "2540 977314 7329 ... p.(Ile2995Asn) \n", + "\n", + "[2541 rows x 11 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Screenings_To_Variants\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
screeningidvariantid
0126783293
1211790459
21640235838
31655736426
43316459884
.........
2144449279959046
2145449284959051
2146449293959060
2147449295959474
2148449297959064
\n", + "

2149 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " screeningid variantid\n", + "0 126 783293\n", + "1 211 790459\n", + "2 1640 235838\n", + "3 16557 36426\n", + "4 33164 59884\n", + "... ... ...\n", + "2144 449279 959046\n", + "2145 449284 959051\n", + "2146 449293 959060\n", + "2147 449295 959474\n", + "2148 449297 959064\n", + "\n", + "[2149 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for i in data:\n", + " print(i)\n", + " display(data[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef07740b2fa63e42", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "set_lovd_dtypes(data)\n", + "for i in data:\n", + " print(i)\n", + " display(data[i].info())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c968af1617be40db", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-13T15:38:25.149624Z", + "start_time": "2024-05-13T15:38:24.807199Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Skipping variant g.64307105_64307106del\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64902422_64902438del\n", + "WARNING:root:Skipping variant g.64902422_64902438del\n", + "WARNING:root:Skipping variant g.64840707_64997105del\n", + "WARNING:root:Skipping variant g.64840707_64997105del\n", + "WARNING:root:Skipping variant g.64840707_64997105del\n", + "WARNING:root:Skipping variant g.65295915del\n", + "WARNING:root:Skipping variant g.65295915del\n", + "WARNING:root:Skipping variant g.65295915del\n", + "WARNING:root:Skipping variant g.65057728_65320715del\n", + "WARNING:root:Skipping variant g.65057728_65320715del\n", + "WARNING:root:Skipping variant g.65057728_65320715del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65384425del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64439165del\n", + "WARNING:root:Skipping variant g.64439165del\n", + "WARNING:root:Skipping variant g.64626122del\n", + "WARNING:root:Skipping variant g.65494867del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65494867del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.63720919_63720920del\n", + "WARNING:root:Skipping variant g.63720668dup\n", + "WARNING:root:Skipping variant g.63720919_63720920del\n", + "WARNING:root:Skipping variant g.63720668dup\n", + "WARNING:root:Skipping variant g.63999116del\n", + "WARNING:root:Skipping variant g.64591039_64591042del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64822643dup\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.64591505_64591506delinsCT\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65384480dup\n", + "WARNING:root:Skipping variant g.63726599_63726600del\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.63726599_63726600del\n", + "WARNING:root:Skipping variant g.65495005_65495008delinsAAG\n", + "WARNING:root:Skipping variant g.63726599_63726600del\n", + "WARNING:root:Skipping variant g.65335102del\n", + "WARNING:root:Skipping variant g.65335102del\n", + "WARNING:root:Skipping variant g.65335102del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63999116del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63999110_63999111del\n", + "WARNING:root:Skipping variant g.63999110_63999111del\n", + "WARNING:root:Skipping variant g.65321830_65370656del\n", + "WARNING:root:Skipping variant g.63720850_63720853del\n", + "WARNING:root:Skipping variant g.65321830_65370656del\n", + "WARNING:root:Skipping variant g.63720850_63720853del\n", + "WARNING:root:Skipping variant g.65321830_65370656del\n", + "WARNING:root:Skipping variant g.63720850_63720853del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63806240del\n", + "WARNING:root:Skipping variant g.63806240del\n", + "WARNING:root:Skipping variant g.65495332_65495333dup\n", + "WARNING:root:Skipping variant g.65324960_65416038del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65295915del\n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.65494988_65495003del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.65384480dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.65344144_65344152delinsCTTTTCG\n", + "WARNING:root:Skipping variant g.63984409_63984410delinsACGAT\n", + "WARNING:root:Skipping variant g.63788163_63788164del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64912603dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590700dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590700dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64591845del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63984390del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63721651_63721652insCA\n", + "WARNING:root:Skipping variant g.64590665_64590666del\n", + "WARNING:root:Skipping variant g.63721651_63721652insCA\n", + "WARNING:root:Skipping variant g.64590665_64590666del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64591466dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64591480del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63788136del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63720730_63720733del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64912603dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590665_64590666del\n", + "WARNING:root:Skipping variant g.63721652_63721655dup\n", + "WARNING:root:Skipping variant g.64590665_64590666del\n", + "WARNING:root:Skipping variant g.63721652_63721655dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64912603dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64591313del\n", + "WARNING:root:Skipping variant g.64439355_64439356insA\n", + "WARNING:root:Skipping variant g.63984543_63984570del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63984537_63984542dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64614795_64939832del\n", + "WARNING:root:Skipping variant g.64614795_64939832del\n", + "WARNING:root:Skipping variant g.64614795_64939832del\n", + "WARNING:root:Skipping variant g.63721576del\n", + "WARNING:root:Skipping variant g.64902132_64902133del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720889dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63721625dup\n", + "WARNING:root:Skipping variant g.65405300dup\n", + "WARNING:root:Skipping variant g.64813506del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63720753_63720754dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.65405325dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.65707136_65707226del\n", + "WARNING:root:Skipping variant g.65495348del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63720845_63720846del\n", + "WARNING:root:Skipping variant g.63726607del\n", + "WARNING:root:Skipping variant g.63721786dup\n", + "WARNING:root:Skipping variant g.63721640del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65344138dup\n", + "WARNING:root:Skipping variant g.65402503del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590699_64590700del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590699_64590700del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65384384_65384387del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65384480dup\n", + "WARNING:root:Skipping variant g.63806228dup\n", + "WARNING:root:Skipping variant g.63999116del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.65384384_65384387del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63999110_63999111del\n", + "WARNING:root:Skipping variant g.63999110_63999111del\n", + "WARNING:root:Skipping variant g.63999110_63999111del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64439331del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65494957dup\n", + "WARNING:root:Skipping variant g.63720995del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63721771_63721776del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.65494957dup\n", + "WARNING:root:Skipping variant g.64591977del\n", + "WARNING:root:Skipping variant g.64886728_64886736del\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant g.63721138del\n", + "WARNING:root:Skipping variant g.65353541del\n", + "WARNING:root:Skipping variant g.65353541del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65494988_65495002del\n", + "WARNING:root:Skipping variant g.65295857dup\n", + "WARNING:root:Skipping variant g.65295856_65295857dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64307105_64307106del\n", + "WARNING:root:Skipping variant g.64307103_64307106del\n", + "WARNING:root:Skipping variant g.63726618_63726622del\n", + "WARNING:root:Skipping variant g.63720642_63720644del\n", + "WARNING:root:Skipping variant g.65295857dup\n", + "WARNING:root:Skipping variant g.64307105_64307106del\n", + "WARNING:root:Skipping variant g.65353541del\n", + "WARNING:root:Skipping variant g.64591918_64591919del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64590525_64590548del\n", + "WARNING:root:Skipping variant g.64388841_64388843del\n", + "WARNING:root:Skipping variant g.64307084_64307085del\n", + "WARNING:root:Skipping variant g.64307105_64307106del\n", + "WARNING:root:Skipping variant g.64307103_64307106del\n", + "WARNING:root:Skipping variant g.64307105_64307106del\n", + "WARNING:root:Skipping variant g.64593097_64593101del\n", + "WARNING:root:Skipping variant g.65384384_65384387del\n", + "WARNING:root:Skipping variant g.63999110_63999111del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64439213del\n", + "WARNING:root:Skipping variant g.63721771_63721776del\n", + "WARNING:root:Skipping variant g.65494957dup\n", + "WARNING:root:Skipping variant g.65494988_65495008delinsAAAAG\n", + "WARNING:root:Skipping variant g.63720799_63720808del\n", + "WARNING:root:Skipping variant g.63726584del\n", + "WARNING:root:Skipping variant g.63726599_63726600del\n", + "WARNING:root:Skipping variant g.63726648del\n", + "WARNING:root:Skipping variant g.64349976_64426764del\n", + "WARNING:root:Skipping variant g.64591505_64591506delinsCT\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720728_63720729del\n", + "WARNING:root:Skipping variant g.63720845_63720846del\n", + "WARNING:root:Skipping variant g.63721162del\n", + "WARNING:root:Skipping variant g.63721237_63721240del\n", + "WARNING:root:Skipping variant g.63721343del\n", + "WARNING:root:Skipping variant g.63721730_63721733del\n", + "WARNING:root:Skipping variant g.63721763_63721767del\n", + "WARNING:root:Skipping variant g.63721787dup\n", + "WARNING:root:Skipping variant g.63726618_63726622del\n", + "WARNING:root:Skipping variant g.63788163_63788164del\n", + "WARNING:root:Skipping variant g.63984389del\n", + "WARNING:root:Skipping variant g.64066348del\n", + "WARNING:root:Skipping variant g.64590909dup\n", + "WARNING:root:Skipping variant g.64591309del\n", + "WARNING:root:Skipping variant g.64591858_64591859del\n", + "WARNING:root:Skipping variant g.64886728_64886736del\n", + "WARNING:root:Skipping variant g.64945814del\n", + "WARNING:root:Skipping variant g.65344143_65344144insCTTT\n", + "WARNING:root:Skipping variant g.65344146_65344151del\n", + "WARNING:root:Skipping variant g.65344181dup\n", + "WARNING:root:Skipping variant g.65384473dup\n", + "WARNING:root:Skipping variant g.65405287del\n", + "WARNING:root:Skipping variant g.65494885_65494887del\n", + "WARNING:root:Skipping variant g.65494961del\n", + "WARNING:root:Skipping variant g.65495205del\n", + "WARNING:root:Skipping variant g.65495296_65495297del\n", + "WARNING:root:Skipping variant g.64590909dup\n", + "WARNING:root:Skipping variant g.65384473dup\n", + "WARNING:root:Skipping variant g.65405287del\n", + "WARNING:root:Skipping variant g.65494885_65494887del\n", + "WARNING:root:Skipping variant g.63762589del\n", + "WARNING:root:Skipping variant g.63720874del\n", + "WARNING:root:Skipping variant g.63721619_63721620insGT\n", + "WARNING:root:Skipping variant g.63788268dup\n", + "WARNING:root:Skipping variant g.63984369_63984392del\n", + "WARNING:root:Skipping variant g.65057740_65057741insA\n", + "WARNING:root:Skipping variant g.65057740_65057741insAA\n", + "WARNING:root:Skipping variant g.65057750dup\n", + "WARNING:root:Skipping variant g.65384384_65384387del\n", + "WARNING:root:Skipping variant g.65402624dup\n", + "WARNING:root:Skipping variant g.65405377dup\n", + "WARNING:root:Skipping variant g.65494957dup\n", + "WARNING:root:Skipping variant g.65494957dup\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant g.63721625dup\n", + "WARNING:root:Skipping variant g.63721704dup\n", + "WARNING:root:Skipping variant g.65353537dup\n", + "WARNING:root:Skipping variant g.65335105_65335108del\n", + "WARNING:root:Skipping variant g.63721385_63721386del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64439195del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63720867_63720868del\n", + "WARNING:root:Skipping variant g.63721314_63721321del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63765706_63791377del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63781919_63803805del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64892926_64948294del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64591845del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64945857dup\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63721557_63721558insA\n", + "WARNING:root:Skipping variant g.64439319_64439323del\n", + "WARNING:root:Skipping variant g.65296021del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65384425del\n", + "WARNING:root:Skipping variant g.64591039_64591042del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63721599_63721604del\n", + "WARNING:root:Skipping variant g.64902132_64902133del\n", + "WARNING:root:Skipping variant g.65494988_65495008delinsAAAAG\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65495236del\n", + "WARNING:root:Skipping variant g.64590908_64590909insT\n", + "WARNING:root:Skipping variant g.64617409_64617411dup\n", + "WARNING:root:Skipping variant g.65295857dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63720845_63720846del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720668dup\n", + "WARNING:root:Skipping variant g.(63741975_63762461)_(63778180_63788105)dup\n", + "WARNING:root:Skipping variant g.65296051_65296052del\n", + "WARNING:root:Skipping variant g.65296051_65296052del\n", + "WARNING:root:Skipping variant g.64439200dup\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.65494885_65494887del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63984390del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.65295897del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65494885_65494887del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590615dup\n", + "WARNING:root:Skipping variant g.63721705del\n", + "WARNING:root:Skipping variant g.64081884del\n", + "WARNING:root:Skipping variant g.65405342_65405355delinsAAA\n", + "WARNING:root:Skipping variant g.65490640_65490643dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65296058del\n", + "WARNING:root:Skipping variant g.65405345del\n", + "WARNING:root:Skipping variant g.(65405368_65490593)_(65495411_?)del\n", + "WARNING:root:Skipping variant g.(65353618_65384385)_(65405368_65490593)del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64081885_64081888del\n", + "WARNING:root:Skipping variant g.64591401_64591408dup\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63721237_63721240del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64591256_64591272del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.63721436del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64591039_64591042del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.63720695_63720714del\n", + "WARNING:root:Skipping variant g.65384480dup\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.65495064del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64066470_64066473del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64066470_64066473del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.63720695_63720714del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720668del\n", + "WARNING:root:Skipping variant g.64912603dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63720730_63720733del\n", + "WARNING:root:Skipping variant g.64902132_64902133del\n", + "WARNING:root:Skipping variant g.63720954dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64066335del\n", + "WARNING:root:Skipping variant g.64886711del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.63778166_63778172del\n", + "WARNING:root:Skipping variant g.64590914dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63726618_63726622del\n", + "WARNING:root:Skipping variant g.64886728_64886736del\n", + "WARNING:root:Skipping variant g.64893157_64947352del\n", + "WARNING:root:Skipping variant g.64797009_64846087del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.63726519del\n", + "WARNING:root:Skipping variant g.64997275_64998015del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64081605_64082252del\n", + "WARNING:root:Skipping variant g.63777755_63789474dup\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.64081605_64082252del\n", + "WARNING:root:Skipping variant g.65353208_65353867del\n", + "WARNING:root:Skipping variant g.63984134_63984854del\n", + "WARNING:root:Skipping variant g.63743557_63907234del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.64662532_64979780del\n", + "WARNING:root:Skipping variant g.64839119_64970113del\n", + "WARNING:root:Skipping variant g.63720872_63720873del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64081885_64081888del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.63720682dup\n", + "WARNING:root:Skipping variant g.63721619dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64388841del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63720649_63720653del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63720695_63720714del\n", + "WARNING:root:Skipping variant g.65384384_65384387del\n", + "WARNING:root:Skipping variant g.63720730_63720733del\n", + "WARNING:root:Skipping variant g.65335105_65335108del\n", + "WARNING:root:Skipping variant g.64439165del\n", + "WARNING:root:Skipping variant g.63999116del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63998527_64002156del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.65495379dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63720730_63720733del\n", + "WARNING:root:Skipping variant g.63720730_63720733del\n", + "WARNING:root:Skipping variant g.63721237_63721240del\n", + "WARNING:root:Skipping variant g.63721237_63721240del\n", + "WARNING:root:Skipping variant g.63721237_63721240del\n", + "WARNING:root:Skipping variant g.64081885_64081888del\n", + "WARNING:root:Skipping variant g.65495348dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64439331del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64912705del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64439200dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.65495181del\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.63721625dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.65384480del\n", + "WARNING:root:Skipping variant g.64388841_64388843del\n", + "WARNING:root:Skipping variant g.63998527_64002156del\n", + "WARNING:root:Skipping variant g.64590525_64590550delinsTA\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64081884del\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.216327637C>T\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.215879068C>T\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.215878931del\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.65384480dup\n", + "WARNING:root:Skipping variant g.216073265G>A\n", + "WARNING:root:Skipping variant g.65495206_65495207insTGCCAGTTTA\n", + "WARNING:root:Skipping variant g.63721227dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.63720990_63720991insATAT\n", + "WARNING:root:Skipping variant g.63720728_63720729del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720947_63720948insT\n", + "WARNING:root:Skipping variant g.64590875_64590876insTCTT\n", + "WARNING:root:Skipping variant g.63721432del\n", + "WARNING:root:Skipping variant g.64591501_64591502insAGAA\n", + "WARNING:root:Skipping variant g.64590556_64590566del\n", + "WARNING:root:Skipping variant g.63999116del\n", + "WARNING:root:Skipping variant g.63720753_63720754dup\n", + "WARNING:root:Skipping variant g.(?_64945792)_(64945915_64997581)del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63720730_63720733del\n", + "WARNING:root:Skipping variant g.63721377_63721384del\n", + "WARNING:root:Skipping variant g.63726524del\n", + "WARNING:root:Skipping variant g.64423168_64798957delinsATGA\n", + "WARNING:root:Skipping variant g.63942752_64337822delinsATTATG\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant g.?\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64423168_64798957delinsATGA\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63942752_64337822delinsATTATG\n", + "WARNING:root:Skipping variant g.63957115_63958454del\n", + "WARNING:root:Skipping variant g.65001113_65005820del\n", + "WARNING:root:Skipping variant g.65550144_65552138del\n", + "WARNING:root:Skipping variant g.65689153_65694794del\n", + "WARNING:root:Skipping variant g.65454073_65454074insN[305]\n", + "WARNING:root:Skipping variant g.64296539_64296632del\n", + "WARNING:root:Skipping variant g.65204982_65205044del\n", + "WARNING:root:Skipping variant g.65564961_65565284del\n", + "WARNING:root:Skipping variant :g.64295412_64295413insN[118]\n", + "WARNING:root:Skipping variant g.65278328_65278329insN[59]\n", + "WARNING:root:Skipping variant g.64590911dup\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64590875_64590876insTCTT\n", + "WARNING:root:Skipping variant g.63720947_63720948insT\n", + "WARNING:root:Skipping variant g.64591501_64591502insAGAA\n", + "WARNING:root:Skipping variant g.63721432del\n", + "WARNING:root:Skipping variant g.64590556_64590566del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.65658176_65718924del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant NM_001142800.2:c.6079-2A>G\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.63999116del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.64986218_65013355del\n", + "WARNING:root:Skipping variant g.64388690_64388840del\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant g.(?_63719980)_(63726681_63762460)dup\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.65274506_65316845delinsAGATCA\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.65494100_65508832del\n", + "WARNING:root:Skipping variant g.65213025_65296862delinsGTTTTCTTTTTA\n", + "WARNING:root:Skipping variant g.64066349del\n", + "WARNING:root:Skipping variant g.64602159_64657461dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63762589del\n", + "WARNING:root:Skipping variant g.65383303_65441305delinsAACTTTTACT\n", + "WARNING:root:Skipping variant g.63720737_63720746del\n", + "WARNING:root:Skipping variant g.65284957_66872862delinsT\n", + "WARNING:root:Skipping variant g.64122444_64129159delins64204448_64235506inv\n", + "WARNING:root:Skipping variant g.64937848_64948401delins[64944099_64944163inv;CAATTTTGTAT]\n", + "WARNING:root:Skipping variant g.63721385_63721386del\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63720654_63720657del\n", + "WARNING:root:Skipping variant g.64591069_64591081del\n", + "WARNING:root:Skipping variant g.64886841del\n", + "WARNING:root:Skipping variant g.64790603_64977512del\n", + "WARNING:root:Skipping variant g.63721625dup\n", + "WARNING:root:Skipping variant g.64591514_64591520del\n", + "WARNING:root:Skipping variant g.63720649_63720653del\n", + "WARNING:root:Skipping variant g.65479942_67131267inv\n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n", + "WARNING:root:Skipping variant \n" + ] + } + ], + "source": [ + "save_lovd_as_vcf(data[\"Variants_On_Genome\"], \"./lovd.vcf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c7ff16903e0c52bd", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-13T15:58:47.868055Z", + "start_time": "2024-05-13T15:58:41.380466Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-05-13 18:58:41.794056: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-05-13 18:58:41.794769: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-05-13 18:58:41.797917: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-05-13 18:58:41.857361: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-05-13 18:58:42.410244: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "2024-05-13 18:58:42.957291: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n", + "2024-05-13 18:58:42.957684: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n", + "Skipping registering GPU devices...\n", + "WARNING:absl:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.\n", + "WARNING:absl:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.\n", + "WARNING:absl:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.\n", + "WARNING:absl:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.\n", + "WARNING:absl:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m1s\u001B[0m 595ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m1s\u001B[0m 554ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m1s\u001B[0m 553ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m1s\u001B[0m 548ms/step\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:5 out of the last 5 calls to .one_step_on_data_distributed at 0x7dee981abf40> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n", + "WARNING:tensorflow:5 out of the last 5 calls to .one_step_on_data_distributed at 0x7dee981abf40> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m1s\u001B[0m 548ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 49ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 51ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 51ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 59ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 53ms/step\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:dede['G|EYS|0.00|0.00|0.00|0.00|3|9|-20|9']\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from subprocess import Popen\n", + "\n", + "\n", + "process = Popen(\"spliceai -I ./lovd.vcf -O ./lovd_output.vcf -R ../tools/spliceai/hg38.fa -A grch38\".split())\n", + "process.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0514ccc3-5c91-41ad-ab15-f4158030ea14", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nojsaj/kath/tools/revel/revel.py:9: DtypeWarning: Columns (0,2) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " revel_data = pd.read_csv(revel_file)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chrhg19_posgrch38_posrefaltaarefaaaltREVELEnsembl_transcriptid
2875612766565575864945865TAQL0.188ENST00000503581;ENST00000370621;ENST00000370616
2875612866565575864945865TCQR0.111ENST00000503581;ENST00000370621;ENST00000370616
2875612966565575864945865TGQP0.344ENST00000503581;ENST00000370621;ENST00000370616
\n", + "
" + ], + "text/plain": [ + " chr hg19_pos grch38_pos ref alt aaref aaalt REVEL \\\n", + "28756127 6 65655758 64945865 T A Q L 0.188 \n", + "28756128 6 65655758 64945865 T C Q R 0.111 \n", + "28756129 6 65655758 64945865 T G Q P 0.344 \n", + "\n", + " Ensembl_transcriptid \n", + "28756127 ENST00000503581;ENST00000370621;ENST00000370616 \n", + "28756128 ENST00000503581;ENST00000370621;ENST00000370616 \n", + "28756129 ENST00000503581;ENST00000370621;ENST00000370616 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from tools import get_revel_scores\n", + "\n", + "chromosome = 6\n", + "position = 65655758\n", + "\n", + "results = get_revel_scores(chromosome, position)\n", + "\n", + "display(results)" + ] + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "from api.data.collection import store_database_for_eys_gene\n", + "store_database_for_eys_gene(\"clinvar\", override=False)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-08-12T16:46:07.973915Z", + "start_time": "2024-08-12T16:46:07.970874Z" + } + }, + "id": "b80a1049abe7596e", + "execution_count": 12 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file at ../data/lovd/lovd_data.txt already exists.\n" + ] + } + ], + "source": [ + "from api.data.collection import store_database_for_eys_gene\n", + "store_database_for_eys_gene(\"lovd\", override=False)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-08-12T16:47:56.094297Z", + "start_time": "2024-08-12T16:47:56.090300Z" + } + }, + "id": "a1e10fc8175753a0", + "execution_count": 4 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": " gnomAD ID Chromosome Position rsIDs Reference \\\n0 6-63720525-A-G 6 63720525 rs1768331164 A \n1 6-63720525-A-T 6 63720525 rs1768331164 A \n2 6-63720526-T-A 6 63720526 T \n3 6-63720531-C-CAA 6 63720531 C \n4 6-63720531-C-G 6 63720531 rs927390284 C \n... ... ... ... ... ... \n11083 6-65495478-G-A 6 65495478 rs530118054 G \n11084 6-65495479-G-A 6 65495479 rs1766225632 G \n11085 6-65495482-A-G 6 65495482 rs1766225707 A \n11086 6-65495484-T-G 6 65495484 rs1766225807 T \n11087 6-65495485-T-C 6 65495485 T \n\n Alternate Source Filters - exomes \\\n0 G gnomAD Exomes,gnomAD Genomes PASS \n1 T gnomAD Genomes \n2 A gnomAD Exomes PASS \n3 CAA gnomAD Exomes PASS \n4 G gnomAD Exomes,gnomAD Genomes PASS \n... ... ... ... \n11083 A gnomAD Exomes,gnomAD Genomes PASS \n11084 A gnomAD Exomes PASS \n11085 G gnomAD Exomes,gnomAD Genomes PASS \n11086 G gnomAD Exomes PASS \n11087 C gnomAD Exomes PASS \n\n Filters - genomes Transcript ... Homozygote Count Amish \\\n0 PASS ENST00000503581.6 ... 0 \n1 PASS ENST00000503581.6 ... 0 \n2 ENST00000503581.6 ... 0 \n3 ENST00000503581.6 ... 0 \n4 PASS ENST00000503581.6 ... 0 \n... ... ... ... ... \n11083 PASS ENST00000503581.6 ... 0 \n11084 ENST00000503581.6 ... 0 \n11085 PASS ENST00000503581.6 ... 0 \n11086 ENST00000503581.6 ... 0 \n11087 ENST00000503581.6 ... 0 \n\n Hemizygote Count Amish Allele Count South Asian \\\n0 0 0 \n1 0 0 \n2 0 1 \n3 0 0 \n4 0 0 \n... ... ... \n11083 0 4 \n11084 0 0 \n11085 0 0 \n11086 0 5 \n11087 0 1 \n\n Allele Number South Asian Homozygote Count South Asian \\\n0 55362 0 \n1 55362 0 \n2 55360 0 \n3 57520 0 \n4 57524 0 \n... ... ... \n11083 88352 0 \n11084 88316 0 \n11085 87828 0 \n11086 87818 0 \n11087 87636 0 \n\n Hemizygote Count South Asian Allele Count Remaining \\\n0 0 0 \n1 0 0 \n2 0 0 \n3 0 1 \n4 0 0 \n... ... ... \n11083 0 8 \n11084 0 0 \n11085 0 0 \n11086 0 0 \n11087 0 0 \n\n Allele Number Remaining Homozygote Count Remaining \\\n0 44082 0 \n1 44082 0 \n2 44162 0 \n3 47700 0 \n4 47678 0 \n... ... ... \n11083 60164 0 \n11084 60142 0 \n11085 59586 0 \n11086 59748 0 \n11087 59608 0 \n\n Hemizygote Count Remaining \n0 0 \n1 0 \n2 0 \n3 0 \n4 0 \n... ... \n11083 0 \n11084 0 \n11085 0 \n11086 0 \n11087 0 \n\n[11088 rows x 72 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
gnomAD IDChromosomePositionrsIDsReferenceAlternateSourceFilters - exomesFilters - genomesTranscript...Homozygote Count AmishHemizygote Count AmishAllele Count South AsianAllele Number South AsianHomozygote Count South AsianHemizygote Count South AsianAllele Count RemainingAllele Number RemainingHomozygote Count RemainingHemizygote Count Remaining
06-63720525-A-G663720525rs1768331164AGgnomAD Exomes,gnomAD GenomesPASSPASSENST00000503581.6...000553620004408200
16-63720525-A-T663720525rs1768331164ATgnomAD Genomes<NA>PASSENST00000503581.6...000553620004408200
26-63720526-T-A663720526<NA>TAgnomAD ExomesPASS<NA>ENST00000503581.6...001553600004416200
36-63720531-C-CAA663720531<NA>CCAAgnomAD ExomesPASS<NA>ENST00000503581.6...000575200014770000
46-63720531-C-G663720531rs927390284CGgnomAD Exomes,gnomAD GenomesPASSPASSENST00000503581.6...000575240004767800
..................................................................
110836-65495478-G-A665495478rs530118054GAgnomAD Exomes,gnomAD GenomesPASSPASSENST00000503581.6...004883520086016400
110846-65495479-G-A665495479rs1766225632GAgnomAD ExomesPASS<NA>ENST00000503581.6...000883160006014200
110856-65495482-A-G665495482rs1766225707AGgnomAD Exomes,gnomAD GenomesPASSPASSENST00000503581.6...000878280005958600
110866-65495484-T-G665495484rs1766225807TGgnomAD ExomesPASS<NA>ENST00000503581.6...005878180005974800
110876-65495485-T-C665495485<NA>TCgnomAD ExomesPASS<NA>ENST00000503581.6...001876360005960800
\n

11088 rows × 72 columns

\n
" + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from api.data.refactoring import parse_gnomad, set_gnomad_dtypes\n", + "from api import (store_database_for_eys_gene,GNOMAD_PATH)\n", + "\n", + "store_database_for_eys_gene('gnomad', False)\n", + "\n", + "gnomad_data = parse_gnomad(GNOMAD_PATH+'/gnomad_data.csv')\n", + "\n", + "set_gnomad_dtypes(gnomad_data)\n", + "\n", + "gnomad_data" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-08-20T18:43:31.996838Z", + "start_time": "2024-08-20T18:43:30.905943Z" + } + }, + "id": "4ba7fd02a60f5693", + "execution_count": 1 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file at ../data/lovd/lovd_data.txt already exists.\n" + ] + }, + { + "data": { + "text/plain": " id transcriptid effectid position_c_start \\\n0 170936 7329 90 -538 \n1 235579 7329 99 -332 \n2 235593 7329 99 1300 \n3 235595 7329 99 1300 \n4 235603 7329 99 6572 \n... ... ... ... ... \n13272 822052 7329 70 1767 \n13273 822775 7329 70 0 \n13274 822785 7329 70 0 \n13275 822816 7329 70 0 \n13276 867648 7329 70 0 \n\n position_c_start_intron position_c_end position_c_end_intron \\\n0 0 1599 1 \n1 -1 748 1 \n2 -1 1459 1 \n3 -1 1459 1 \n4 -1 6725 1 \n... ... ... ... \n13272 -1 2023 1 \n13273 0 0 0 \n13274 0 0 0 \n13275 0 0 0 \n13276 0 0 0 \n\n VariantOnTranscript/DNA VariantOnTranscript/RNA \\\n0 c.(?_-538)_(1599+1_1600-1)del r.0? \n1 c.(-333+1_-332-1)_(748+1_749-1)del r.? \n2 c.(1299+1_1300-1)_(1459+1_1460-1)del r.? \n3 c.(1299+1_1300-1)_(1459+1_1460-1)del r.(?) \n4 c.(6571+1_6572-1)_(6725+1_6726-1)del r.? \n... ... ... \n13272 c.(1766+1_1767-1)_(2023+1_2024-1)del r.spl \n13273 c.? r.(?) \n13274 c.? r.(?) \n13275 c.? r.(?) \n13276 c.? r.(?) \n\n VariantOnTranscript/Protein VariantOnTranscript/Exon \\\n0 p.0? _1_10i \n1 p.? 2i_4i \n2 p.? 8i_9i \n3 p.? 8i_9i \n4 p.(Ser2191Thrfs*14) 32i_33i \n... ... ... \n13272 p.(?) \n13273 p.(Tyr2555fs) \n13274 p.(Asp498fs) \n13275 p.(Gln3101fs) \n13276 p.? \n\n VariantOnGenome/DNA/hg38 gnomAD ID_gnomad hg38_gnomad \n0 \n1 \n2 \n3 \n4 \n... ... ... ... \n13272 g.? \n13273 g.? \n13274 g.? \n13275 g.? \n13276 g.? \n\n[13277 rows x 14 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idtranscriptideffectidposition_c_startposition_c_start_intronposition_c_endposition_c_end_intronVariantOnTranscript/DNAVariantOnTranscript/RNAVariantOnTranscript/ProteinVariantOnTranscript/ExonVariantOnGenome/DNA/hg38gnomAD ID_gnomadhg38_gnomad
0170936732990-538015991c.(?_-538)_(1599+1_1600-1)delr.0?p.0?_1_10i<NA><NA>
1235579732999-332-17481c.(-333+1_-332-1)_(748+1_749-1)delr.?p.?2i_4i<NA><NA>
22355937329991300-114591c.(1299+1_1300-1)_(1459+1_1460-1)delr.?p.?8i_9i<NA><NA>
32355957329991300-114591c.(1299+1_1300-1)_(1459+1_1460-1)delr.(?)p.?8i_9i<NA><NA>
42356037329996572-167251c.(6571+1_6572-1)_(6725+1_6726-1)delr.?p.(Ser2191Thrfs*14)32i_33i<NA><NA>
.............................................
132728220527329701767-120231c.(1766+1_1767-1)_(2023+1_2024-1)delr.splp.(?)g.?<NA><NA>
132738227757329700000c.?r.(?)p.(Tyr2555fs)g.?<NA><NA>
132748227857329700000c.?r.(?)p.(Asp498fs)g.?<NA><NA>
132758228167329700000c.?r.(?)p.(Gln3101fs)g.?<NA><NA>
132768676487329700000c.?r.(?)p.?g.?<NA><NA>
\n

13277 rows × 14 columns

\n
" + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from api.data.refactoring import merge_gnomad_lovd, parse_gnomad, set_gnomad_dtypes\n", + "import pandas as pd\n", + "from api import (store_database_for_eys_gene,\n", + " parse_lovd,\n", + " set_lovd_dtypes,\n", + " LOVD_PATH,\n", + " GNOMAD_PATH)\n", + "\n", + "store_database_for_eys_gene('lovd', False)\n", + "store_database_for_eys_gene('gnomad', False)\n", + "\n", + "lovd_data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")\n", + "gnomad_data = parse_gnomad(GNOMAD_PATH+'/gnomad_data.csv')\n", + "\n", + "set_lovd_dtypes(lovd_data)\n", + "set_gnomad_dtypes(gnomad_data)\n", + "\n", + "variants_on_genome = lovd_data[\"Variants_On_Genome\"].copy()\n", + "\n", + "lovd_data = pd.merge(lovd_data[\"Variants_On_Transcripts\"],\n", + " variants_on_genome[['id','VariantOnGenome/DNA/hg38']],\n", + " on='id',\n", + " how='left')\n", + "\n", + "gnomad_data = gnomad_data.copy()\n", + "final_data = merge_gnomad_lovd(lovd_data, gnomad_data)\n", + "final_data" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-08-21T18:35:42.249375Z", + "start_time": "2024-08-21T18:35:33.312752Z" + } + }, + "id": "dd9b17623f26a07c", + "execution_count": 1 + }, + { + "cell_type": "code", + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "1a3b6e41853817ca", + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}