Skip to content

Commit

Permalink
cadd_integration_test
Browse files Browse the repository at this point in the history
  • Loading branch information
Akaud committed Sep 30, 2024
1 parent dd78258 commit 720c894
Show file tree
Hide file tree
Showing 11 changed files with 517 additions and 150 deletions.
19 changes: 0 additions & 19 deletions api/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,6 @@
GNOMAD_PATH,
)

# DATA COLLECTION IMPORT
from .collection import (
# Custom exceptions
BadResponseException,
DownloadError,

# Custom utility functions
get_file_from_url,

# Functions for downloading databases
download_lovd_database_for_eys_gene,
download_genes_lovd,
download_database_for_eys_gene,
download_data_from_gnomad_eys,

# Functions for storing databases
store_database_for_eys_gene

)

# DATA REFACTORING IMPORT
from .refactoring import (
Expand Down
5 changes: 5 additions & 0 deletions tests/input.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
##fileformat=VCFv4.2
##fileDate=20231001
##reference=GRCh38
#CHROM POS ID REF ALT QUAL FILTER INFO
6 215879068 . C T . . .
158 changes: 158 additions & 0 deletions tests/output.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e5a89356",
"metadata": {
"tags": [
"papermill-error-cell-tag"
]
},
"source": [
"<span style=\"color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;\">An Exception was encountered at '<a href=\"#papermill-error-cell\">In [1]</a>'.</span>"
]
},
{
"cell_type": "markdown",
"id": "7fa11030",
"metadata": {
"tags": [
"papermill-error-cell-tag"
]
},
"source": [
"<span id=\"papermill-error-cell\" style=\"color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;\">Execution using papermill encountered an exception here and stopped:</span>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"collapsed": true,
"execution": {
"iopub.execute_input": "2024-09-29T18:08:52.223851Z",
"iopub.status.busy": "2024-09-29T18:08:52.223339Z",
"iopub.status.idle": "2024-09-29T18:09:06.722487Z",
"shell.execute_reply": "2024-09-29T18:09:06.721642Z"
},
"papermill": {
"duration": 14.503277,
"end_time": "2024-09-29T18:09:06.723475",
"exception": true,
"start_time": "2024-09-29T18:08:52.220198",
"status": "failed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The file at ../data/lovd/lovd_data.txt already exists.\n"
]
},
{
"ename": "NoSuchWindowException",
"evalue": "Message: Browsing context has been discarded\nStacktrace:\nRemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8\nWebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5\nNoSuchWindowError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:679:5\nassert.that/<@chrome://remote/content/shared/webdriver/Assert.sys.mjs:515:13\nassert.open@chrome://remote/content/shared/webdriver/Assert.sys.mjs:147:4\nGeckoDriver.prototype.navigateTo@chrome://remote/content/marionette/driver.sys.mjs:859:39\ndespatch@chrome://remote/content/marionette/server.sys.mjs:318:40\nexecute@chrome://remote/content/marionette/server.sys.mjs:289:16\nonPacket/<@chrome://remote/content/marionette/server.sys.mjs:262:20\nonPacket@chrome://remote/content/marionette/server.sys.mjs:263:9\n_onJSONObjectReady/<@chrome://remote/content/marionette/transport.sys.mjs:494:20\n",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNoSuchWindowException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Fetch and process data\u001b[39;00m\n\u001b[1;32m 12\u001b[0m store_database_for_eys_gene(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlovd\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 13\u001b[0m \u001b[43mstore_database_for_eys_gene\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mgnomad\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m lovd_data \u001b[38;5;241m=\u001b[39m parse_lovd(LOVD_PATH \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/lovd_data.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 16\u001b[0m gnomad_data \u001b[38;5;241m=\u001b[39m parse_gnomad(GNOMAD_PATH \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/gnomad_data.csv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"File \u001b[0;32m/mnt/c/Users/Vlad/PycharmProjects/kath/api/data/downloading.py:191\u001b[0m, in \u001b[0;36mstore_database_for_eys_gene\u001b[0;34m(database_name, override)\u001b[0m\n\u001b[1;32m 189\u001b[0m download_lovd_database_for_eys_gene(override)\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 191\u001b[0m \u001b[43mdownload_database_for_eys_gene\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdatabase_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moverride\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/mnt/c/Users/Vlad/PycharmProjects/kath/api/data/downloading.py:167\u001b[0m, in \u001b[0;36mdownload_database_for_eys_gene\u001b[0;34m(database_name, override)\u001b[0m\n\u001b[1;32m 163\u001b[0m firefox_options\u001b[38;5;241m.\u001b[39mset_preference(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbrowser.helperApps.neverAsk.saveToDisk\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 164\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapplication/octet-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 166\u001b[0m driver \u001b[38;5;241m=\u001b[39m webdriver\u001b[38;5;241m.\u001b[39mFirefox(options\u001b[38;5;241m=\u001b[39mfirefox_options)\n\u001b[0;32m--> 167\u001b[0m \u001b[43mdriver\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 168\u001b[0m WebDriverWait(driver, \u001b[38;5;241m30\u001b[39m)\u001b[38;5;241m.\u001b[39muntil(EC\u001b[38;5;241m.\u001b[39melement_to_be_clickable((By\u001b[38;5;241m.\u001b[39mXPATH, clickable)))\n\u001b[1;32m 169\u001b[0m driver\u001b[38;5;241m.\u001b[39mexecute_script(button_location)\n",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/selenium/webdriver/remote/webdriver.py:363\u001b[0m, in \u001b[0;36mWebDriver.get\u001b[0;34m(self, url)\u001b[0m\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(\u001b[38;5;28mself\u001b[39m, url: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 362\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Loads a web page in the current browser session.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 363\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mCommand\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mGET\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43murl\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/selenium/webdriver/remote/webdriver.py:354\u001b[0m, in \u001b[0;36mWebDriver.execute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 352\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_executor\u001b[38;5;241m.\u001b[39mexecute(driver_command, params)\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response:\n\u001b[0;32m--> 354\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merror_handler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 355\u001b[0m response[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_unwrap_value(response\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/selenium/webdriver/remote/errorhandler.py:229\u001b[0m, in \u001b[0;36mErrorHandler.check_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 227\u001b[0m alert_text \u001b[38;5;241m=\u001b[39m value[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124malert\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_class(message, screen, stacktrace, alert_text) \u001b[38;5;66;03m# type: ignore[call-arg] # mypy is not smart enough here\u001b[39;00m\n\u001b[0;32m--> 229\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_class(message, screen, stacktrace)\n",
"\u001b[0;31mNoSuchWindowException\u001b[0m: Message: Browsing context has been discarded\nStacktrace:\nRemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8\nWebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5\nNoSuchWindowError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:679:5\nassert.that/<@chrome://remote/content/shared/webdriver/Assert.sys.mjs:515:13\nassert.open@chrome://remote/content/shared/webdriver/Assert.sys.mjs:147:4\nGeckoDriver.prototype.navigateTo@chrome://remote/content/marionette/driver.sys.mjs:859:39\ndespatch@chrome://remote/content/marionette/server.sys.mjs:318:40\nexecute@chrome://remote/content/marionette/server.sys.mjs:289:16\nonPacket/<@chrome://remote/content/marionette/server.sys.mjs:262:20\nonPacket@chrome://remote/content/marionette/server.sys.mjs:263:9\n_onJSONObjectReady/<@chrome://remote/content/marionette/transport.sys.mjs:494:20\n"
]
}
],
"source": [
"import os\n",
"from api.data.refactoring import merge_gnomad_lovd, parse_gnomad, set_gnomad_dtypes\n",
"from api import (store_database_for_eys_gene,\n",
" parse_lovd,\n",
" set_lovd_dtypes,\n",
" LOVD_PATH,\n",
" GNOMAD_PATH)\n",
"import pandas as pd\n",
"from tests.tools.spliceai.spliceai import add_spliceai_eval_column\n",
"\n",
"# Fetch and process data\n",
"store_database_for_eys_gene('lovd', False)\n",
"store_database_for_eys_gene('gnomad', False)\n",
"\n",
"lovd_data = parse_lovd(LOVD_PATH + \"/lovd_data.txt\")\n",
"gnomad_data = parse_gnomad(GNOMAD_PATH + '/gnomad_data.csv')\n",
"\n",
"# Set correct data types for LOVD and gnomAD data\n",
"set_lovd_dtypes(lovd_data)\n",
"set_gnomad_dtypes(gnomad_data)\n",
"\n",
"# Prepare LOVD data for merging\n",
"variants_on_genome = lovd_data[\"Variants_On_Genome\"].copy()\n",
"lovd_data = pd.merge(lovd_data[\"Variants_On_Transcripts\"],\n",
" variants_on_genome[['id', 'VariantOnGenome/DNA', 'VariantOnGenome/DNA/hg38',\n",
" 'chromosome', 'position_g_start', 'position_g_end']],\n",
" on='id',\n",
" how='left')\n",
"\n",
"gnomad_data = gnomad_data.copy()\n",
"data = merge_gnomad_lovd(lovd_data, gnomad_data)\n",
"first_100_rows = data.head(100).copy()\n",
"\n",
"# Define path for FASTA file\n",
"fasta_path = \"hg38_chr6.fa\"\n",
"\n",
"print(f\"Current working directory: {os.getcwd()}\")\n",
"# Verify the FASTA file's existence\n",
"if os.path.isfile(fasta_path):\n",
" print(\"FASTA file exists and can be read.\")\n",
"else:\n",
" print(f\"FASTA file not found at path: {fasta_path}\")\n",
" raise FileNotFoundError(f\"FASTA file not found at path: {fasta_path}\")\n",
"\n",
"result_data_spliceai = add_spliceai_eval_column(first_100_rows, fasta_path)\n",
"\n",
"# Output the result with SpliceAI scores\n",
"result_data_spliceai"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"papermill": {
"default_parameters": {},
"duration": 22.483974,
"end_time": "2024-09-29T18:09:09.346454",
"environment_variables": {},
"exception": true,
"input_path": "tests/spliceai_test.ipynb",
"output_path": "tests/output.ipynb",
"parameters": {},
"start_time": "2024-09-29T18:08:46.862480",
"version": "2.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Empty file added tests/output.vcf
Empty file.
38 changes: 25 additions & 13 deletions tests/pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"outputs_hidden": true
},
"ExecuteTime": {
"end_time": "2024-09-14T17:48:45.547065Z",
"start_time": "2024-09-14T17:48:44.657414Z"
"end_time": "2024-09-29T13:28:49.316136Z",
"start_time": "2024-09-29T13:28:49.291455Z"
}
},
"source": [
Expand All @@ -32,16 +32,28 @@
"\n",
"pd.options.display.max_columns = 0"
],
"outputs": [],
"execution_count": 1
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name 'store_database_for_eys_gene' from 'api.data' (C:\\Users\\Vlad\\PycharmProjects\\kath\\api\\data\\__init__.py)",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mImportError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[2], line 4\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mpandas\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m \u001B[38;5;21;01mpd\u001B[39;00m\n\u001B[0;32m 2\u001B[0m \u001B[38;5;28;01mimport\u001B[39;00m \u001B[38;5;21;01mrequests\u001B[39;00m\n\u001B[1;32m----> 4\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mapi\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdata\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m (store_database_for_eys_gene,\n\u001B[0;32m 5\u001B[0m parse_lovd,\n\u001B[0;32m 6\u001B[0m parse_gnomad,\n\u001B[0;32m 7\u001B[0m LOVD_PATH,\n\u001B[0;32m 8\u001B[0m set_lovd_dtypes,\n\u001B[0;32m 9\u001B[0m set_gnomad_dtypes,\n\u001B[0;32m 10\u001B[0m download_data_from_gnomad_eys,\n\u001B[0;32m 11\u001B[0m merge_gnomad_lovd,\n\u001B[0;32m 12\u001B[0m GNOMAD_PATH,\n\u001B[0;32m 13\u001B[0m )\n\u001B[0;32m 14\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mapi\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mdata\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m save_lovd_as_vcf\n\u001B[0;32m 17\u001B[0m pd\u001B[38;5;241m.\u001B[39moptions\u001B[38;5;241m.\u001B[39mdisplay\u001B[38;5;241m.\u001B[39mmax_columns \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m\n",
"\u001B[1;31mImportError\u001B[0m: cannot import name 'store_database_for_eys_gene' from 'api.data' (C:\\Users\\Vlad\\PycharmProjects\\kath\\api\\data\\__init__.py)"
]
}
],
"execution_count": 2
},
{
"cell_type": "code",
"id": "f49f7691a27aa7b4",
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-09-14T17:48:45.552787Z",
"end_time": "2024-09-29T13:28:49.338714800Z",
"start_time": "2024-09-14T17:48:45.549075Z"
}
},
Expand All @@ -68,7 +80,7 @@
"outputs_hidden": false
},
"ExecuteTime": {
"end_time": "2024-09-14T17:48:53.637253Z",
"end_time": "2024-09-29T13:28:49.340882700Z",
"start_time": "2024-09-14T17:48:45.553796Z"
}
},
Expand All @@ -82,7 +94,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:51:24.853647Z",
"end_time": "2024-09-29T13:28:49.340951600Z",
"start_time": "2024-09-14T17:48:53.638260Z"
}
},
Expand Down Expand Up @@ -242,7 +254,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:51:24.927680Z",
"end_time": "2024-09-29T13:28:49.341954700Z",
"start_time": "2024-09-14T17:51:24.854656Z"
}
},
Expand All @@ -259,7 +271,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:51:24.982839Z",
"end_time": "2024-09-29T13:28:49.342952600Z",
"start_time": "2024-09-14T17:51:24.928689Z"
}
},
Expand Down Expand Up @@ -1219,7 +1231,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:51:25.045318Z",
"end_time": "2024-09-29T13:28:49.343956Z",
"start_time": "2024-09-14T17:51:24.983847Z"
}
},
Expand All @@ -1232,7 +1244,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:51:25.050214Z",
"end_time": "2024-09-29T13:28:49.354788100Z",
"start_time": "2024-09-14T17:51:25.046323Z"
}
},
Expand All @@ -1257,7 +1269,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:52:20.264323Z",
"end_time": "2024-09-29T13:28:49.355790900Z",
"start_time": "2024-09-14T17:52:20.251983Z"
}
},
Expand Down Expand Up @@ -1414,7 +1426,7 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-14T17:52:47.517346Z",
"end_time": "2024-09-29T13:28:49.356790800Z",
"start_time": "2024-09-14T17:52:45.257745Z"
}
},
Expand Down
Loading

0 comments on commit 720c894

Please sign in to comment.