From ea80fbfb56629ec6422498a7ace89d9b084fc1c1 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 25 Jul 2024 12:26:50 +0200 Subject: [PATCH 001/188] feat: expose summary statistics qc and locus breaker steps to hydra cli (#716) * feat: expose summary statistics qc to hydra cli * feat: expose locus breaker clumping step --------- Co-authored-by: Szymon Szyszkowski --- docs/python_api/steps/locus_breaker_clumping.md | 5 +++++ docs/python_api/steps/summary_statistics_qc.md | 5 +++++ src/gentropy/config.py | 4 ++++ 3 files changed, 14 insertions(+) create mode 100644 docs/python_api/steps/locus_breaker_clumping.md create mode 100644 docs/python_api/steps/summary_statistics_qc.md diff --git a/docs/python_api/steps/locus_breaker_clumping.md b/docs/python_api/steps/locus_breaker_clumping.md new file mode 100644 index 000000000..1e49234e3 --- /dev/null +++ b/docs/python_api/steps/locus_breaker_clumping.md @@ -0,0 +1,5 @@ +--- +title: locus_breaker_clumping +--- + +::: gentropy.locus_breaker_clumping.LocusBreakerClumpingStep diff --git a/docs/python_api/steps/summary_statistics_qc.md b/docs/python_api/steps/summary_statistics_qc.md new file mode 100644 index 000000000..a9fea12b9 --- /dev/null +++ b/docs/python_api/steps/summary_statistics_qc.md @@ -0,0 +1,5 @@ +--- +title: summary_statistics_qc +--- + +::: gentropy.sumstat_qc_step.SummaryStatisticsQCStep diff --git a/src/gentropy/config.py b/src/gentropy/config.py index a6f92669e..90160e962 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -532,3 +532,7 @@ def register_config() -> None: group="step", name="window_based_clumping", node=WindowBasedClumpingStepConfig ) cs.store(group="step", name="susie_finemapping", node=FinemapperConfig) + cs.store(group="step", name="summary_statistics_qc", node=GWASQCStep) + cs.store( + group="step", name="locus_breaker_clumping", node=LocusBreakerClumpingConfig + ) From a5c96b975dada005a1424ec241bbd80149f7a469 
Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:31:10 +0200 Subject: [PATCH 002/188] fix: change config params to match new name (#721) Co-authored-by: Szymon Szyszkowski --- config/step/ot_gwas_catalog_ingestion.yaml | 2 +- config/step/ot_gwas_catalog_study_inclusion.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/step/ot_gwas_catalog_ingestion.yaml b/config/step/ot_gwas_catalog_ingestion.yaml index fc82b82c2..8acc07d62 100644 --- a/config/step/ot_gwas_catalog_ingestion.yaml +++ b/config/step/ot_gwas_catalog_ingestion.yaml @@ -5,7 +5,7 @@ catalog_study_files: ${datasets.gwas_catalog_studies} catalog_ancestry_files: ${datasets.gwas_catalog_ancestries} catalog_associations_file: ${datasets.gwas_catalog_associations} catalog_sumstats_lut: ${datasets.gwas_catalog_sumstats_lut} -variant_annotation_path: ${datasets.variant_annotation} +variant_annotation_path: ${datasets.gnomad_variants} catalog_studies_out: ${datasets.gwas_catalog_study_index} catalog_associations_out: ${datasets.gwas_catalog_study_locus_folder}/gwas_catalog_curated_associations gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation} diff --git a/config/step/ot_gwas_catalog_study_inclusion.yaml b/config/step/ot_gwas_catalog_study_inclusion.yaml index 7f3bf80b3..41590333c 100644 --- a/config/step/ot_gwas_catalog_study_inclusion.yaml +++ b/config/step/ot_gwas_catalog_study_inclusion.yaml @@ -4,7 +4,7 @@ defaults: catalog_study_files: ${datasets.gwas_catalog_studies} catalog_ancestry_files: ${datasets.gwas_catalog_ancestries} catalog_associations_file: ${datasets.gwas_catalog_associations} -variant_annotation_path: ${datasets.variant_annotation} +variant_annotation_path: ${datasets.gnomad_variants} gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation} harmonised_study_file: ${datasets.gwas_catalog_summary_stats_list} criteria: ??? 
From 897ae12f2e71b100e8bc223b9b0c7525a5cadac9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:33:48 +0100 Subject: [PATCH 003/188] chore: pre-commit autoupdate (#715) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.2 → v0.5.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.2...v0.5.5) - [github.com/pre-commit/mirrors-mypy: v1.10.1 → v1.11.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.10.1...v1.11.0) - [github.com/jsh9/pydoclint: 0.5.4 → 0.5.6](https://github.com/jsh9/pydoclint/compare/0.5.4...0.5.6) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cd9808f07..a68850464 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.2 + rev: v0.5.5 hooks: - id: ruff args: @@ -65,7 +65,7 @@ repos: stages: [commit-msg] - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.10.1" + rev: "v1.11.0" hooks: - id: mypy args: @@ -98,7 +98,7 @@ repos: - id: beautysh - repo: https://github.com/jsh9/pydoclint - rev: 0.5.4 + rev: 0.5.6 hooks: - id: pydoclint From 397f1e99f1c12c48ae0c91887e10640115fa492e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:09:08 +0100 Subject: [PATCH 004/188] build(deps-dev): bump pymdown-extensions from 10.8.1 to 10.9 (#720) Bumps [pymdown-extensions](https://github.com/facelessuser/pymdown-extensions) from 10.8.1 to 10.9. 
- [Release notes](https://github.com/facelessuser/pymdown-extensions/releases) - [Commits](https://github.com/facelessuser/pymdown-extensions/compare/10.8.1...10.9) --- updated-dependencies: - dependency-name: pymdown-extensions dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index beba69366..8f8fa90bf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6601,13 +6601,13 @@ files = [ [[package]] name = "pymdown-extensions" -version = "10.8.1" +version = "10.9" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.8.1-py3-none-any.whl", hash = "sha256:f938326115884f48c6059c67377c46cf631c733ef3629b6eed1349989d1b30cb"}, - {file = "pymdown_extensions-10.8.1.tar.gz", hash = "sha256:3ab1db5c9e21728dabf75192d71471f8e50f216627e9a1fa9535ecb0231b9940"}, + {file = "pymdown_extensions-10.9-py3-none-any.whl", hash = "sha256:d323f7e90d83c86113ee78f3fe62fc9dee5f56b54d912660703ea1816fed5626"}, + {file = "pymdown_extensions-10.9.tar.gz", hash = "sha256:6ff740bcd99ec4172a938970d42b96128bdc9d4b9bcad72494f29921dc69b753"}, ] [package.dependencies] From 0b9af70b711a7e1141e321fc2ed2bdbeb0a6f493 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 Aug 2024 09:47:30 +0100 Subject: [PATCH 005/188] build(deps-dev): bump deptry from 0.17.0 to 0.18.0 (#723) Bumps [deptry](https://github.com/fpgmaas/deptry) from 0.17.0 to 0.18.0. 
- [Release notes](https://github.com/fpgmaas/deptry/releases) - [Changelog](https://github.com/fpgmaas/deptry/blob/main/CHANGELOG.md) - [Commits](https://github.com/fpgmaas/deptry/compare/0.17.0...0.18.0) --- updated-dependencies: - dependency-name: deptry dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 26 +++++++++++++------------- pyproject.toml | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8f8fa90bf..64a192984 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1667,22 +1667,22 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "deptry" -version = "0.17.0" +version = "0.18.0" description = "A command line utility to check for unused, missing and transitive dependencies in a Python project." optional = false python-versions = ">=3.8" files = [ - {file = "deptry-0.17.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ddd05503cbae9cce608003bc50691cb2a6d714a9da30bc16a99116eedad5a0c2"}, - {file = "deptry-0.17.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:31af1dd2f83bddb6cf5abc9f37a86f8ca4b8572fda971a4e7eb0d552a727f454"}, - {file = "deptry-0.17.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0588827e36f4822517fc66308a85428780e15bbce819e2216d0a5d010edd1998"}, - {file = "deptry-0.17.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce0eb1408aae315fa757fc9877101079ea6b2ebcae18b261e5d3e0141ba517b2"}, - {file = "deptry-0.17.0-cp38-abi3-win_amd64.whl", hash = "sha256:d102754cd1f4ba2ed599fccaec54acb6be56bd00e8d03384d0a2bcb8ba8141e1"}, - {file = "deptry-0.17.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1753b8807c3da82637beb6a0b32df85fea73bcc33a31bcda2087487bd92c336e"}, - {file = "deptry-0.17.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", 
hash = "sha256:f34309d3c2f28c459f2e55d93b67c81950cb863e1b210788f3491ab973e42f53"}, - {file = "deptry-0.17.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac6b569c9623e41f1a18f722ddf8422ca7b0d5f718f9d6c71bc9dfcd9e28cf5d"}, - {file = "deptry-0.17.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7de2feebecb256ccee69b0f8144c678763d7842704959239fa7e7f3fc60f8a1"}, - {file = "deptry-0.17.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e4724e014c0787452962833cc3030170d267fbd3ac34f6c09b8449d8e8147f39"}, - {file = "deptry-0.17.0.tar.gz", hash = "sha256:f48a71bab8f46a896fe507c8be5f2b50bb9bab0c44e4dfad00afe87e9a08c14b"}, + {file = "deptry-0.18.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:aac16b9825c67887f84795d3fe3c5a676376cd6cc8555f6f7b57bfd45603e421"}, + {file = "deptry-0.18.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:b1440d2fab960e224b542726e6fcb0d3065635cfa8233c14f6c578faa2766e02"}, + {file = "deptry-0.18.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5764d6b484d488ce0f7085dc1767d99069b476383857aafd3bbc912128892dd"}, + {file = "deptry-0.18.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee0a916d78ba8db092a9454d5bc20fccbadb6ed0e8fb81fc020ba7e0df3578ed"}, + {file = "deptry-0.18.0-cp38-abi3-win_amd64.whl", hash = "sha256:7d1b561a4477ab130e1cb277b3d3aa25743b3005e1bb60076031ec3926b47541"}, + {file = "deptry-0.18.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e73d5c2676a1f49a954baa59c248b56bc940ab87d6070cb164f1394c24e07cf3"}, + {file = "deptry-0.18.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7343bb4948ad625ac1b3109279665004e6790ce01c8dc6a8a2ef1e4424c29773"}, + {file = "deptry-0.18.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdf6da66e31ef8bdace3bb34a86c4f066b5c5296776dd61b76802c72b0b3f5f4"}, + {file = 
"deptry-0.18.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7590966832f5222d2277612e07e67285d92123ad96cf7713cda579d420d63d1"}, + {file = "deptry-0.18.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:57e09ca29e98c4782197dc959849498941b5c4fc53178e9fe1fa30025e608bfd"}, + {file = "deptry-0.18.0.tar.gz", hash = "sha256:9cf8e398ea394f90ccfa8e11d7dcfba8ed485f6a33270ee2b024475b72a00d11"}, ] [package.dependencies] @@ -8691,4 +8691,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "b33fe69038e16c3ff9272ff76a563c5d91a833b568eb2a46acba8f411bf16eef" +content-hash = "5ebbf7ab3f609d1206d56ff465af3a648b945cb79e8415be61798530943a5911" diff --git a/pyproject.toml b/pyproject.toml index 962799dd8..3a206f4ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ apache-airflow = "^2.8.0" apache-airflow-providers-google = "^10.13.1" pydoclint = ">=0.3.8,<0.6.0" prettier = "^0.0.7" -deptry = ">=0.12,<0.18" +deptry = ">=0.12,<0.19" yamllint = "^1.33.0" [tool.semantic_release] From ae734a8f2eae6c4b777c2684c532e52daef7fe78 Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 8 Aug 2024 14:51:39 +0100 Subject: [PATCH 006/188] feat: notebook for locus breaker and susie finemapping benchmark (#717) * feat: notebook for locus breaker and susie finemapping benchmark * fix: won't pass tests with matplotlib imported * feat: ukb_ppp_fm benchmark notebook * fix: check for nulls as well as nans * fix: removing matplotlib * fix: update notebooks * chore: re-run ukb ppp notebook * chore: rerun notebook --- notebooks/gwas_cat_benchmark.ipynb | 1507 ++++++++++++++++++++++++++++ notebooks/ukb_ppp_benchmark.ipynb | 1468 +++++++++++++++++++++++++++ 2 files changed, 2975 insertions(+) create mode 100644 notebooks/gwas_cat_benchmark.ipynb create mode 100644 
notebooks/ukb_ppp_benchmark.ipynb diff --git a/notebooks/gwas_cat_benchmark.ipynb b/notebooks/gwas_cat_benchmark.ipynb new file mode 100644 index 000000000..cce01a050 --- /dev/null +++ b/notebooks/gwas_cat_benchmark.ipynb @@ -0,0 +1,1507 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "'use strict';\n(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n function drop(id) {\n const view = Bokeh.index.get_by_id(id)\n if (view != null) {\n view.model.document.clear()\n Bokeh.index.delete(view)\n }\n }\n\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n\n // Clean up Bokeh references\n if (id != null) {\n drop(id)\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim()\n drop(id)\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content 
only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is 
output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded(error = null) {\n const el = document.getElementById(\"aa16b16c-6126-4fc3-bc14-75eb4b44b50e\");\n if (el != null) {\n const html = (() => {\n if (typeof root.Bokeh === \"undefined\") {\n if (error == null) {\n return \"BokehJS is loading ...\";\n } else {\n return \"BokehJS failed to load.\";\n }\n } else {\n const prefix = `BokehJS ${root.Bokeh.version}`;\n if (error == null) {\n return `${prefix} successfully loaded.`;\n } else {\n return `${prefix} encountered errors while loading and may not function as expected.`;\n }\n }\n })();\n el.innerHTML = html;\n\n if (error != null) {\n const wrapper = document.createElement(\"div\");\n wrapper.style.overflow = \"auto\";\n wrapper.style.height = \"5em\";\n wrapper.style.resize = \"vertical\";\n const content = document.createElement(\"div\");\n content.style.fontFamily = \"monospace\";\n content.style.whiteSpace = \"pre-wrap\";\n content.style.backgroundColor = \"rgb(255, 221, 221)\";\n content.textContent = error.stack ?? 
error.toString();\n wrapper.append(content);\n el.append(wrapper);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(() => display_loaded(error), 100);\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n 
document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.4.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n try {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n\n } catch (error) {display_loaded(error);throw error;\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"aa16b16c-6126-4fc3-bc14-75eb4b44b50e\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# import matplotlib.pyplot as plt\n", + "import pyspark.sql.functions as f\n", + "from gentropy.common.session import Session\n", + "from 
gentropy.common.spark_helpers import order_array_of_structs_by_field\n", + "from gentropy.dataset.ld_index import LDIndex\n", + "from gentropy.dataset.study_index import StudyIndex\n", + "from gentropy.dataset.study_locus import StudyLocus\n", + "from gentropy.method.susie_inf import SUSIE_inf\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/08/07 13:43:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", + "24/08/07 13:43:20 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n", + "24/08/07 13:43:20 WARN Utils: Service 'SparkUI' could not bind on port 4041. 
Attempting port 4042.\n" + ] + } + ], + "source": [ + "session = Session(\n", + " extended_spark_conf={\n", + " \"spark.driver.memory\": \"10g\",\n", + " \"spark.executor.memory\": \"10g\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Context\n", + "\n", + "Gwas catalog studies with summary statistics were filtered using QC white-list \n", + "Locus breaker clumping was performed on the resulting studies\n", + "\n", + "Parameters: \n", + " lbc_baseline_pvalue: 1e-5, \n", + " lbc_distance_cutoff: 250_000, \n", + " lbc_pvalue_threshold: 1e-8, \n", + " lbc_flanking_distance: 100_000, \n", + " large_loci_size: 1_500_000, \n", + " wbc_clump_distance: 500_000, \n", + " wbc_pvalue_threshold: 1e-8, \n", + " collect_locus: bool = True, \n", + " remove_mhc: bool = True,\n", + "\n", + "Loci with less than 100 variants, or more than 15,000, were filtered out and fine-mapped with PICS.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/08/07 13:45:02 WARN SharedInMemoryCache: Evicting cached table partition metadata from memory due to size constraints (spark.sql.hive.filesourcePartitionFileCacheSize = 262144000 bytes). 
This may impact query planning performance.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "study_index = StudyIndex.from_parquet(\n", + " session, \"/Users/dc16/data/study_index/gwas_catalog/\"\n", + ")\n", + "ld_index = LDIndex.from_parquet(session, \"/Users/dc16/data/ld_index\")\n", + "susie_loci = StudyLocus(\n", + " session.spark.read.parquet(\"/Users/dc16/output/gwas_catalog/clean_loci.parquet\"),\n", + " StudyLocus.get_schema(),\n", + ")\n", + "pics_loci = StudyLocus(\n", + " session.spark.read.parquet(\"/Users/dc16/output/gwas_catalog/filtered_loci.parquet\"),\n", + " StudyLocus.get_schema(),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Susie fine mapping\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate the total number of unique studyIds\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 7:==============> (1542 + 8) / 5556]\r" + ] + } + ], + "source": [ + "susie_loci.df.select(\"studyId\").distinct().count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate the total number of loci for finemapping:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "176096" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "susie_loci.df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = susie_loci.df.withColumns(\n", + " {\n", + " \"locusSize\": f.size(\"locus\"),\n", + " \"locusLength\": f.col(\"locusEnd\") - f.col(\"locusStart\"),\n", + " }\n", + ")" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanLocusLength | 682633.2350024986 \n", + " q1LocusLength | 327752 \n", + " medianLocusLength | 517819 \n", + " q3LocusLength | 934946 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 21:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0-----------------------------\n", + " meanLocusSize | 3517.7594266763585 \n", + " minLocusSize | 100 \n", + " q1LocusSize | 1383 \n", + " medianLocusSize | 2565 \n", + " q3LocusSize | 4859 \n", + " maxLocusSize | 15000 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "length = df.select(\n", + " f.mean(\"locusLength\").alias(\"meanLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.25).alias(\"q1LocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.5).alias(\"medianLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.75).alias(\"q3LocusLength\"),\n", + ")\n", + "size = df.select(\n", + " f.mean(\"locusSize\").alias(\"meanLocusSize\"),\n", + " f.min(\"locusSize\").alias(\"minLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.25).alias(\"q1LocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.5).alias(\"medianLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.75).alias(\"q3LocusSize\"),\n", + " f.max(\"locusSize\").alias(\"maxLocusSize\"),\n", + ")\n", + "length.show(vertical=True)\n", + "size.show(vertical=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", + "\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Histogram for locusLength\n", + "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Length\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Length\")\n", + "\n", + "# Histogram for locusSize\n", + "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Size\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Locus Length\")\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "susie_fm = StudyLocus.from_parquet(session, \"/Users/dc16/output/gwas_cat_fm\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Total credible sets:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "632123" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "susie_fm.df.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It seems a very small fraction of loci have NaN assigned to values for their credible sets\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of credible sets with 'not a number' as the logBF: 39\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 14:====================================================> (175 + 6) / 181]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of credible sets with 'null' as the logBF: 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "nan = susie_fm.df.filter(f.isnan(\"credibleSetlog10BF\"))\n", + "null = 
susie_fm.df.filter(f.isnull(\"credibleSetlog10BF\"))\n", + "print(\"Number of credible sets with 'not a number' as the logBF: \", nan.count())\n", + "print(\"Number of credible sets with 'null' as the logBF: \", null.count())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0--------------------------\n", + " meanTopPP | 0.7341557442854078 \n", + " minTopPP | 5.30020287265377E-4 \n", + " q1TopPP | 0.43393218153901475 \n", + " medianTopPP | 0.9841312727728387 \n", + " q3TopPP | 0.9999999999927383 \n", + " maxTopPP | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanCredSetSize | 64.33602495870802 \n", + " minCredSetSize | 1 \n", + " q1CredSetSize | 1 \n", + " medianCredSetSize | 2 \n", + " q3CredSetSize | 13 \n", + " maxCredSetSize | 10710 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------------\n", + " meanPurityMeanR2 | 0.6459716225861719 \n", + " minPurityMeanR2 | 0.00469889394266767 \n", + " q1PurityMeanR2 | 0.23746586177412243 \n", + " medianPurityMeanR2 | 0.8203905763487525 \n", + " q3PurityMeanR2 | 1.0 \n", + " maxPurityMeanR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 28:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------------\n", + " meanPurityMinR2 | 0.48250211427144807 \n", + " minPurityMinR2 | 0.0 \n", + " q1PurityMinR2 | 6.409217073028031... 
\n", + " medianPurityMinR2 | 0.46903358691552954 \n", + " q3PurityMinR2 | 1.0 \n", + " maxPurityMinR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "susie_results = (\n", + " susie_fm.df.withColumn(\"credSetSize\", f.size(\"locus\"))\n", + " .withColumn(\n", + " \"locus\",\n", + " f.slice(order_array_of_structs_by_field(\"locus\", \"posteriorProbability\"), 1, 1)[\n", + " 0\n", + " ],\n", + " )\n", + " .withColumn(\"topPP\", f.col(\"locus\").getField(\"posteriorProbability\"))\n", + " .filter(~f.isnan(\"topPP\"))\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"topPP\").alias(\"meanTopPP\"),\n", + " f.min(\"topPP\").alias(\"minTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.25).alias(\"q1TopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.5).alias(\"medianTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.75).alias(\"q3TopPP\"),\n", + " f.max(\"topPP\").alias(\"maxTopPP\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"credSetSize\").alias(\"meanCredSetSize\"),\n", + " f.min(\"credSetSize\").alias(\"minCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.25).alias(\"q1CredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.5).alias(\"medianCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.75).alias(\"q3CredSetSize\"),\n", + " f.max(\"credSetSize\").alias(\"maxCredSetSize\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"purityMeanR2\").alias(\"meanPurityMeanR2\"),\n", + " f.min(\"purityMeanR2\").alias(\"minPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.25).alias(\"q1PurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.5).alias(\"medianPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.75).alias(\"q3PurityMeanR2\"),\n", + " f.max(\"purityMeanR2\").alias(\"maxPurityMeanR2\"),\n", + " 
).show(vertical=True)\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"purityMinR2\").alias(\"meanPurityMinR2\"),\n", + " f.min(\"purityMinR2\").alias(\"minPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.25).alias(\"q1PurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.5).alias(\"medianPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.75).alias(\"q3PurityMinR2\"),\n", + " f.max(\"purityMinR2\").alias(\"maxPurityMinR2\"),\n", + " ).show(vertical=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pdf = susie_results.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", + "plt.figure(figsize=(12, 12))\n", + "\n", + "# Histogram for purityMinR2\n", + "plt.subplot(2, 2, 1)\n", + "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMinR2\")\n", + "plt.xlabel(\"purityMinR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for purityMeanR2\n", + "plt.subplot(2, 2, 2)\n", + "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMeanR2\")\n", + "plt.xlabel(\"purityMeanR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for topPP\n", + "plt.subplot(2, 2, 3)\n", + "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of topPP\")\n", + "plt.xlabel(\"topPP\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for credSetSize\n", + "plt.subplot(2, 2, 4)\n", + "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of credSetSize\")\n", + "plt.xlabel(\"credSetSize\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Repeating the same steps, but filtering for only the first credible set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Total number of primary credible sets and number of unique studyIds:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of primary credible sets: 175991\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 46:====================================================> 
(176 + 5) / 181]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique studyIds in primary credible sets: 5328\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "first_credset = susie_results.filter(f.col(\"credibleSetIndex\") == 1)\n", + "print(\"Number of primary credible sets: \", first_credset.count())\n", + "print(\"Number of unique studyIds in primary credible sets: \", first_credset.select(\"studyId\").distinct().count())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0--------------------------\n", + " meanTopPP | 0.6268846688460689 \n", + " minTopPP | 5.30020287265377E-4 \n", + " q1TopPP | 0.22323522754002711 \n", + " medianTopPP | 0.7547370350843897 \n", + " q3TopPP | 0.9999999999979536 \n", + " maxTopPP | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanCredSetSize | 50.48537709314681 \n", + " minCredSetSize | 1 \n", + " q1CredSetSize | 1 \n", + " medianCredSetSize | 3 \n", + " q3CredSetSize | 19 \n", + " maxCredSetSize | 10710 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------------\n", + " meanPurityMeanR2 | 0.674055454136671 \n", + " minPurityMeanR2 | 0.00469889394266767 \n", + " q1PurityMeanR2 | 0.3980306136489271 \n", + " medianPurityMeanR2 | 0.8336824740390751 \n", + " q3PurityMeanR2 | 0.9776068324678869 \n", + " maxPurityMeanR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 
61:=====================================================>(178 + 3) / 181]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------------\n", + " meanPurityMinR2 | 0.48629879351056204 \n", + " minPurityMinR2 | 0.0 \n", + " q1PurityMinR2 | 3.997840337231423E-6 \n", + " medianPurityMinR2 | 0.5018138600363721 \n", + " q3PurityMinR2 | 0.9490123559383069 \n", + " maxPurityMinR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "(\n", + " first_credset.select(\n", + " f.mean(\"topPP\").alias(\"meanTopPP\"),\n", + " f.min(\"topPP\").alias(\"minTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.25).alias(\"q1TopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.5).alias(\"medianTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.75).alias(\"q3TopPP\"),\n", + " f.max(\"topPP\").alias(\"maxTopPP\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " first_credset.select(\n", + " f.mean(\"credSetSize\").alias(\"meanCredSetSize\"),\n", + " f.min(\"credSetSize\").alias(\"minCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.25).alias(\"q1CredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.5).alias(\"medianCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.75).alias(\"q3CredSetSize\"),\n", + " f.max(\"credSetSize\").alias(\"maxCredSetSize\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " first_credset.select(\n", + " f.mean(\"purityMeanR2\").alias(\"meanPurityMeanR2\"),\n", + " f.min(\"purityMeanR2\").alias(\"minPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.25).alias(\"q1PurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.5).alias(\"medianPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.75).alias(\"q3PurityMeanR2\"),\n", + " f.max(\"purityMeanR2\").alias(\"maxPurityMeanR2\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " 
first_credset.select(\n", + " f.mean(\"purityMinR2\").alias(\"meanPurityMinR2\"),\n", + " f.min(\"purityMinR2\").alias(\"minPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.25).alias(\"q1PurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.5).alias(\"medianPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.75).alias(\"q3PurityMinR2\"),\n", + " f.max(\"purityMinR2\").alias(\"maxPurityMinR2\"),\n", + " ).show(vertical=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pdf = first_credset.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", + "plt.figure(figsize=(12, 12))\n", + "\n", + "# Histogram for purityMinR2\n", + "plt.subplot(2, 2, 1)\n", + "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMinR2\")\n", + "plt.xlabel(\"purityMinR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for purityMeanR2\n", + "plt.subplot(2, 2, 2)\n", + "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMeanR2\")\n", + "plt.xlabel(\"purityMeanR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for topPP\n", + "plt.subplot(2, 2, 3)\n", + "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of topPP\")\n", + "plt.xlabel(\"topPP\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for credSetSize\n", + "plt.subplot(2, 2, 4)\n", + "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of credSetSize\")\n", + "plt.xlabel(\"credSetSize\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Filtering credible sets with qc function" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/08/07 13:49:35 WARN package: Truncated the string representation of a plan since it was too large. 
This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of high quality credible sets: 220670\n", + "Number of unique studyIds in high quality credible sets: 4862\n" + ] + } + ], + "source": [ + "qc_credsets = SUSIE_inf.credible_set_qc(\n", + " susie_fm, study_index, ld_index, 1e-5, 0.25, 0.8\n", + ").persist()\n", + "\n", + "qc_credsets = (\n", + " qc_credsets.df.withColumn(\"credSetSize\", f.size(\"locus\"))\n", + " .withColumn(\n", + " \"locus\",\n", + " f.slice(order_array_of_structs_by_field(\"locus\", \"posteriorProbability\"), 1, 1)[\n", + " 0\n", + " ],\n", + " )\n", + " .withColumn(\"topPP\", f.col(\"locus\").getField(\"posteriorProbability\"))\n", + " .filter(~f.isnan(\"topPP\"))\n", + ")\n", + "\n", + "print(\"Number of high quality credible sets: \", qc_credsets.count())\n", + "print(\n", + " \"Number of unique studyIds in high quality credible sets: \",\n", + " qc_credsets.select(\"studyId\").distinct().count(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------\n", + " meanTopPP | 0.7351960328222914 \n", + " minTopPP | 0.002487989359821... 
\n", + " q1TopPP | 0.4347870006604458 \n", + " medianTopPP | 0.9884991069860085 \n", + " q3TopPP | 0.9999999989221352 \n", + " maxTopPP | 1.0 \n", + "\n", + "-RECORD 0------------------------------\n", + " meanCredSetSize | 11.30863667991574 \n", + " minCredSetSize | 1 \n", + " q1CredSetSize | 1 \n", + " medianCredSetSize | 2 \n", + " q3CredSetSize | 8 \n", + " maxCredSetSize | 2681 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0--------------------------------\n", + " meanPurityMeanR2 | 0.9362013839231683 \n", + " minPurityMeanR2 | 0.50525100583065 \n", + " q1PurityMeanR2 | 0.8961545002734692 \n", + " medianPurityMeanR2 | 0.991509701456222 \n", + " q3PurityMeanR2 | 1.0 \n", + " maxPurityMeanR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 192:==================================================> (191 + 8) / 200]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0--------------------------------\n", + " meanPurityMinR2 | 0.8346683610862959 \n", + " minPurityMinR2 | 0.25001241379266637 \n", + " q1PurityMinR2 | 0.6919444964125268 \n", + " medianPurityMinR2 | 0.9832675585945014 \n", + " q3PurityMinR2 | 1.0 \n", + " maxPurityMinR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "(\n", + " qc_credsets.select(\n", + " f.mean(\"topPP\").alias(\"meanTopPP\"),\n", + " f.min(\"topPP\").alias(\"minTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.25).alias(\"q1TopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.5).alias(\"medianTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.75).alias(\"q3TopPP\"),\n", + " f.max(\"topPP\").alias(\"maxTopPP\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " qc_credsets.select(\n", + " f.mean(\"credSetSize\").alias(\"meanCredSetSize\"),\n", + 
" f.min(\"credSetSize\").alias(\"minCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.25).alias(\"q1CredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.5).alias(\"medianCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.75).alias(\"q3CredSetSize\"),\n", + " f.max(\"credSetSize\").alias(\"maxCredSetSize\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " qc_credsets.select(\n", + " f.mean(\"purityMeanR2\").alias(\"meanPurityMeanR2\"),\n", + " f.min(\"purityMeanR2\").alias(\"minPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.25).alias(\"q1PurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.5).alias(\"medianPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.75).alias(\"q3PurityMeanR2\"),\n", + " f.max(\"purityMeanR2\").alias(\"maxPurityMeanR2\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " qc_credsets.select(\n", + " f.mean(\"purityMinR2\").alias(\"meanPurityMinR2\"),\n", + " f.min(\"purityMinR2\").alias(\"minPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.25).alias(\"q1PurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.5).alias(\"medianPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.75).alias(\"q3PurityMinR2\"),\n", + " f.max(\"purityMinR2\").alias(\"maxPurityMinR2\"),\n", + " ).show(vertical=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pdf = qc_credsets.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", + "plt.figure(figsize=(12, 12))\n", + "\n", + "# Histogram for purityMinR2\n", + "plt.subplot(2, 2, 1)\n", + "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMinR2\")\n", + "plt.xlabel(\"purityMinR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for purityMeanR2\n", + "plt.subplot(2, 2, 2)\n", + "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMeanR2\")\n", + "plt.xlabel(\"purityMeanR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for topPP\n", + "plt.subplot(2, 2, 3)\n", + "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of topPP\")\n", + "plt.xlabel(\"topPP\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for credSetSize\n", + "plt.subplot(2, 2, 4)\n", + "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of credSetSize\")\n", + "plt.xlabel(\"credSetSize\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# WIP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clumped loci filtered for usage with PICS\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Number of unique studyIds\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "231" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pics_loci.df.select(\"studyId\").distinct().count()" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "Number of loci to fine map with PICS\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "9990" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pics_loci.df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pics_loci.df.withColumns(\n", + " {\n", + " \"locusSize\": f.size(\"locus\"),\n", + " \"locusLength\": f.col(\"locusEnd\") - f.col(\"locusStart\"),\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many loci with less than 100 variants from summary statistics?\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "985" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.filter(f.col(\"locusSize\") < 100).count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many loci with more than 15,000 variants from summary statistics?\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "9005" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.filter(f.col(\"locusSize\") > 15_000).count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanLocusLength | 1294768.356956957 \n", + " q1LocusLength | 1231357 \n", + " medianLocusLength | 1500000 \n", + " q3LocusLength | 1500000 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 102:===================================================> (334 + 8) / 346]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0-----------------------------\n", + " meanLocusSize | 18537.992992992993 \n", + " minLocusSize | 1 \n", + " q1LocusSize | 16779 \n", + " medianLocusSize | 19456 \n", + " q3LocusSize | 22436 \n", + " maxLocusSize | 39972 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "length = df.select(\n", + " f.mean(\"locusLength\").alias(\"meanLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.25).alias(\"q1LocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.5).alias(\"medianLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.75).alias(\"q3LocusLength\"),\n", + ")\n", + "size = df.select(\n", + " f.mean(\"locusSize\").alias(\"meanLocusSize\"),\n", + " f.min(\"locusSize\").alias(\"minLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.25).alias(\"q1LocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.5).alias(\"medianLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.75).alias(\"q3LocusSize\"),\n", + " f.max(\"locusSize\").alias(\"maxLocusSize\"),\n", + ")\n", + "length.show(vertical=True)\n", + "size.show(vertical=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", + "\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Histogram for locusLength\n", + "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Length\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Length\")\n", + "\n", + "# Histogram for locusSize\n", + "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Size\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Locus Length\")\n", + "plt.grid(True)\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gentropy-iQynFIia-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/ukb_ppp_benchmark.ipynb b/notebooks/ukb_ppp_benchmark.ipynb new file mode 100644 index 000000000..4a3e200db --- /dev/null +++ b/notebooks/ukb_ppp_benchmark.ipynb @@ -0,0 +1,1468 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "'use strict';\n(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n function drop(id) {\n const view = Bokeh.index.get_by_id(id)\n if (view != null) {\n view.model.document.clear()\n Bokeh.index.delete(view)\n }\n }\n\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n\n // Clean up Bokeh references\n if (id != null) {\n drop(id)\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim()\n drop(id)\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content 
only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is 
output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded(error = null) {\n const el = document.getElementById(\"d831fede-136f-40d0-a9cd-423fefca302f\");\n if (el != null) {\n const html = (() => {\n if (typeof root.Bokeh === \"undefined\") {\n if (error == null) {\n return \"BokehJS is loading ...\";\n } else {\n return \"BokehJS failed to load.\";\n }\n } else {\n const prefix = `BokehJS ${root.Bokeh.version}`;\n if (error == null) {\n return `${prefix} successfully loaded.`;\n } else {\n return `${prefix} encountered errors while loading and may not function as expected.`;\n }\n }\n })();\n el.innerHTML = html;\n\n if (error != null) {\n const wrapper = document.createElement(\"div\");\n wrapper.style.overflow = \"auto\";\n wrapper.style.height = \"5em\";\n wrapper.style.resize = \"vertical\";\n const content = document.createElement(\"div\");\n content.style.fontFamily = \"monospace\";\n content.style.whiteSpace = \"pre-wrap\";\n content.style.backgroundColor = \"rgb(255, 221, 221)\";\n content.textContent = error.stack ?? 
error.toString();\n wrapper.append(content);\n el.append(wrapper);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(() => display_loaded(error), 100);\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n 
document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.4.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.4.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n try {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n\n } catch (error) {display_loaded(error);throw error;\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"d831fede-136f-40d0-a9cd-423fefca302f\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# import matplotlib.pyplot as plt\n", + "import pyspark.sql.functions as f\n", + "from gentropy.common.session import Session\n", + "from 
gentropy.common.spark_helpers import order_array_of_structs_by_field\n", + "from gentropy.dataset.ld_index import LDIndex\n", + "from gentropy.dataset.study_index import StudyIndex\n", + "from gentropy.dataset.study_locus import StudyLocus\n", + "from gentropy.method.susie_inf import SUSIE_inf\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/08/07 09:34:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" + ] + } + ], + "source": [ + "session = Session(\n", + " extended_spark_conf={\n", + " \"spark.driver.memory\": \"10g\",\n", + " \"spark.executor.memory\": \"10g\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Context\n", + "UKB-PPP summary statistics were ingested, and a deduplication and sanity filter was run on the harmonised summary statistics prior to clumping.\n", + "Locus breaker clumping was performed on the resulting studies.\n", + "\n", + "Parameters: \n", + " lbc_baseline_pvalue: 1e-5, \n", + " lbc_distance_cutoff: 250_000, \n", + " lbc_pvalue_threshold: 1e-8, \n", + " lbc_flanking_distance: 100_000, \n", + " large_loci_size: 1_500_000, \n", + " wbc_clump_distance: 500_000, \n", + " wbc_pvalue_threshold: 1e-8, \n", + " collect_locus: bool = True, \n", + " remove_mhc: bool = True,\n", + "\n", + "Loci with fewer than 100 variants, or more than 15,000, were filtered out and fine-mapped with PICS.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "study_index = 
StudyIndex.from_parquet(\n", + " session, \"/Users/dc16/data/study_index/ukb_ppp/\"\n", + ")\n", + "ld_index = LDIndex.from_parquet(session, \"/Users/dc16/data/ld_index\")\n", + "susie_loci = StudyLocus(\n", + " session.spark.read.parquet(\"/Users/dc16/output/ukb_ppp/clean_loci.parquet/\"),\n", + " StudyLocus.get_schema(),\n", + ")\n", + "pics_loci = StudyLocus(\n", + " session.spark.read.parquet(\"/Users/dc16/output/ukb_ppp/filtered_loci.parquet/\"),\n", + " StudyLocus.get_schema(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "susie_loci.df = susie_loci.df.filter(f.col(\"pValueExponent\") < -11)\n", + "pics_loci.df = pics_loci.df.filter(f.col(\"pValueExponent\") < -11)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Susie fine mapping\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate the total number of unique studyIds\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "2387" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "susie_loci.df.select(\"studyId\").distinct().count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate the total number of loci for finemapping:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "17117" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "susie_loci.df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "df = susie_loci.df.withColumns(\n", + 
" {\n", + " \"locusSize\": f.size(\"locus\"),\n", + " \"locusLength\": f.col(\"locusEnd\") - f.col(\"locusStart\"),\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanLocusLength | 820240.6895484022 \n", + " q1LocusLength | 371912 \n", + " medianLocusLength | 651459 \n", + " q3LocusLength | 1500000 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 63:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0-----------------------------\n", + " meanLocusSize | 4029.3360986154116 \n", + " minLocusSize | 116 \n", + " q1LocusSize | 1997 \n", + " medianLocusSize | 3348 \n", + " q3LocusSize | 5727 \n", + " maxLocusSize | 13348 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "length = df.select(\n", + " f.mean(\"locusLength\").alias(\"meanLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.25).alias(\"q1LocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.5).alias(\"medianLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.75).alias(\"q3LocusLength\"),\n", + ")\n", + "size = df.select(\n", + " f.mean(\"locusSize\").alias(\"meanLocusSize\"),\n", + " f.min(\"locusSize\").alias(\"minLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.25).alias(\"q1LocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.5).alias(\"medianLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.75).alias(\"q3LocusSize\"),\n", + " f.max(\"locusSize\").alias(\"maxLocusSize\"),\n", + ")\n", + "length.show(vertical=True)\n", + "size.show(vertical=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + 
"outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAJOCAYAAABm7rQwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABq3ElEQVR4nO3deVhV5f7//xeDDA6AqICkIjnPmppyHNI0UTmlacccMjXMBiyHUvNUag5pVg6V6alM7Jtm2mlUU8khG3AicZYccCgFLRXEo4hw//7ox/64BU22sDbI83Fd+6q91r3Xft+3uLl97bXu5WKMMQIAAAAAAAAs5OrsAgAAAAAAAFD8EEoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBhUjVqlU1cOBAZ5dx23v99dd15513ys3NTY0bN3Z2ObgJGzZskIuLiz777DNnlwIAKISYQ1mDOdTfi46OlouLi44cOeLsUoAigVAKKCDZv5C2bduW6/527dqpfv36t/w+K1eu1IQJE275OMXFmjVrNHr0aLVq1UoLFizQq6++et22AwcOVOnSpS2sruBVrVpV//znP51dxnUtXrxYs2bNcnYZAAAnYg5VOBX3OdTly5c1e/ZsNWnSRD4+PvLz81O9evU0ZMgQ7d+/39nlAUWWu7MLAPB/EhIS5Oqat6x45cqVmjNnDpOqm7Ru3Tq5urpq/vz58vDwcHY5uMbixYu1e/duDR8+3NmlAACKEOZQBa+4z6F69uypb7/9Vn369NHjjz+ujIwM7d+/X8uXL9c//vEP1a5dW5LUv39/9e7dW56enk6uGCgaCKWAQqQo/vK6cOGCSpUq5ewybtqpU6fk7e1dLCdTAADcrphDFbziPIfaunWrli9frilTpujf//633b533nlH586dsz13c3OTm5ubxRUCRReX7wGFyLXrIWRkZOiVV15RjRo15OXlpXLlyql169aKiYmR9Nep0XPmzJEkubi42B7ZLly4oOeee06VK1eWp6enatWqpTfeeEPGGLv3vXjxop599lmVL19eZcqU0QMPPKDff/9dLi4udt8eTpgwQS4uLtq7d6/69u2rsmXLqnXr1pKknTt3auDAgbrzzjvl5eWloKAgPfbYY/rzzz/t3iv7GL/++qseeeQR+fr6qkKFCnr55ZdljNHx48fVrVs3+fj4KCgoSG+++eZNjd2VK1c0adIkVatWTZ6enqpatar+/e9/Kz093dbGxcVFCxYs0IULF2xjFR0dfVPHv5Fly5apadOm8vb2Vvny5fXII4/o999/z9Fu//796tWrlypUqCBvb2/VqlVLL774om3/wIEDVbVq1Ryvyx6zq8XExKh169by8/NT6dKlVatWrRyTpFvx8ccf2/rk7++v3r176/jx43Ztsi+f2Lt3r9q3b6+SJUvqjjvu0PTp03Mc7+jRo3rggQdUqlQpBQQEaMSIEVq9erVcXFy0YcMG2/FWrFiho0eP2v58rh2PrKwsTZkyRZUqVZKXl5c6dOiggwcP5lu/AQBFE3Mo5lAFOYc6dOiQJKlVq1Y59rm5ualcuXK259euKZVdQ26Pq39ms7KyNGvWLNWrV09eXl4KDAzUE088obNnz96wNqCo40wpoIClpKTojz/+yLE9IyPjb187YcIETZ06VYMHD9bdd9+t1NRUbdu2Tb/88ovuu+8+PfHEEzpx4oRiYmL0//7f/7N7rTFGDzzwgNavX6/IyEg1btxYq1ev1qhRo/T7779r5syZtrYDBw7U0qVL1b9/f7Vs2VLff/+9IiIirlvXv/71L9WoUUO
vvvqqbXIWExOjw4cPa9CgQQoKCtKePXv03nvvac+ePdq0aVOOCcHDDz+sOnXqaNq0aVqxYoUmT54sf39//ec//9G9996r1157TYsWLdLzzz+v5s2bq23btjccq8GDB2vhwoV66KGH9Nxzz2nz5s2aOnWq9u3bpy+++EKS9P/+3//Te++9py1btuiDDz6QJP3jH//42z+HG4mOjtagQYPUvHlzTZ06VcnJyZo9e7Z++uknbd++XX5+fpL+mnC2adNGJUqU0JAhQ1S1alUdOnRI33zzjaZMmZKn99yzZ4/++c9/qmHDhpo4caI8PT118OBB/fTTT7fUl2xTpkzRyy+/rF69emnw4ME6ffq03n77bbVt29auT5J09uxZde7cWT169FCvXr302WefacyYMWrQoIG6dOki6a+J/b333quTJ09q2LBhCgoK0uLFi7V+/Xq7933xxReVkpKi3377zfbzee16FNOmTZOrq6uef/55paSkaPr06erXr582b96cL30HABQezKGYQ0mFYw4VEhIiSVq0aJFatWold/eb/2d0jx49VL16dbttcXFxmjVrlgICAmzbnnjiCduYPPvss0pMTNQ777yj7du366efflKJEiXy0FOgCDEACsSCBQuMpBs+6tWrZ/eakJAQM2DAANvzRo0amYiIiBu+T1RUlMntr/KXX35pJJnJkyfbbX/ooYeMi4uLOXjwoDHGmLi4OCPJDB8+3K7dwIEDjSQzfvx427bx48cbSaZPnz453u9///tfjm2ffPKJkWQ2btyY4xhDhgyxbbty5YqpVKmScXFxMdOmTbNtP3v2rPH29rYbk9zEx8cbSWbw4MF2259//nkjyaxbt862bcCAAaZUqVI3PN7Ntr18+bIJCAgw9evXNxcvXrRtX758uZFkxo0bZ9vWtm1bU6ZMGXP06FG7Y2RlZdm9X0hISI73yR6zbDNnzjSSzOnTp2+qH1cLCQm54c/UkSNHjJubm5kyZYrd9l27dhl3d3e77ffcc4+RZD766CPbtvT0dBMUFGR69uxp2/bmm28aSebLL7+0bbt48aKpXbu2kWTWr19v2x4REZHrGKxfv95IMnXq1DHp6em27bNnzzaSzK5du26q/wCAwo85FHOowjaHysrKss17AgMDTZ8+fcycOXNy1GTM//38JiYm5nqs06dPmypVqpgGDRqYtLQ0Y4wxP/zwg5FkFi1aZNd21apVuW4HbidcvgcUsDlz5igmJibHo2HDhn/7Wj8/P+3Zs0cHDhzI8/uuXLlSbm5uevbZZ+22P/fcczLG6Ntvv5UkrVq1SpL09NNP27V75plnrnvsJ598Msc2b29v2/9funRJf/zxh1q2bClJ+uWXX3K0Hzx4sO3/3dzc1KxZMxljFBkZadvu5+enWrVq6fDhw9etRfqrr5I0cuRIu+3PPfecJGnFihU3fL2jtm3bplOnTunpp5+Wl5eXbXtERIRq165te9/Tp09r48aNeuyxx1SlShW7Y1z77efNyP7m8KuvvlJWVpbjHcjF559/rqysLPXq1Ut//PGH7REUFKQaNWrkOLupdOnSeuSRR2zPPTw8dPfdd9v9ma1atUp33HGHHnjgAds2Ly8vPf7443mub9CgQXZrWbRp00aS/vZnBABQ9DCHYg5VWOZQLi4uWr16tSZPnqyyZcvqk08+UVRUlEJCQvTwww/brSl1I5mZmerTp4/Onz+vL774wram2LJly+Tr66v77rvPbv7VtGlTlS5dOsf8C7idEEoBBezuu+9Wx44dczzKli37t6+dOHGizp07p5o1a6pBgwYaNWqUdu7ceVPve/ToUQUHB6tMmTJ22+vUqWPbn/1fV1dXhYaG2rW79jTjq13bVpLOnDmjYcOGKTAwUN7e3qpQoYKtXUpKSo72104sfH195eXlpfLly+fY/nfX0mf34dqag4KC5OfnZ+trfss+bq1atXLsq127tm1/9oQwP25fLf112n6rVq00ePBgBQYGqnf
v3lq6dGm+BFQHDhyQMUY1atRQhQoV7B779u3TqVOn7NpXqlQpx6SwbNmydn9mR48eVbVq1XK0u9HP2PVc+3OT/feI9RYA4PbDHIo5VGGaQ3l6eurFF1/Uvn37dOLECX3yySdq2bKlli5dqqFDh97U+7/00ktat26dFi9erGrVqtm2HzhwQCkpKQoICMgx/0pLS8sx/wJuJ6wpBRRibdu21aFDh/TVV19pzZo1+uCDDzRz5kzNmzfP7lsyq139jV62Xr166eeff9aoUaPUuHFjlS5dWllZWercuXOuv+hzuyvJ9e5UYq5ZVPR6HPnGrDC5Xv2ZmZl2z729vbVx40atX79eK1as0KpVq/Tpp5/q3nvv1Zo1a27pji9ZWVlycXHRt99+m+txrl3j6Vb/zPLK6vcDABRNzKH+whyqYOZQFStWVO/evdWzZ0/Vq1dPS5cuVXR09A3Xmvryyy/12muvadKkSercubPdvqysLAUEBGjRokW5vrZChQo3VRdQFHGmFFDI+fv7a9CgQfrkk090/PhxNWzY0O5uLtf7JRwSEqITJ07o/Pnzdtv3799v25/936ysLCUmJtq1y8sdzc6ePau1a9fqhRde0CuvvKIHH3xQ9913n+68886bPsatyO7DtafoJycn69y5c7a+FsT7SlJCQkKOfQkJCbb92eOwe/fuGx6vbNmyuZ7+ndu3lK6ururQoYNmzJihvXv3asqUKVq3bt0tn95drVo1GWMUGhqa67fT2ZcT5EVISIgOHTqUY2Kc289YUZ8UAwAKD+ZQf4851K3NoUqUKKGGDRsqIyMj10X5s/36668aMGCAunfvnuud/qpVq6Y///xTrVq1ynX+1ahRozzXBhQVhFJAIXbtrYBLly6t6tWr292iN/ta9Gt/EXft2lWZmZl655137LbPnDlTLi4utjujhYeHS5Leffddu3Zvv/32TdeZ/a3StaHDrFmzbvoYt6Jr1665vt+MGTMk6YZ3wbkVzZo1U0BAgObNm2f3Z/Ltt99q3759tvetUKGC2rZtqw8//FDHjh2zO8bVY1atWjWlpKTYXV5w8uRJ251vsp05cyZHLY0bN5Ykuzoc0aNHD7m5uemVV17J8edpjMnxM3kzwsPD9fvvv+vrr7+2bbt06ZLef//9HG1LlSqV66UKAADkBXOom8Mc6ubmUAcOHMjx/tJfPzuxsbEqW7bsdc9mSktL04MPPqg77rhDCxcuzDUM7dWrlzIzMzVp0qQc+65cuXLTa1YBRRGX7wGFWN26ddWuXTs1bdpU/v7+2rZtmz777DO769abNm0qSXr22WcVHh4uNzc39e7dW/fff7/at2+vF198UUeOHFGjRo20Zs0affXVVxo+fLjtOvamTZuqZ8+emjVrlv7880/b7Yx//fVXSTd35oqPj4/atm2r6dOnKyMjQ3fccYfWrFmT45vDgtKoUSMNGDBA7733ns6dO6d77rlHW7Zs0cKFC9W9e3e1b9/e4WNnZGRo8uTJObb7+/vr6aef1muvvaZBgwbpnnvuUZ8+fWy3M65atapGjBhha//WW2+pdevWuuuuuzRkyBCFhobqyJEjWrFiheLj4yVJvXv31pgxY/Tggw/q2Wef1f/+9z/NnTtXNWvWtFvodOLEidq4caMiIiIUEhKiU6dO6d1331WlSpXUunXrv+3TwYMHc+1TkyZNFBERocmTJ2vs2LE6cuSIunfvrjJlyigxMVFffPGFhgwZoueffz5PY/jEE0/onXfeUZ8+fTRs2DBVrFhRixYtsi1sevXPWNOmTfXpp59q5MiRat68uUqXLq37778/T+8HAABzqJvDHOrm5lA7duxQ37591aVLF7Vp00b+/v76/ffftXDhQp04cUKzZs267qV/r7zyivbu3auXXnpJX331ld2+atWqKSwsTPfcc4+eeOIJTZ06VfHx8erUqZNKlCihAwcOaNmyZZo9e7YeeuihvAw/UHRYfr8/oJjIvh3s1q1bc91/zz3
3/O3tjCdPnmzuvvtu4+fnZ7y9vU3t2rXNlClTzOXLl21trly5Yp555hlToUIF4+LiYnfb2/Pnz5sRI0aY4OBgU6JECVOjRg3z+uuv291C1xhjLly4YKKiooy/v78pXbq06d69u0lISDCS7G4vnH1b3dxuo/vbb7+ZBx980Pj5+RlfX1/zr3/9y5w4ceK6t0S+9hjXu3VwbuOUm4yMDPPKK6+Y0NBQU6JECVO5cmUzduxYc+nSpZt6n9wMGDDgureirlatmq3dp59+apo0aWI8PT2Nv7+/6devn/ntt99yHG/37t22MfLy8jK1atUyL7/8sl2bNWvWmPr16xsPDw9Tq1Yt8/HHH+e4nfHatWtNt27dTHBwsPHw8DDBwcGmT58+5tdff/3bPoWEhFy3T5GRkbZ2//3vf03r1q1NqVKlTKlSpUzt2rVNVFSUSUhIsLW53p9NbrdlPnz4sImIiDDe3t6mQoUK5rnnnjP//e9/jSSzadMmW7u0tDTTt29f4+fnZyTZjrN+/XojySxbtszuuImJiUaSWbBgwd/2HQBQNDCHYg51LWfPoZKTk820adPMPffcYypWrGjc3d1N2bJlzb333ms+++wzu7bZP7+JiYl/OxZX/8waY8x7771nmjZtary9vU2ZMmVMgwYNzOjRo82JEyduZtiBIsnFGFaHBZBTfHy8mjRpoo8//lj9+vVzdjm4Dc2aNUsjRozQb7/9pjvuuMPZ5QAAkC+YQwHAzWNNKQC6ePFijm2zZs2Sq6ur2rZt64SKcLu59mfs0qVL+s9//qMaNWoQSAEAiizmUABwa1hTCoCmT5+uuLg4tW/fXu7u7vr222/17bffasiQIapcubKzy8NtoEePHqpSpYoaN26slJQUffzxx9q/f/91b30MAEBRwBwKAG4Nl+8BUExMjG0RxrS0NFWpUkX9+/fXiy++KHd3smvculmzZumDDz7QkSNHlJmZqbp162r06NF6+OGHnV0aAAAOYw4FALeGUAoAAAAAAACWY00pAAAAAAAAWI5QCgAAAAAAAJbjQuebkJWVpRMnTqhMmTJycXFxdjkAAOAWGWN0/vx5BQcHy9WV7+jygnkRAAC3D2fPiQilbsKJEye4ewYAALeh48ePq1KlSs4uo0hhXgQAwO3HWXMiQqmbUKZMGUl//SH5+Pg4uRoAAHCrUlNTVblyZdvveNw85kUAANw+nD0nIpS6Cdmnpvv4+DD5AgDgNsLlZ3nHvAgAgNuPs+ZELKIAAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAy7k7uwAAAHB7i4ze6vBr5w9sno+VAAWHn3MAAPKOM6UAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAA
AAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAJ5s6daqaN2+uMmXKKCAgQN27d1dCQoJdm0uXLikqKkrlypVT6dKl1bNnTyUnJ9u1OXbsmCIiIlSyZEkFBARo1KhRunLlil2bDRs26K677pKnp6eqV6+u6Ojogu4eAABArgilAAAAnOz7779XVFSUNm3apJiYGGVkZKhTp066cOGCrc2IESP0zTffaNmyZfr+++914sQJ9ejRw7Y/MzNTERERunz5sn7++WctXLhQ0dHRGjdunK1NYmKiIiIi1L59e8XHx2v48OEaPHiwVq9ebWl/AQAAJMnd2QUAAAAUd6tWrbJ7Hh0drYCAAMXFxalt27ZKSUnR/PnztXjxYt17772SpAULFqhOnTratGmTWrZsqTVr1mjv3r367rvvFBgYqMaNG2vSpEkaM2aMJkyYIA8PD82bN0+hoaF68803JUl16tTRjz/+qJkzZyo8PNzyfgMAgOKNM6UAAAAKmZSUFEmSv7+/JCkuLk4ZGRnq2LGjrU3t2rVVpUoVxcbGSpJiY2PVoEEDBQYG2tqEh4crNTVVe/bssbW5+hjZbbKPkZv09HSlpqbaPQAAAPIDoRQAAEAhkpWVpeHDh6tVq1aqX7++JCkpKUkeHh7y8/OzaxsYGKikpCRbm6sDqez92ftu1CY1NVUXL17MtZ6pU6fK19fX9qhcufIt9xEAAEAilAIAAChUoqKitHv3bi1ZssTZpUiSxo4dq5SUFNvj+PHjzi4JAADcJlhTCgAAoJAYOnSoli9fro0bN6pSpUq27UFBQbp8+bLOnTtnd7ZUcnKygoKCbG22bNlid7zsu/Nd3ebaO/YlJyfLx8dH3t7eudbk6ekpT0/PW+4bAADAtThTCgAAwMmMMRo6dKi++OILrVu3TqGhoXb7mzZtqhIlSmjt2rW2bQkJCTp27JjCwsIkSWFhYdq1a5dOnTplaxMTEyMfHx/VrVvX1ubqY2S3yT4GAACAlThTCgAAwMmioqK0ePFiffXVVypTpoxtDShfX195e3vL19dXkZGRGjlypPz9/eXj46NnnnlGYWFhatmypSSpU6dOqlu3rvr376/p06crKSlJL730kqKiomxnOj355JN65513NHr0aD322GNat26dli5dqhUrVjit7wAAoPjiTCkAAAAnmzt3rlJSUtSuXTtVrFjR9vj0009tbWbOnKl//vOf6tmzp9q2baugoCB9/vnntv1ubm5avny53NzcFBYWpkceeUSPPvqoJk6caGsTGhqqFStWKCYmRo0aNdKbb76pDz74QOHh4Zb2FwAAQOJMKQAAAKczxvxtGy8vL82ZM0dz5sy5bpuQkBCtXLnyhsdp166dtm/fnucaAQAA8htnSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsVmlBq2rRpcnFx0fDhw23bLl26pKioKJUrV06lS5dWz549lZycbPe6Y8eOKSIiQiVLllRAQIBGjRqlK1eu2LXZsGGD7rrrLnl6eqp69eqKjo62oEcAAAAAAAC4nkIRSm3dulX/+c9/1LBhQ7vtI0aM0DfffKNly5bp+++/14kTJ9SjRw/b/szMTEVEROjy5cv6+eeftXDhQkVHR2vcuHG2NomJiYqIiFD79u0VHx+v4cOHa/DgwVq9erVl/QMAAAAAAIA9p4dSaWlp6tevn95//32VLVvWtj0
lJUXz58/XjBkzdO+996pp06ZasGCBfv75Z23atEmStGbNGu3du1cff/yxGjdurC5dumjSpEmaM2eOLl++LEmaN2+eQkND9eabb6pOnToaOnSoHnroIc2cOdMp/QUAAAAAAEAhCKWioqIUERGhjh072m2Pi4tTRkaG3fbatWurSpUqio2NlSTFxsaqQYMGCgwMtLUJDw9Xamqq9uzZY2tz7bHDw8Ntx8hNenq6UlNT7R4AAAAAAADIP+7OfPMlS5bol19+0datW3PsS0pKkoeHh/z8/Oy2BwYGKikpydbm6kAqe3/2vhu1SU1N1cWLF+Xt7Z3jvadOnapXXnnF4X4BAAAAAADgxpx2ptTx48c1bNgwLVq0SF5eXs4qI1djx45VSkqK7XH8+HFnlwQAAAAAAHBbcVooFRcXp1OnTumuu+6Su7u73N3d9f333+utt96Su7u7AgMDdfnyZZ07d87udcnJyQoKCpIkBQUF5bgbX/bzv2vj4+OT61lSkuTp6SkfHx+7BwAAAAAAAPKP00KpDh06aNeuXYqPj7c9mjVrpn79+tn+v0SJElq7dq3tNQkJCTp27JjCwsIkSWFhYdq1a5dOnTplaxMTEyMfHx/VrVvX1ubqY2S3yT4GAAAAAAAArOe0NaXKlCmj+vXr220rVaqUypUrZ9seGRmpkSNHyt/fXz4+PnrmmWcUFhamli1bSpI6deqkunXrqn///po+fbqSkpL00ksvKSoqSp6enpKkJ598Uu+8845Gjx6txx57TOvWrdPSpUu1YsUKazsMAAAAAAAAG6cudP53Zs6cKVdXV/Xs2VPp6ekKDw/Xu+++a9vv5uam5cuX66mnnlJYWJhKlSqlAQMGaOLEibY2oaGhWrFihUaMGKHZs2erUqVK+uCDDxQeHu6MLgEAAAB2IqNz3vTnZs0f2DwfKwEAwFqFKpTasGGD3XMvLy/NmTNHc+bMue5rQkJCtHLlyhset127dtq+fXt+lAgAAAAAAIB84LQ1pQAAAAAAAFB8EUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcu7OLgAAAACAYyKjtzr82vkDm+djJQAA5B1nSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAABQCGzdu1P3336/g4GC5uLjoyy+/tNs/cOBAubi42D06d+5s1+bMmTPq16+ffHx85Ofnp8jISKWlpdm12blzp9q0aSMvLy9VrlxZ06dPL+iuAQAA5IpQCgAAoBC4cOGCGjVqpDlz5ly3TefOnXX
y5Enb45NPPrHb369fP+3Zs0cxMTFavny5Nm7cqCFDhtj2p6amqlOnTgoJCVFcXJxef/11TZgwQe+9916B9QsAAOB63J1dAAAAAKQuXbqoS5cuN2zj6empoKCgXPft27dPq1at0tatW9WsWTNJ0ttvv62uXbvqjTfeUHBwsBYtWqTLly/rww8/lIeHh+rVq6f4+HjNmDHDLrwCAACwAmdKAQAAFBEbNmxQQECAatWqpaeeekp//vmnbV9sbKz8/PxsgZQkdezYUa6urtq8ebOtTdu2beXh4WFrEx4eroSEBJ09e9a6jgAAAIgzpQAAAIqEzp07q0ePHgoNDdWhQ4f073//W126dFFsbKzc3NyUlJSkgIAAu9e4u7vL399fSUlJkqSkpCSFhobatQkMDLTtK1u2bI73TU9PV3p6uu15ampqfncNAAAUU4RSAAAARUDv3r1t/9+gQQM1bNhQ1apV04YNG9ShQ4cCe9+pU6fqlVdeKbDjAwCA4ovL9wAAAIqgO++8U+XLl9fBgwclSUFBQTp16pRdmytXrujMmTO2daiCgoKUnJxs1yb7+fXWqho7dqxSUlJsj+PHj+d3VwAAQDFFKAUAAFAE/fbbb/rzzz9VsWJFSVJYWJjOnTunuLg4W5t169YpKytLLVq0sLXZuHGjMjIybG1iYmJUq1atXC/dk/5aXN3Hx8fuAQAAkB8IpQAAAAqBtLQ0xcfHKz4+XpKUmJio+Ph4HTt2TGlpaRo1apQ2bdqkI0eOaO3aterWrZuqV6+u8PBwSVKdOnXUuXNnPf7449qyZYt++uknDR06VL1791ZwcLAkqW/fvvLw8FBkZKT27NmjTz/9VLNnz9bIkSOd1W0AAFCMEUoBAAAUAtu2bVOTJk3UpEkTSdLIkSPVpEkTjRs3Tm5ubtq5c6ceeOAB1axZU5GRkWratKl++OEHeXp62o6xaNEi1a5dWx06dFDXrl3VunVrvffee7b9vr6+WrNmjRITE9W0aVM999xzGjdunIYMGWJ5fwEAAFjoHAAAoBBo166djDHX3b969eq/PYa/v78WL158wzYNGzbUDz/8kOf6AAAA8htnSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwnLuzCwAAAACcLTJ6q7NLAACg2OFMKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5QilAAAAAAAAYDlCKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5QilAAAAAAAAYDmnhlJz585Vw4YN5ePjIx8fH4WFhenbb7+17b906ZKioqJUrlw5lS5dWj179lRycrLdMY4dO6aIiAiVLFlSAQEBGjVqlK5cuWLXZsOGDbrrrrvk6emp6tWrKzo62oruAQAAAAAA4DqcGkpVqlRJ06ZNU1xcnLZt26Z7771X3bp10549eyRJI0aM0DfffKNly5bp+++/14kTJ9SjRw/b6zMzMxUREaHLly/r559/1sKFCxUdHa1x48bZ2iQmJioiIkLt27dXfHy8hg8frsGDB2v16tWW9xcAAAAAAAB/cTHGGGcXcTV/f3+9/vrreuihh1ShQgUtXrxYDz30kCRp//79qlOnjmJjY9WyZUt9++23+uc//6kTJ04oMDBQkjRv3jyNGTNGp0+floeHh8aMGaMVK1Zo9+7dtvfo3bu3zp07p1WrVt1UTampqfL19VVKSop8fHzyv9MAANzGIqO3Ovza+QOb52Ml/4ff7Y67XcfuVn5Oi6qC+vsFACg6nP17vdCsKZWZmaklS5bowoULCgsLU1xcnDIyMtSxY0dbm9q1a6tKlSqKjY2VJMXGxqpBgwa2QEqSwsPDlZqaajvbKjY21u4Y2W2yjwEAAAAAAADruTu7gF27diksLEyXLl1S6dKl9cUXX6hu3bqKj4+Xh4eH/Pz87NoHBgYqKSlJkpSUlGQXSGXvz953ozapqam6ePG
ivL29c9SUnp6u9PR02/PU1NRb7icAAAAAAAD+j9PPlKpVq5bi4+O1efNmPfXUUxowYID27t3r1JqmTp0qX19f26Ny5cpOrQcAAAAAAOB24/RQysPDQ9WrV1fTpk01depUNWrUSLNnz1ZQUJAuX76sc+fO2bVPTk5WUFCQJCkoKCjH3fiyn/9dGx8fn1zPkpKksWPHKiUlxfY4fvx4fnQVAAAAAAAA/z+nh1LXysrKUnp6upo2baoSJUpo7dq1tn0JCQk6duyYwsLCJElhYWHatWuXTp06ZWsTExMjHx8f1a1b19bm6mNkt8k+Rm48PT3l4+Nj9wAAAAAAAED+ceqaUmPHjlWXLl1UpUoVnT9/XosXL9aGDRu0evVq+fr6KjIyUiNHjpS/v798fHz0zDPPKCwsTC1btpQkderUSXXr1lX//v01ffp0JSUl6aWXXlJUVJQ8PT0lSU8++aTeeecdjR49Wo899pjWrVunpUuXasWKFc7sOgAAAAAAQLHm1FDq1KlTevTRR3Xy5En5+vqqYcOGWr16te677z5J0syZM+Xq6qqePXsqPT1d4eHhevfdd22vd3Nz0/Lly/XUU08pLCxMpUqV0oABAzRx4kRbm9DQUK1YsUIjRozQ7NmzValSJX3wwQcKDw+3vL8AAAAAAAD4i1NDqfnz599wv5eXl+bMmaM5c+Zct01ISIhWrlx5w+O0a9dO27dvd6hGAAAAAAAA5L9Ct6YUAAAAAAAAbn+EUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAs51Aodfjw4fyuAwAAoEhiXgQAAOAYh0Kp6tWrq3379vr444916dKl/K4JAACgyGBeBAAA4BiHQqlffvlFDRs21MiRIxUUFKQnnnhCW7Zsye/aAAAACj3mRQAAAI5xKJRq3LixZs+erRMnTujDDz/UyZMn1bp1a9WvX18zZszQ6dOn87tOAACAQol5EQAAgGNuaaFzd3d39ejRQ8uWLdNrr72mgwcP6vnnn1flypX16KOP6uTJk/lVJwAAQKHGvAgAACBvbimU2rZtm55++mlVrFhRM2bM0PPPP69Dhw4pJiZGJ06cULdu3fKrTgAAgEKNeREAAEDeuDvyohkzZmjBggVKSEhQ165d9dFHH6lr165ydf0r4woNDVV0dLSqVq2an7UCAAAUOsyLAAAAHONQKDV37lw99thjGjhwoCpWrJhrm4CAAM2fP/+WigMAACjsmBcBAAA4xqFQ6sCBA3/bxsPDQwMGDHDk8AAAAEUG8yIAAADHOLSm1IIFC7Rs2bIc25ctW6aFCxfeclEAAABFBfMiAAAAxzgUSk2dOlXly5fPsT0gIECvvvrqLRcFAABQVDAvAgAAcIxDodSxY8cUGhqaY3tISIiOHTt2y0UBAAAUFcyLAAAAHONQKBUQEKCdO3fm2L5jxw6VK1fulosCAAAoKpgXAQAAOMahUKpPnz569tlntX79emVmZiozM1Pr1q3TsGHD1Lt37/yuEQAAoNBiXgQAAOAYh+6+N2nSJB05ckQdOnSQu/tfh8jKytKjjz7K2gkAAKBYYV4EAADgGIdCKQ8PD3366aeaNGmSduzYIW9vbzVo0EAhISH5XR8AAEChxrwIAADAMQ6FUtlq1qypmjVr5lctAAAARRbzIgAAgLxxKJTKzMxUdHS01q5dq1OnTikrK8tu/7p16/KlOAAAgMKOeREAAIBjHAqlhg0bpujoaEVERKh+/fpycXHJ77oAAACKBOZFAAAAjnEolFqyZImWLl2qrl275nc9AAAARQrzIgAAAMe4OvIiDw8PVa9ePb9rAQAAKHKYFwEAADjGoVDqueee0+zZs2WMye96AAAAihTmRQAAAI5x6PK9H3/8UevXr9e3336revXqqUSJEnb7P//883w
pDgAAoLBjXgQAAOAYh0IpPz8/Pfjgg/ldCwAAQJHDvAgAAMAxDoVSCxYsyO86AAAAiiTmRQAAAI5xaE0pSbpy5Yq+++47/ec//9H58+clSSdOnFBaWlq+FQcAAFAUMC8CAADIO4fOlDp69Kg6d+6sY8eOKT09Xffdd5/KlCmj1157Tenp6Zo3b15+1wkAAFAoMS8CAABwjENnSg0bNkzNmjXT2bNn5e3tbdv+4IMPau3atflWHAAAQGHHvAgAAMAxDp0p9cMPP+jnn3+Wh4eH3faqVavq999/z5fCAAAAigLmRQAAAI5x6EyprKwsZWZm5tj+22+/qUyZMrdcFAAAQFHBvAgAAMAxDoVSnTp10qxZs2zPXVxclJaWpvHjx6tr1675VRsAAEChx7wIAADAMQ5dvvfmm28qPDxcdevW1aVLl9S3b18dOHBA5cuX1yeffJLfNQIAABRazIsAAAAc41AoValSJe3YsUNLlizRzp07lZaWpsjISPXr189ugU8AAIDbHfMiAAAAxzgUSkmSu7u7HnnkkfysBQAAoEhiXgQAAJB3DoVSH3300Q33P/roow4VAwAAUNQwLwIAAHCMQ6HUsGHD7J5nZGTof//7nzw8PFSyZEkmXwAAoNhgXgQAAOAYh+6+d/bsWbtHWlqaEhIS1Lp1axb0BAAAxQrzIgAAAMc4FErlpkaNGpo2bVqObwsBAACKG+ZFAAAAfy/fQinpr0U+T5w4kZ+HBAAAKJKYFwEAANyYQ2tKff3113bPjTE6efKk3nnnHbVq1SpfCgMAACgKmBcBAAA4xqFQqnv37nbPXVxcVKFCBd177716880386MuAACAIoF5EQAAgGMcCqWysrLyuw4AAIAiiXkRAACAY/J1TSkAAAAAAADgZjh0ptTIkSNvuu2MGTMceQsAAIAigXkRAACAYxwKpbZv367t27crIyNDtWrVkiT9+uuvcnNz01133WVr5+Likj9VAgAAFFLMiwAAABzjUCh1//33q0yZMlq4cKHKli0rSTp79qwGDRqkNm3a6LnnnsvXIgEAAAor5kUAAACOcWhNqTfffFNTp061TbwkqWzZspo8eTJ3mQEAAMUK8yIAAADHOBRKpaam6vTp0zm2nz59WufPn7/logAAAIoK5kUAAACOcejyvQcffFCDBg3Sm2++qbvvvluStHnzZo0aNUo9evTI1wIBAAAKM+ZFKKoio7c6/Nr5A5vnYyUAgOLKoVBq3rx5ev7559W3b19lZGT8dSB3d0VGRur111/P1wIBAAAKM+ZFAAAAjnEolCpZsqTeffddvf766zp06JAkqVq1aipVqlS+FgcAAFDYMS8CAABwjENrSmU7efKkTp48qRo1aqhUqVIyxuRXXQAAAEUK8yIAAIC8cSiU+vPPP9WhQwfVrFlTXbt21cmTJyVJkZGR3PYYAAAUK8yLAAAAHONQKDVixAiVKFFCx44dU8mSJW3bH374Ya1atSrfigMAACjsmBcBAAA4xqE1pdasWaPVq1erUqVKdttr1Kiho0eP5kthAAAARQHzIgAAAMc4dKbUhQsX7L4JzHbmzBl5enreclEAAABFBfMiAAAAxzgUSrVp00YfffSR7bmLi4uysrI0ffp0tW/fPt+KAwAAKOyYFwEAADjGocv3pk+frg4dOmjbtm26fPmyRo8erT179ujMmTP66aef8rtGAACAQot5EQAAgGMcOlOqfv36+vXXX9W6dWt169ZNFy5cUI8ePbR9+3ZVq1Ytv2sEAAAotJgXAQAAOCbPZ0plZGSoc+fOmjdvnl588cWCqAkAAKBIYF4EAADguDyfKVWiRAnt3LmzIGoBAAAoUpgXAQAAOM6hy/ceeeQRzZ8/P79rAQAAKHKYFwEAADjGoYXOr1y5og8//FDfffedmjZtqlKlStntnzFjRr4UBwAAUNjl17xo48aNev311xUXF6eTJ0/qiy++UPfu3W37jTEaP3683n//fZ07d06tWrXS3Ll
zVaNGDVubM2fO6JlnntE333wjV1dX9ezZU7Nnz1bp0qVtbXbu3KmoqCht3bpVFSpU0DPPPKPRo0ff2iAAAAA4IE+h1OHDh1W1alXt3r1bd911lyTp119/tWvj4uKSf9UBAAAUUvk9L7pw4YIaNWqkxx57TD169Mixf/r06Xrrrbe0cOFChYaG6uWXX1Z4eLj27t0rLy8vSVK/fv108uRJxcTEKCMjQ4MGDdKQIUO0ePFiSVJqaqo6deqkjh07at68edq1a5cee+wx+fn5aciQIY4OBQAAgEPyFErVqFFDJ0+e1Pr16yVJDz/8sN566y0FBgYWSHEAAACFVX7Pi7p06aIuXbrkus8Yo1mzZumll15St27dJEkfffSRAgMD9eWXX6p3797at2+fVq1apa1bt6pZs2aSpLfffltdu3bVG2+8oeDgYC1atEiXL1/Whx9+KA8PD9WrV0/x8fGaMWMGoRQAALBcntaUMsbYPf/222914cKFfC0IAACgKLByXpSYmKikpCR17NjRts3X11ctWrRQbGysJCk2NlZ+fn62QEqSOnbsKFdXV23evNnWpm3btvLw8LC1CQ8PV0JCgs6ePZvre6enpys1NdXuAQAAkB8cWug827WTMQAAgOKqIOdFSUlJkpTjLKzAwEDbvqSkJAUEBNjtd3d3l7+/v12b3I5x9Xtca+rUqfL19bU9KleufOsdAgAAUB5DKRcXlxxrI7CGFAAAKI6Ky7xo7NixSklJsT2OHz/u7JIAAMBtIk9rShljNHDgQHl6ekqSLl26pCeffDLHXWY+//zz/KsQAACgELJyXhQUFCRJSk5OVsWKFW3bk5OT1bhxY1ubU6dO2b3uypUrOnPmjO31QUFBSk5OtmuT/Ty7zbU8PT1tfQQAAMhPeQqlBgwYYPf8kUceyddiAAAAigor50WhoaEKCgrS2rVrbSFUamqqNm/erKeeekqSFBYWpnPnzikuLk5NmzaVJK1bt05ZWVlq0aKFrc2LL76ojIwMlShRQpIUExOjWrVqqWzZsgVWPwAAQG7yFEotWLCgoOoAAAAoUvJ7XpSWlqaDBw/anicmJio+Pl7+/v6qUqWKhg8frsmTJ6tGjRoKDQ3Vyy+/rODgYHXv3l2SVKdOHXXu3FmPP/645s2bp4yMDA0dOlS9e/dWcHCwJKlv37565ZVXFBkZqTFjxmj37t2aPXu2Zs6cma99AQAAuBl5CqUAAABQMLZt26b27dvbno8cOVLSX2dkRUdHa/To0bpw4YKGDBmic+fOqXXr1lq1apW8vLxsr1m0aJGGDh2qDh06yNXVVT179tRbb71l2+/r66s1a9YoKipKTZs2Vfny5TVu3DgNGTLEuo4CAAD8/wilAAAACoF27drd8A5+Li4umjhxoiZOnHjdNv7+/lq8ePEN36dhw4b64YcfHK4TAAAgv+Tp7nsAAAAAAABAfiCUAgAAAAAAgOUIpQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAliOUAgAAAAAAgOWcGkpNnTpVzZs3V5kyZRQQEKDu3bsrISHBrs2lS5cUFRWlcuXKqXTp0urZs6eSk5Pt2hw7dkwREREqWbKkAgICNGrUKF25csWuzYYNG3TXXXfJ09NT1atXV3R0dEF3DwAAAAAAANfh1FDq+++/V1RUlDZt2qSYmBhlZGSoU6dOunDhgq3NiBEj9M0332jZsmX6/vvvdeLECfXo0cO2PzMzUxEREbp8+bJ+/vlnLVy4UNHR0Ro3bpytTWJioiIiItS+fXvFx8dr+PDhGjx4sFavXm1pfwEAAAAAAPAXd2e++apVq+yeR0dHKyAgQHFxcWrbtq1SUlI0f/58LV68WPfee68kacGCBapTp442bdqkli1bas2aNdq7d6++++47BQYGqnHjxpo0aZLGjBmjCRMmyMPDQ/PmzVNoaKjefPNNSVKdOnX0448/aubMmQoPD7e83wAAAAAAAMVdoVpTKiUlRZLk7+8vSYq
Li1NGRoY6duxoa1O7dm1VqVJFsbGxkqTY2Fg1aNBAgYGBtjbh4eFKTU3Vnj17bG2uPkZ2m+xjAAAAAAAAwFpOPVPqallZWRo+fLhatWql+vXrS5KSkpLk4eEhPz8/u7aBgYFKSkqytbk6kMren73vRm1SU1N18eJFeXt72+1LT09Xenq67XlqauqtdxAAAAAAAAA2heZMqaioKO3evVtLlixxdimaOnWqfH19bY/KlSs7uyQAAAAAAIDbSqEIpYYOHarly5dr/fr1qlSpkm17UFCQLl++rHPnztm1T05OVlBQkK3NtXfjy37+d218fHxynCUlSWPHjlVKSortcfz48VvuIwAAAAAAAP6PU0MpY4yGDh2qL774QuvWrVNoaKjd/qZNm6pEiRJau3atbVtCQoKOHTumsLAwSVJYWJh27dqlU6dO2drExMTIx8dHdevWtbW5+hjZbbKPcS1PT0/5+PjYPQAAAAAAAJB/nLqmVFRUlBYvXqyvvvpKZcqUsa0B5evrK29vb/n6+ioyMlIjR46Uv7+/fHx89MwzzygsLEwtW7aUJHXq1El169ZV//79NX36dCUlJemll15SVFSUPD09JUlPPvmk3nnnHY0ePVqPPfaY1q1bp6VLl2rFihVO6zsAAAAAAEBx5tRQau7cuZKkdu3a2W1fsGCBBg4cKEmaOXOmXF1d1bNnT6Wnpys8PFzvvvuura2bm5uWL1+up556SmFhYSpVqpQGDBigiRMn2tqEhoZqxYoVGjFihGbPnq1KlSrpgw8+UHh4eIH3EQAAALjdREZvvaXXzx/YPJ8qAQAUZU4NpYwxf9vGy8tLc+bM0Zw5c67bJiQkRCtXrrzhcdq1a6ft27fnuUYAAAAAAADkv0Kx0DkAAAAAAACKF0IpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWI5QCgAAAAAAAJYjlAIAAAAAAIDlCKUAAAAAAABgOXdnFwAAAAAAVoiM3urwa+cPbJ6PlQAAJM6UAgAAAAAAgBMQSgEAAAAAAMByXL4HAAAAwFJcRgcAkDhTCgAAAAAAAE5AKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcu7OLgAAAAAAblZk9FZnlwAAyCecKQUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcu7OLgAAAAAAbneR0Vsdfu38gc3zsRIAKDwIpYq4W/nlJvELDgAAAAAAOAeX7wEAAAAAAMBynCkFAAAAAH/jVq9QAADkxJlSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsByhFAAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcu7OLgAAAAAAUPhERm91+LXzBzbPx0oA3K44UwoAAAAAAACWI5QCAAAAAACA5QilAAAAAAAAYDlCKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5dydXQCci9u8AgAAAAAAZ+BMKQAAAAAAAFiOUAoAAAAAAACW4/I9AAAAALhN3cpyHQBQ0AilAAAAAAD56lbDMNavBYoHQikAAAAAQKHCDZm
A4oE1pQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAliOUAgAAAAAAgOUIpQAAAAAAAGA57r4HAAAAAIXYrdyJDgAKM86UAgAAKAImTJggFxcXu0ft2rVt+y9duqSoqCiVK1dOpUuXVs+ePZWcnGx3jGPHjikiIkIlS5ZUQECARo0apStXrljdFQAAAEmcKQUAAFBk1KtXT999953tubv7/03lRowYoRUrVmjZsmXy9fXV0KFD1aNHD/3000+SpMzMTEVERCgoKEg///yzTp48qUcffVQlSpTQq6++anlfAAAACKUAAACKCHd3dwUFBeXYnpKSovnz52vx4sW69957JUkLFixQnTp1tGnTJrVs2VJr1qzR3r179d133ykwMFCNGzfWpEmTNGbMGE2YMEEeHh5WdwcACsStXO44f2DzfKwEwN/h8j0AAIAi4sCBAwoODtadd96pfv366dixY5KkuLg4ZWRkqGPHjra2tWvXVpUqVRQbGytJio2NVYMGDRQYGGhrEx4ertTUVO3Zs+e675menq7U1FS7BwAAQH4glAIAACgCWrRooejoaK1atUpz585VYmKi2rRpo/PnzyspKUkeHh7y8/Oze01gYKCSkpIkSUlJSXaBVPb+7H3XM3XqVPn6+toelStXzt+OAQCAYovL9wAAAIqALl262P6/YcOGatGihUJCQrR06VJ5e3sX2PuOHTtWI0eOtD1PTU0lmAIAAPmCM6UAAACKID8/P9WsWVMHDx5UUFCQLl++rHPnztm1SU5Otq1BFRQUlONufNnPc1unKpunp6d8fHzsHgAAAPmBUAoAAKAISktL06FDh1SxYkU1bdpUJUqU0Nq1a237ExISdOzYMYWFhUmSwsLCtGvXLp06dcrWJiYmRj4+Pqpbt67l9QMAADg1lNq4caPuv/9+BQcHy8XFRV9++aXdfmOMxo0bp4oVK8rb21sdO3bUgQMH7NqcOXNG/fr1k4+Pj/z8/BQZGam0tDS7Njt37lSbNm3k5eWlypUra/r06QXdNQAAgHz1/PPP6/vvv9eRI0f0888/68EHH5Sbm5v69OkjX19fRUZGauTIkVq/fr3i4uI0aNAghYWFqWXLlpKkTp06qW7duurfv7927Nih1atX66WXXlJUVJQ8PT2d3DsAAFAcOTWUunDhgho1aqQ5c+bkun/69Ol66623NG/ePG3evFmlSpVSeHi4Ll26ZGvTr18/7dmzRzExMVq+fLk2btyoIUOG2PanpqaqU6dOCgkJUVxcnF5//XVNmDBB7733XoH3DwAAIL/89ttv6tOnj2rVqqVevXqpXLly2rRpkypUqCBJmjlzpv75z3+qZ8+eatu2rYKCgvT555/bXu/m5qbly5fLzc1NYWFheuSRR/Too49q4sSJzuoSAAAo5py60HmXLl3sFu28mjFGs2bN0ksvvaRu3bpJkj766CMFBgbqyy+/VO/evbVv3z6tWrVKW7duVbNmzSRJb7/9trp27ao33nhDwcHBWrRokS5fvqwPP/xQHh4eqlevnuLj4zVjxgy78AoAAKAwW7JkyQ33e3l5ac6cOdf9sk+SQkJCtHLlyvwuDQAAwCGFdk2pxMREJSUlqWPHjrZtvr6+atGihWJjYyVJsbGx8vPzswVSktSxY0e5urpq8+bNtjZt27aVh4eHrU14eLgSEhJ09uxZi3oDAAAAAACAqzn1TKkbSUpKkiQFBgbabQ8MDLTtS0pKUkBAgN1+d3d3+fv727UJDQ3NcYzsfWXLls3x3unp6UpPT7c9T01NvcXeAAAAAAAA4GqF9kwpZ5o6dap8fX1tj8qVKzu7JAAAAAAAgNtKoQ2lgoKCJEnJycl225OTk237goKC7G5rLElXrlzRmTNn7Nrkdoyr3+NaY8eOVUpKiu1x/PjxW+8QAAAAAAAAbAptKBUaGqqgoCCtXbvWti01NVWbN29WWFiYJCksLEznzp1TXFycrc26deuUlZWlFi1a2Nps3LhRGRk
ZtjYxMTGqVatWrpfuSZKnp6d8fHzsHgAAAAAAAMg/Tg2l0tLSFB8fr/j4eEl/LW4eHx+vY8eOycXFRcOHD9fkyZP19ddfa9euXXr00UcVHBys7t27S5Lq1Kmjzp076/HHH9eWLVv0008/aejQoerdu7eCg4MlSX379pWHh4ciIyO1Z88effrpp5o9e7ZGjhzppF4DAAAAAADAqQudb9u2Te3bt7c9zw6KBgwYoOjoaI0ePVoXLlzQkCFDdO7cObVu3VqrVq2Sl5eX7TWLFi3S0KFD1aFDB7m6uqpnz5566623bPt9fX21Zs0aRUVFqWnTpipfvrzGjRunIUOGWNdRAAAAAAAA2HFqKNWuXTsZY66738XFRRMnTtTEiROv28bf31+LFy++4fs0bNhQP/zwg8N1AgAAAAAAIH8V2jWlAAAAAAAAcPsilAIAAAAAAIDlCKUAAAAAAABgOUIpAAAAAAAAWM6pC50DjoiM3npLr58/sHk+VQIAAAAAABzFmVIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMuxphQcditrO7GuEwAAAAAAxRtnSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwnLuzC0DxFBm91dklAAAAAAAAJ+JMKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5VhTCsXOraxnNX9g83ysBAAAAACA4oszpQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAlmNNqULgVtY4AgAAAAAAKIo4UwoAAAAAAACW40wpAAAAAADEnboBq3GmFAAAAAAAACxHKAUAAAAAAADLcfkekAeczgsAAAAAQP7gTCkAAAAAAABYjlAKAAAAAAAAliOUAgAAAAAAgOUIpQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAliOUAgAAAAAAgOUIpQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAliOUAgAAAAAAgOUIpQAAAAAAAGA5QikAAAAAAABYjlAKAAAAAAAAliOUAgAAAAAAgOXcnV0AUFxERm91+LXzBzbPx0oAAAAAAHA+zpQCAAAAAACA5QilAAAAAAAAYDlCKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5bj7HlAE3Mqd+24Vd/4DAAAAABQEQikABeZWwjTCMAAAAAC4vXH5HgAAAAAAACxHKAUAAAAAAADLEUoBAAAAAADAcoRSAAAAAAAAsBwLnQO4IWfe+Q8AAAAAcPviTCkAAAAAAABYjjOlAAAAAAC4Rbd6hcH8gc3zqRKg6OBMKQAAAAAAAFiOUAoAAAAAAACWI5QCAAAAAACA5VhTCkChdCvX5HM9PgAAAAAUfpwpBQAAAAAAAMtxphQA5CPO8AIA57nVO18BgDMxj0RxRCgFANfgHzUAAAAAUPC4fA8AAAAAAACWI5QCAAAAAACA5QilAAAAAAAAYDlCKQAAAAAAAFiOUAoAAAAAAACW4+57AG473D0PAAAAAAo/zpQCAAAAAACA5QilAAAAAAAAYDlCKQAAAAAAAFiONaUAoJi71TW45g9snk+VAAAAAChOCKUAoJC4lXCIYAgAAABAUUMoBQC3Ae44CAAAAKCoYU0pAAAAAAAAWI4zpQAATsMliwAAAEDxRSgFALglXDoIAAAAwBFcvgcAAAAAAADLcaYUAKBI4tK/vGG8AAC4ffF7HkUVoRQAAHnExA8AAAC4dVy+BwAAAAAAAMtxphQAoNhhcXYAAADA+QilAACwEIEYAAAoTFiWAM5EKAUAAG6IIA0AAAAFgVAKAAAAhQYhKAAAxQehFAAAAAAAyDNnfpHApYO3h2IVSs2ZM0evv/66kpKS1KhRI7399tu6++67nV0WAACApZgTAQCKOtbCuj24OrsAq3z66acaOXKkxo8fr19++UWNGjVSeHi4Tp065ezSAAAALMOcCAAAFBbFJpS
aMWOGHn/8cQ0aNEh169bVvHnzVLJkSX344YfOLg0AAMAyzIkAAEBhUSxCqcuXLysuLk4dO3a0bXN1dVXHjh0VGxvrxMoAAACsw5wIAAAUJsViTak//vhDmZmZCgwMtNseGBio/fv352ifnp6u9PR02/OUlBRJUmpqaoHUd/liWoEcFwCAoq6gfvdmH9cYUyDHL6zyOieSmBcBAG4//eeud3YJDpnTr2m+H9PZc6JiEUrl1dSpU/XKK6/k2F65cmUnVAMAQPH18dMFe/zz58/L19e3YN+kiGNeBABA4VCQ86I///zTKXOiYhFKlS9fXm5ubkpOTrbbnpycrKCgoBztx44dq5EjR9qeZ2Vl6cyZMypXrpxcXFzytbbU1FRVrlxZx48fl4+PT74euyhjXK6Psckd45I7xiV3jMv1FZexMcbo/PnzCg4OdnYplsrrnEgq2HlRcfl5ywvGJCfGJCfGJCfGJCfGJCfGJKeUlBRVqVJF/v7+Tnn/YhFKeXh4qGnTplq7dq26d+8u6a8J1dq1azV06NAc7T09PeXp6Wm3zc/Pr0Br9PHx4S9FLhiX62Nscse45I5xyR3jcn3FYWyK4xlSeZ0TSdbMi4rDz1teMSY5MSY5MSY5MSY5MSY5MSY5ubo6Z8nxYhFKSdLIkSM1YMAANWvWTHfffbdmzZqlCxcuaNCgQc4uDQAAwDLMiQAAQGFRbEKphx9+WKdPn9a4ceOUlJSkxo0ba9WqVTkW+gQAALidMScCAACFRbEJpSRp6NCh1z013Vk8PT01fvz4HKfFF3eMy/UxNrljXHLHuOSOcbk+xqZ4KCxzIn7ecmJMcmJMcmJMcmJMcmJMcmJMcnL2mLiY4nYvZAAAAAAAADidc1ayAgAAAAAAQLFGKAUAAAAAAADLEUoBAAAAAADAcoRSFpgzZ46qVq0qLy8vtWjRQlu2bLlu2/fff19t2rRR2bJlVbZsWXXs2PGG7YuyvIzL1ZYsWSIXFxd17969YAt0oryOzblz5xQVFaWKFSvK09NTNWvW1MqVKy2q1jp5HZdZs2apVq1a8vb2VuXKlTVixAhdunTJomqtsXHjRt1///0KDg6Wi4uLvvzyy799zYYNG3TXXXfJ09NT1atXV3R0dIHXabW8jsvnn3+u++67TxUqVJCPj4/CwsK0evVqa4q1kCM/L9l++uknubu7q3HjxgVWH4oXR+cBhd3UqVPVvHlzlSlTRgEBAerevbsSEhLs2ly6dElRUVEqV66cSpcurZ49eyo5OdmuzbFjxxQREaGSJUsqICBAo0aN0pUrV+zaFNXP82nTpsnFxUXDhw+3bSuOY/L777/rkUceUbly5eTt7a0GDRpo27Zttv3GGI0bN04VK1aUt7e3OnbsqAMHDtgd48yZM+rXr598fHzk5+enyMhIpaWl2bXZuXOn2rRpIy8vL1WuXFnTp0+3pH95lZmZqZdfflmhoaHy9vZWtWrVNGnSJF29BHJxGJO/+11t5RgsW7ZMtWvXlpeXlxo0aOC0f1/caEwyMjI0ZswYNWjQQKVKlVJwcLAeffRRnThxwu4YxWlMrvXkk0/KxcVFs2bNstteaMbEoEAtWbLEeHh4mA8//NDs2bPHPP7448bPz88kJyfn2r5v375mzpw5Zvv27Wbfvn1m4MCBxtfX1/z2228WV16w8jou2RITE80dd9xh2rRpY7p162ZNsRbL69ikp6ebZs2ama5du5off/zRJCYmmg0bNpj4+HiLKy9YeR2XRYsWGU9PT7No0SKTmJhoVq9ebSpWrGhGjBhhceUFa+XKlebFF180n3/+uZFkvvjiixu2P3z4sClZsqQZOXKk2bt3r3n77beNm5ubWbVqlTUFWySv4zJs2DDz2muvmS1btphff/3VjB071pQoUcL88ssv1hRskbyOS7azZ8+aO++803Tq1Mk0atSoQGtE8eDoPKAoCA8PNwsWLDC7d+828fHxpmvXrqZKlSomLS3N1ubJJ580lSt
XNmvXrjXbtm0zLVu2NP/4xz9s+69cuWLq169vOnbsaLZv325Wrlxpypcvb8aOHWtrU1Q/z7ds2WKqVq1qGjZsaIYNG2bbXtzG5MyZMyYkJMQMHDjQbN682Rw+fNisXr3aHDx40NZm2rRpxtfX13z55Zdmx44d5oEHHjChoaHm4sWLtjadO3c2jRo1Mps2bTI//PCDqV69uunTp49tf0pKigkMDDT9+vUzu3fvNp988onx9vY2//nPfyzt782YMmWKKVeunFm+fLlJTEw0y5YtM6VLlzazZ8+2tSkOY/J3v6utGoOffvrJuLm5menTp5u9e/eal156yZQoUcLs2rWrwMfgWjcak3PnzpmOHTuaTz/91Ozfv9/Exsaau+++2zRt2tTuGMVpTK72+eefm0aNGpng4GAzc+ZMu32FZUwIpQrY3XffbaKiomzPMzMzTXBwsJk6depNvf7KlSumTJkyZuHChQVVolM4Mi5Xrlwx//jHP8wHH3xgBgwYcNuGUnkdm7lz55o777zTXL582aoSnSKv4xIVFWXuvfdeu20jR440rVq1KtA6nelmQobRo0ebevXq2W17+OGHTXh4eAFW5lx5CV+uVrduXfPKK6/kf0GFRF7G5eGHHzYvvfSSGT9+PKEU8sWtzo+KklOnThlJ5vvvvzfG/PUPqBIlSphly5bZ2uzbt89IMrGxscaYv/6x4erqapKSkmxt5s6da3x8fEx6eroxpmh+np8/f97UqFHDxMTEmHvuuccWShXHMRkzZoxp3br1dfdnZWWZoKAg8/rrr9u2nTt3znh6eppPPvnEGGPM3r17jSSzdetWW5tvv/3WuLi4mN9//90YY8y7775rypYtaxuj7PeuVatWfnfplkVERJjHHnvMbluPHj1Mv379jDHFc0yu/V1t5Rj06tXLRERE2NXTokUL88QTT+RrH/PqZuYvW7ZsMZLM0aNHjTHFd0x+++03c8cdd5jdu3ebkJAQu1CqMI0Jl+8VoMuXLysuLk4dO3a0bXN1dVXHjh0VGxt7U8f43//+p4yMDPn7+xdUmZZzdFwmTpyogIAARUZGWlGmUzgyNl9//bXCwsIUFRWlwMBA1a9fX6+++qoyMzOtKrvAOTIu//jHPxQXF2e7HOTw4cNauXKlunbtaknNhVVsbKzdOEpSeHj4TX8mFRdZWVk6f/78bfXZ66gFCxbo8OHDGj9+vLNLwW0iP+ZHRUlKSook2T5P4uLilJGRYdf/2rVrq0qVKrb+x8bGqkGDBgoMDLS1CQ8PV2pqqvbs2WNrU9Q+z6OiohQREZGj7uI4Jl9//bWaNWumf/3rXwoICFCTJk30/vvv2/YnJiYqKSnJrj++vr5q0aKF3Zj4+fmpWbNmtjYdO3aUq6urNm/ebGvTtm1beXh42NqEh4crISFBZ8+eLehu5sk//vEPrV27Vr/++qskaceOHfrxxx/VpUsXScVzTK5l5RgUpb9P10pJSZGLi4v8/PwkFc8xycrKUv/+/TVq1CjVq1cvx/7CNCbueeoZ8uSPP/5QZmam3S9PSQoMDNT+/ftv6hhjxoxRcHBwjj/oosyRcfnxxx81f/58xcfHW1Ch8zgyNocPH9a6devUr18/rVy5UgcPHtTTTz+tjIyM2+YfkY6MS9++ffXHH3+odevWMsboypUrevLJJ/Xvf//bipILraSkpFzHMTU1VRcvXpS3t7eTKitc3njjDaWlpalXr17OLsWpDhw4oBdeeEE//PCD3N2ZMiB/5Mf8qKjIysrS8OHD1apVK9WvX1/SX5/DHh4etn8sZQsMDFRSUpKtTW7jk73vRm0K6+f5kiVL9Msvv2jr1q059hXHMTl8+LDmzp2rkSNH6t///re2bt2qZ599Vh4eHhowYICtT7n15+r+BgQE2O13d3eXv7+/XZvQ0NAcx8jeV7Zs2QLpnyNeeOEFpaamqnbt2nJzc1NmZqamTJmifv36SVKxHJNrWTkG1/v7lH2MwurSpUsaM2a
M+vTpIx8fH0nFc0xee+01ubu769lnn811f2EaE2aYhdi0adO0ZMkSbdiwQV5eXs4ux2nOnz+v/v376/3331f58uWdXU6hk5WVpYCAAL333ntyc3NT06ZN9fvvv+v111+/bUIpR2zYsEGvvvqq3n33XbVo0UIHDx7UsGHDNGnSJL388svOLg+F2OLFi/XKK6/oq6++yvHLujjJzMxU37599corr6hmzZrOLgcokqKiorR79279+OOPzi7FqY4fP65hw4YpJiamWM9pr5aVlaVmzZrp1VdflSQ1adJEu3fv1rx58zRgwAAnV+ccS5cu1aJFi7R48WLVq1dP8fHxGj58uIKDg4vtmCBvMjIy1KtXLxljNHfuXGeX4zRxcXGaPXu2fvnlF7m4uDi7nL/F5XsFqHz58nJzc8tx55Dk5GQFBQXd8LVvvPGGpk2bpjVr1qhhw4YFWabl8jouhw4d0pEjR3T//ffL3d1d7u7u+uijj/T111/L3d1dhw4dsqr0AufIz0zFihVVs2ZNubm52bbVqVNHSUlJunz5coHWaxVHxuXll19W//79NXjwYDVo0EAPPvigXn31VU2dOlVZWVlWlF0oBQUF5TqOPj4+heobZGdZsmSJBg8erKVLl95WZ6g64vz589q2bZuGDh1q++ydOHGiduzYIXd3d61bt87ZJaKIupX5UVEydOhQLV++XOvXr1elSpVs24OCgnT58mWdO3fOrv3V/b/eZ3X2vhu1KYyf53FxcTp16pTuuusu2+fJ999/r7feekvu7u4KDAwsdmNSsWJF1a1b125bnTp1dOzYMUn/16cb/T0JCgrSqVOn7PZfuXJFZ86cydO4FRajRo3SCy+8oN69e6tBgwbq37+/RowYoalTp0oqnmNyLSvH4HptCusYZQdSR48eVUxMjO0sKan4jckPP/ygU6dOqUqVKrbP3KNHj+q5555T1apVJRWuMSGUKkAeHh5q2rSp1q5da9uWlZWltWvXKiws7Lqvmz59uiZNmqRVq1bZXeN5u8jruNSuXVu7du1SfHy87fHAAw+offv2io+PV+XKla0sv0A58jPTqlUrHTx40C5o+fXXX1WxYkW763+LMkfG5X//+59cXe0/4rKDO3PVrYWLm7CwMLtxlKSYmJgbfiYVF5988okGDRqkTz75RBEREc4ux+l8fHxyfPY++eSTqlWrluLj49WiRQtnl4giytH5UVFhjNHQoUP1xRdfaN26dTkufWjatKlKlChh1/+EhAQdO3bM1v+wsDDt2rXL7h8M2f/Iyg4yitLneYcOHXJ8njRr1kz9+vWz/X9xG5NWrVopISHBbtuvv/6qkJAQSVJoaKiCgoLs+pOamqrNmzfbjcm5c+cUFxdna7Nu3TplZWXZPqPDwsK0ceNGZWRk2NrExMSoVq1ahe4ytevN3bLnuMVxTK5l5RgUpb9P2YHUgQMH9N1336lcuXJ2+4vbmPTv3187d+60+8wNDg7WqFGjtHr1akmFbExuekl0OGTJkiXG09PTREdHm71795ohQ4YYPz8/251D+vfvb1544QVb+2nTphkPDw/z2WefmZMnT9oe58+fd1YXCkRex+Vat/Pd9/I6NseOHTNlypQxQ4cONQkJCWb58uUmICDATJ482VldKBB5HZfx48ebMmXKmE8++cQcPnzYrFmzxlSrVs306tXLWV0oEOfPnzfbt28327dvN5LMjBkzzPbt2213G3nhhRdM//79be2zb5c9atQos2/fPjNnzpxCe7vsW5HXcVm0aJFxd3c3c+bMsfvsPXfunLO6UCDyOi7X4u57yC9/95lelD311FPG19fXbNiwwe7z5H//+5+tzZNPPmmqVKli1q1bZ7Zt22bCwsJMWFiYbf+VK1dM/fr1TadOnUx8fLxZtWqVqVChghk7dqytTVH/PL/67nvGFL8x2bJli3F3dzdTpkwxBw4cMIsWLTIlS5Y0H3/8sa3NtGnTjJ+fn/nqq6/Mzp07Tbdu3Ux
oaKi5ePGirU3nzp1NkyZNzObNm82PP/5oatSoYXdL93PnzpnAwEDTv39/s3v3brNkyRJTsmRJu1u6FxYDBgwwd9xxh1m+fLlJTEw0n3/+uSlfvrwZPXq0rU1xGJO/+11t1Rj89NNPxt3d3bzxxhtm3759Zvz48aZEiRJm165d1g3G/+9GY3L58mXzwAMPmEqVKpn4+Hi7z92r7xpXnMYkN9fefc+YwjMmhFIWePvtt02VKlWMh4eHufvuu82mTZts++655x4zYMAA2/OQkBAjKcdj/Pjx1hdewPIyLte6nUMpY/I+Nj///LNp0aKF8fT0NHfeeaeZMmWKuXLlisVVF7y8jEtGRoaZMGGCqVatmvHy8jKVK1c2Tz/9tDl79qz1hReg9evX5/qZkT0WAwYMMPfcc0+O1zRu3Nh4eHiYO++80yxYsMDyugtaXsflnnvuuWH724UjPy9XI5RCfrrRZ3pRltvfMUl2n7UXL140Tz/9tClbtqwpWbKkefDBB83JkyftjnPkyBHTpUsX4+3tbcqXL2+ee+45k5GRYdemKH+eXxtKFccx+eabb0z9+vWNp6enqV27tnnvvffs9mdlZZmXX37ZBAYGGk9PT9OhQweTkJBg1+bPP/80ffr0MaVLlzY+Pj5m0KBBOb7M3rFjh2ndurXx9PQ0d9xxh5k2bVqB980RqampZtiwYaZKlSrGy8vL3HnnnebFF1+0CxaKw5j83e9qK8dg6dKlpmbNmsbDw8PUq1fPrFixosD6fSM3GpPExMTrfu6uX7/edoziNCa5yS2UKixj4mJMMb6OBQAAAAAAAE7BmlIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAAAAAAACwHKEUAAAAAAAALEcoBQAAAAAAAMsRSgEAAAAAAMByhFIAUES0a9dOw4cPd3YZgCU2btyo+++/X8HBwXJxcdGXX36Z52MYY/TGG2+oZs2a8vT01B133KEpU6bkf7EAgOs6cuSIXFxcFB8f7+xSABRChFIACtTAgQPVvXt3Z5dx0wpD8LNhwwa5uLjo3LlzTq0DcKYLFy6oUaNGmjNnjsPHGDZsmD744AO98cYb2r9/v77++mvdfffd+VglAOS/ojZ3SkxMVN++fRUcHCwvLy9VqlRJ3bp10/79+yVJlStX1smTJ1W/fn0nVwqgMHJ3dgEAAADX6tKli7p06XLd/enp6XrxxRf1ySef6Ny5c6pfv75ee+01tWvXTpK0b98+zZ07V7t371atWrUkSaGhoVaUDgDFRkZGhu677z7VqlVLn3/+uSpWrKjffvtN3377re3LNTc3NwUFBTm3UACFFmdKAXCq77//Xnfffbc8PT1VsWJFvfDCC7py5Yptf1ZWlqZPn67q1avL09NTVapUsV1+k9sZRfHx8XJxcdGRI0ckSUePHtX999+vsmXLqlSpUqpXr55WrlzpcL0//vij2rRpI29vb1WuXFnPPvusLly4YNtftWpVvfrqq3rsscdUpkwZValSRe+9957dMX7++Wc1btxYXl5eatasmb788kvbae1HjhxR+/btJUlly5aVi4uLBg4caDceo0ePlr+/v4KCgjRhwgSH+wIUZUOHDlVsbKyWLFminTt36l//+pc6d+6sAwcOSJK++eYb3XnnnVq+fLlCQ0NVtWpVDR48WGfOnHFy5QBwawrT3GnPnj06dOiQ3n33XbVs2VIhISFq1aqVJk+erJYtW0rKefnewIED5eLikuOxYcMGSX996fD888/rjjvuUKlSpdSiRQvbPgC3H0IpAE7z+++/q2vXrmrevLl27NihuXPnav78+Zo8ebKtzdixYzVt2jS9/PLL2rt3rxYvXqzAwMCbfo+oqCilp6dr48aN2rVrl1577TWVLl3aoXoPHTqkzp07q2fPntq5c6c+/fRT/fjjjxo6dKhduzfffFPNmjXT9u3b9fTTT+upp55SQkKCJCk1NVX333+/GjRooF9
++UWTJk3SmDFjbK+tXLmy/vvf/0qSEhISdPLkSc2ePdu2f+HChSpVqpQ2b96s6dOna+LEiYqJiXGoP0BRdezYMS1YsEDLli1TmzZtVK1aNT3//PNq3bq1FixYIEk6fPiwjh49qmXLlumjjz5SdHS04uLi9NBDDzm5egBwXGGbO1WoUEGurq767LPPlJmZeVPHnz17tk6ePGl7DBs2TAEBAapdu7akv//SAcBtxgBAARowYIDp1q1brvv+/e9/m1q1apmsrCzbtjlz5pjSpUubzMxMk5qaajw9Pc3777+f6+vXr19vJJmzZ8/atm3fvt1IMomJicYYYxo0aGAmTJhw0/Xec889ZtiwYbnui4yMNEOGDLHb9sMPPxhXV1dz8eJFY4wxISEh5pFHHrHtz8rKMgEBAWbu3LnGGGPmzp1rypUrZ2tvjDHvv/++kWS2b99+3X5l19a6dWu7bc2bNzdjxoy56f4BRZEk88UXX9ieL1++3EgypUqVsnu4u7ubXr16GWOMefzxx40kk5CQYHtdXFyckWT2799vdRcA4KYVtbnTO++8Y0qWLGnKlClj2rdvbyZOnGgOHTpk25+YmGg3z7naf//7X+Pl5WV+/PFHY4wxR48eNW5ubub333+3a9ehQwczduzYm64JQNHBmlIAnGbfvn0KCwuTi4uLbVurVq2Ulpam3377TUlJSUpPT1eHDh0cfo9nn31WTz31lNasWaOOHTuqZ8+eatiwoUPH2rFjh3bu3KlFixbZthljlJWVpcTERNWpU0eS7I7v4uKioKAgnTp1StJfZz81bNhQXl5etjZ5WXj52torVqxoOzZQXKSlpcnNzU1xcXFyc3Oz25f9bX7FihXl7u6umjVr2vZl/x09duyYbZ0pAChKCuPcKSoqSo8++qg2bNigTZs2admyZXr11Vf19ddf67777rvu67Zv367+/fvrnXfeUatWrSRJu3btUmZmpt1nt/TXJX3lypVzuE8ACi8u3wNQaHl7e99wv6vrXx9hxhjbtoyMDLs2gwcP1uHDh9W/f3/t2rVLzZo109tvv+1QPWlpaXriiScUHx9ve+zYsUMHDhxQtWrVbO1KlChh9zoXFxdlZWU59J7XKshjA0VFkyZNlJmZqVOnTql69ep2j+zFdFu1aqUrV67o0KFDttf9+uuvkqSQkBCn1A0ABc1Zc6cyZcro/vvv15QpU7Rjxw61adPG7pLCayUlJemBBx7Q4MGDFRkZadt+9ZcOV8+39u3bZ7ecAYDbB6EUAKepU6eOYmNj7SZGP/30k8qUKaNKlSqpRo0a8vb21tq1a3N9fYUKFSRJJ0+etG3LXkTzapUrV9aTTz6pzz//XM8995zef/99h+q96667tHfv3hz/CK5evbo8PDxu6hi1atXSrl27lJ6ebtu2detWuzbZx7rZtRmA21FaWprtHyPSX7ccj4+P17Fjx1SzZk3169dPjz76qD7//HMlJiZqy5Ytmjp1qlasWCFJ6tixo+666y499thj2r59u+Li4vTEE0/ovvvuy/ENPAAUFUVh7uTi4qLatWvb3QjmapcuXVK3bt1Uu3ZtzZgxw27fzXzpAOD2QigFoMClpKTYfdsVHx+v48eP6+mnn9bx48f1zDPPaP/+/frqq680fvx4jRw5Uq6urvLy8tKYMWM0evRoffTRRzp06JA2bdqk+fPnS5KqV6+uypUra8KECTpw4IBWrFihN9980+69hw8frtWrVysxMVG//PKL1q9fb7uE53pOnz6do97k5GSNGTNGP//8s4YOHar4+HgdOHBAX331VY6Fzm+kb9++ysrK0pAhQ7Rv3z6tXr1ab7zxhiTZTsUPCQmRi4uLli9frtOnTystLS0vww3cFrZt26YmTZqoSZMmkqSRI0eqSZMmGjdunCRpwYIFevTRR/Xcc8+pVq1a6t69u7Zu3aoqVapI+utsgG+++Ubly5dX27ZtFRERoTp16mjJkiVO6xMA3KyiMneKj49Xt27
d9Nlnn2nv3r06ePCg5s+frw8//FDdunXL9TVPPPGEjh8/rrfeekunT59WUlKSkpKSdPny5Zv60gHAbcapK1oBuO0NGDDASMrxiIyMNMYYs2HDBtO8eXPj4eFhgoKCzJgxY0xGRobt9ZmZmWby5MkmJCTElChRwlSpUsW8+uqrtv0//vijadCggfHy8jJt2rQxy5Yts1usc+jQoaZatWrG09PTVKhQwfTv39/88ccf1633nnvuybXeSZMmGWOM2bJli7nvvvtM6dKlTalSpUzDhg3NlClTbK8PCQkxM2fOtDtmo0aNzPjx423Pf/rpJ9OwYUPj4eFhmjZtahYvXpxj8eWJEyeaoKAg4+LiYgYMGGCr7dpF2Lt162bbDwAAir6iNHc6ffq0efbZZ039+vVN6dKlTZkyZUyDBg3MG2+8YTIzM40xORc6DwkJybV/69evN8YYc/nyZTNu3DhTtWpVU6JECVOxYkXz4IMPmp07d+bzSAMoDFyMuercTwCA5RYtWqRBgwYpJSXlb9eCAAAAAIDbBXffAwCLffTRR7rzzjt1xx13aMeOHRozZox69epFIAUAAACgWCGUAgCLJSUlady4cUpKSlLFihX1r3/9S1OmTHF2WQAAAABgKS7fAwAAAAAAgOW4+x4AAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAsRygFAAAAAAAAyxFKAQAAAAAAwHKEUgAAAAAAALAcoRQAAAAAAAAs9/8BwLwmqMOvvfAAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", + "\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Histogram for locusLength\n", + "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Length\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Length\")\n", + "\n", + "# Histogram for locusSize\n", + "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Size\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Locus Length\")\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/08/07 09:53:33 WARN SharedInMemoryCache: Evicting cached table partition metadata from memory due to size constraints (spark.sql.hive.filesourcePartitionFileCacheSize = 262144000 bytes). This may impact query planning performance.\n" + ] + } + ], + "source": [ + "susie_fm = StudyLocus.from_parquet(session, \"/Users/dc16/output/ukb_ppp_fm\")\n", + "susie_fm.df = (\n", + " susie_fm.df\n", + " .filter(f.col(\"pValueExponent\") < -11)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Total credible sets:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "text/plain": [ + "57880" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "susie_fm.df.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check for NaN and nulls in the credible set logBF\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of credible sets with 'not a number' as the logBF: 25\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "[Stage 73:====================================================> (868 + 8) / 893]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of credible sets with 'null' as the logBF: 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "nan = susie_fm.df.filter(f.isnan(\"credibleSetlog10BF\"))\n", + "null = susie_fm.df.filter(f.isnull(\"credibleSetlog10BF\"))\n", + "print(\"Number of credible sets with 'not a number' as the logBF: \", nan.count())\n", + "print(\"Number of credible sets with 'null' as the logBF: \", null.count())" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------\n", + " meanTopPP | 0.9005192231704642 \n", + " minTopPP | 0.011231041748358246 \n", + " q1TopPP | 0.9884181401759444 \n", + " medianTopPP | 0.9999999997648601 \n", + " q3TopPP | 1.0 \n", + " maxTopPP | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanCredSetSize | 4.344101633393829 \n", + " minCredSetSize | 1 \n", + " q1CredSetSize | 1 \n", + " medianCredSetSize | 1 \n", + " q3CredSetSize | 2 \n", + " maxCredSetSize | 1767 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0----------------------------------\n", + " meanPurityMeanR2 | 0.8042287002688577 \n", + " minPurityMeanR2 | 0.011839476509578747 \n", + " q1PurityMeanR2 | 0.6278035788726534 \n", + " medianPurityMeanR2 | 1.0 \n", + " q3PurityMeanR2 | 1.0 \n", + " maxPurityMeanR2 | 1.0 \n", + "\n" + ] + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 87:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0--------------------------------\n", + " meanPurityMinR2 | 0.7062586247097518 \n", + " minPurityMinR2 | 0.0 \n", + " q1PurityMinR2 | 0.06978249208480057 \n", + " medianPurityMinR2 | 1.0 \n", + " q3PurityMinR2 | 1.0 \n", + " maxPurityMinR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "susie_results = (\n", + " susie_fm.df.withColumn(\"credSetSize\", f.size(\"locus\"))\n", + " .withColumn(\n", + " \"locus\",\n", + " f.slice(order_array_of_structs_by_field(\"locus\", \"posteriorProbability\"), 1, 1)[\n", + " 0\n", + " ],\n", + " )\n", + " .withColumn(\"topPP\", f.col(\"locus\").getField(\"posteriorProbability\"))\n", + " .filter(~f.isnan(\"topPP\"))\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"topPP\").alias(\"meanTopPP\"),\n", + " f.min(\"topPP\").alias(\"minTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.25).alias(\"q1TopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.5).alias(\"medianTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.75).alias(\"q3TopPP\"),\n", + " f.max(\"topPP\").alias(\"maxTopPP\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"credSetSize\").alias(\"meanCredSetSize\"),\n", + " f.min(\"credSetSize\").alias(\"minCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.25).alias(\"q1CredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.5).alias(\"medianCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.75).alias(\"q3CredSetSize\"),\n", + " f.max(\"credSetSize\").alias(\"maxCredSetSize\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"purityMeanR2\").alias(\"meanPurityMeanR2\"),\n", + " f.min(\"purityMeanR2\").alias(\"minPurityMeanR2\"),\n", + " 
f.percentile_approx(\"purityMeanR2\", 0.25).alias(\"q1PurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.5).alias(\"medianPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.75).alias(\"q3PurityMeanR2\"),\n", + " f.max(\"purityMeanR2\").alias(\"maxPurityMeanR2\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " susie_results.select(\n", + " f.mean(\"purityMinR2\").alias(\"meanPurityMinR2\"),\n", + " f.min(\"purityMinR2\").alias(\"minPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.25).alias(\"q1PurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.5).alias(\"medianPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.75).alias(\"q3PurityMinR2\"),\n", + " f.max(\"purityMinR2\").alias(\"maxPurityMinR2\"),\n", + " ).show(vertical=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "pdf = susie_results.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", + "plt.figure(figsize=(12, 12))\n", + "\n", + "# Histogram for purityMinR2\n", + "plt.subplot(2, 2, 1)\n", + "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMinR2\")\n", + "plt.xlabel(\"purityMinR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for purityMeanR2\n", + "plt.subplot(2, 2, 2)\n", + "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMeanR2\")\n", + "plt.xlabel(\"purityMeanR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for topPP\n", + "plt.subplot(2, 2, 3)\n", + "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of topPP\")\n", + "plt.xlabel(\"topPP\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for credSetSize\n", + "plt.subplot(2, 2, 4)\n", + "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of credSetSize\")\n", + "plt.xlabel(\"credSetSize\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Repeating the same steps, but filtering for only the first credible set" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Total number of primary credible sets and number of unique studyIds:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of primary credible sets: 9495\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 
209:====================================================>(880 + 8) / 893]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique studyIds in primary credible sets: 2015\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "first_credset = susie_results.filter(f.col(\"credibleSetIndex\") == 1)\n", + "print(\"Number of primary credible sets: \", first_credset.count())\n", + "print(\"Number of unique studyIds in primary credible sets: \", first_credset.select(\"studyId\").distinct().count())" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------\n", + " meanTopPP | 0.8085156719419792 \n", + " minTopPP | 0.011231041748358246 \n", + " q1TopPP | 0.6270685668930966 \n", + " medianTopPP | 0.9999984922517341 \n", + " q3TopPP | 1.0 \n", + " maxTopPP | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanCredSetSize | 5.875934702474987 \n", + " minCredSetSize | 1 \n", + " q1CredSetSize | 1 \n", + " medianCredSetSize | 1 \n", + " q3CredSetSize | 4 \n", + " maxCredSetSize | 1022 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0----------------------------------\n", + " meanPurityMeanR2 | 0.7587573150386597 \n", + " minPurityMeanR2 | 0.011839476509578747 \n", + " q1PurityMeanR2 | 0.520396112272186 \n", + " medianPurityMeanR2 | 0.9839697266684606 \n", + " q3PurityMeanR2 | 1.0 \n", + " maxPurityMeanR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "[Stage 226:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------------\n", + " meanPurityMinR2 | 0.6480281642261705 \n", + " minPurityMinR2 | 0.0 \n", + " q1PurityMinR2 | 0.002472980432442... \n", + " medianPurityMinR2 | 0.9659616302635498 \n", + " q3PurityMinR2 | 1.0 \n", + " maxPurityMinR2 | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "(\n", + " first_credset.select(\n", + " f.mean(\"topPP\").alias(\"meanTopPP\"),\n", + " f.min(\"topPP\").alias(\"minTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.25).alias(\"q1TopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.5).alias(\"medianTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.75).alias(\"q3TopPP\"),\n", + " f.max(\"topPP\").alias(\"maxTopPP\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " first_credset.select(\n", + " f.mean(\"credSetSize\").alias(\"meanCredSetSize\"),\n", + " f.min(\"credSetSize\").alias(\"minCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.25).alias(\"q1CredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.5).alias(\"medianCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.75).alias(\"q3CredSetSize\"),\n", + " f.max(\"credSetSize\").alias(\"maxCredSetSize\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " first_credset.select(\n", + " f.mean(\"purityMeanR2\").alias(\"meanPurityMeanR2\"),\n", + " f.min(\"purityMeanR2\").alias(\"minPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.25).alias(\"q1PurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.5).alias(\"medianPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.75).alias(\"q3PurityMeanR2\"),\n", + " f.max(\"purityMeanR2\").alias(\"maxPurityMeanR2\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " first_credset.select(\n", + " 
f.mean(\"purityMinR2\").alias(\"meanPurityMinR2\"),\n", + " f.min(\"purityMinR2\").alias(\"minPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.25).alias(\"q1PurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.5).alias(\"medianPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.75).alias(\"q3PurityMinR2\"),\n", + " f.max(\"purityMinR2\").alias(\"maxPurityMinR2\"),\n", + " ).show(vertical=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAASlCAYAAAB5vWpLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAADSxElEQVR4nOzdd3gUZfv28TOkQxolBSSEQOhFJLQo0kQCxEJ7BKWEJqChN0V56AqCtEciiCABBREU9SehRZpSVAxEKYpKERQSUCGhps77h28WloSSkMyS8P0cxx6y99w7c80kyuU5szN2hmEYAgAAAAAAAExUxNYFAAAAAAAA4P5DKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUUQOXLl1fPnj1tXUahN2PGDFWoUEH29vaqU6eOrcuxaNasmZo1a2brMiRJUVFRsrOz0/Hjx21dCgAA9EgmoUcCkFcIpQAby/yf+u+//z7b5c2aNVPNmjXvejvr1q3ThAkT7no994tNmzZp9OjReuSRR7RkyRK9/vrrti7ppk6dOqUJEyYoLi4u1+to1qyZ7OzsVKlSpWyXx8TEyM7OTnZ2dvr4449zvZ3rbdu2zbJOOzs72dvby8fHR506ddJPP/2UZf6aNWvUuXNnVahQQUWLFlWVKlU0YsQInT9/Pk/qAQDcW+iR7k30SNbyo0e6WxMmTLDqsRwdHVW+fHkNHjw4S990+fJlRUZGqlWrVipdurTc3d310EMPaf78+UpPT7fNDuC+4mDrAgDk3OHDh1WkSM4y5XXr1ikyMpKm6w5t2bJFRYoU0eLFi+Xk5GTrcqxs2rTJ6v2pU6c0ceJElS9f/q7OVrq4uOi3337Td999pwYNGlgtW758uVxcXHT16lWr8e7du6tLly5ydnbO9XYHDx6s+vXrKzU1VT/++KMWLFigbdu26cCBA/Lz87PM69evn8qUKaNu3bqpXLly2r9/v+bNm6d169Zp7969cnV1zXUNAIDCgR4p/9Ej3VmPdC+YP3++3NzcdOnSJW3evFlvvfWW9u7dqx07dljmHD16VIMGDdJjjz2m4cOHy8PDQxs3btSLL76ob775RkuXLrXhHuB+QCgFFEB3EwDYyqVLl1SsWDFbl3HHzpw5I1dX13uq2bp8+bKKFi2abzVVrFhRaWlp+vDDD60arqtXr+rTTz9VWFiYPvnkE6vP2Nvby97e/q62++ijj6pTp06W91WqVNELL7ygZcuWafTo0Zbxjz/+OMsl+cHBwQoPD9fy5cvVt2/fu6oD
AFDw0SPlP3qkO+uR7gWdOnVSqVKlJEn9+/dXly5d9NFHH1mFa35+ftq/f79q1Khh+Vz//v3Vu3dvLVmyRP/9738VFBRkk/pxf+Dre0ABdOP9ElJTUzVx4kRVqlRJLi4uKlmypBo3bqyYmBhJUs+ePRUZGSlJVpfyZrp06ZJGjBghf39/OTs7q0qVKnrzzTdlGIbVdq9cuaLBgwerVKlScnd311NPPaU///xTdnZ2VmcXMy8ZPnTokJ577jkVL15cjRs3liT9+OOP6tmzpypUqCAXFxf5+fmpd+/e+vvvv622lbmOX375Rd26dZOnp6e8vb313//+V4Zh6OTJk3r66afl4eEhPz8/zZw5846OXVpamiZPnqyKFSvK2dlZ5cuX1yuvvKLk5GTLHDs7Oy1ZskSXLl2yHKuoqKibrjPz6wOxsbF6+OGH5erqqsDAQC1YsMBq3s3uv5T5NbZt27Zlu84mTZqoaNGieuWVVyzLMsOZbdu2qX79+pKkXr16WdU7fvx4OTo66uzZs1lq7tevn7y8vLKc1Xv22Wf10UcfKSMjwzL2xRdf6PLly3rmmWeyrCe7fSpfvryeeOIJ7dixQw0aNJCLi4sqVKigZcuW3fQYXu/RRx+VJB05csRqPLt7RLRv316Ssv26HwDg/kOPRI90r/RIkvTnn3+qd+/e8vX1lbOzs2rUqKH33nvPak5KSorGjRun4OBgeXp6qlixYnr00Ue1detWq3nHjx+XnZ2d3nzzTS1cuNDyc6pfv7727NmT7fZvlF2PVapUKatAKhM9FsxCKAXcIxITE/XXX39leaWmpt72sxMmTNDEiRPVvHlzzZs3T6+++qrKlSunvXv3Svr3bMfjjz8uSXr//fctL0kyDENPPfWUZs+erdatW2vWrFmqUqWKRo0apeHDh1ttp2fPnnrrrbfUtm1bvfHGG3J1dVVYWNhN6/rPf/6jy5cv6/XXX9fzzz8v6d/v3R89elS9evXSW2+9pS5dumjlypVq27ZtlgZPkjp37qyMjAxNmzZNDRs21JQpUzRnzhw9/vjjeuCBB/TGG28oKChII0eO1FdffXXbY9W3b1+NGzdOdevW1ezZs9W0aVNNnTpVXbp0scx5//339eijj8rZ2dlyrJo0aXLL9Z47d05t27ZVcHCwpk+frrJly+qFF17I0njkxN9//602bdqoTp06mjNnjpo3b55lTrVq1TRp0iRJ/zZR19fbvXt3paWl6aOPPrL6TEpKij7++GN17NhRLi4uVsuee+45nT592qr5W7FihR577DH5+Pjcce2//fabOnXqpMcff1wzZ85U8eLF1bNnTx08ePC2n81sSIsXL37bufHx8ZJkOQsIACh86JHokW5UEHqkhIQENWrUSF9++aUGDhyouXPnKigoSH369NGcOXMs85KSkrRo0SI1a9ZMb7zxhiZMmKCzZ88qNDQ023thrVixQjNmzFD//v01ZcoUHT9+XB06dLijfx/osXBPMgDY1JIlSwxJt3zVqFHD6jMBAQFGeHi45f2DDz5ohIWF3XI7ERERRnb/yn/22WeGJGPKlClW4506dTLs7OyM3377zTAMw4iNjTUkGUOHDrWa17NnT0OSMX78eMvY+PHjDUnGs88+m2V7ly9fzjL24YcfGpKMr776Kss6+vXrZxlLS0szypYta9jZ2RnTpk2zjJ87d85wdXW1OibZiYuLMyQZffv2tRofOXKkIcnYsmWLZSw8PNwoVqzYLdeXqWnTpoYkY+bMmZax5ORko06dOoaPj4+RkpJiGMa1n/WxY8esPr9161ZDkrF169Ys61ywYEG222vatKnl/Z49ewxJxpIlS7LMDQkJMRo2bGg1tmbNmmy3l/l7Vq9ePaNPnz6GYfx7bJ2cnIylS5da6ly9erXlc9ntU0BAQJaf55kzZwxnZ2djxIgRWfb7vffeM86ePWucOnXK2LBhgxEUFGTY2dkZ3333XZb9uVGfPn0Me3t745dffrntXABA
wUKPRI9UkHukPn36GKVLlzb++usvq2106dLF8PT0tPy809LSjOTkZKs5586dM3x9fY3evXtbxo4dO2ZIMkqWLGn8888/lvHPP//ckGR88cUXlrHM35HDhw8bZ8+eNY4fP2689957hqurq+Ht7W1cunQpy/G4XnJyslG9enUjMDDQSE1NveVc4G5xpRRwj4iMjFRMTEyWV+3atW/7WS8vLx08eFC//vprjre7bt062dvba/DgwVbjI0aMkGEYWr9+vSRpw4YNkqQXX3zRat6gQYNuuu4BAwZkGbv+ZtRXr17VX3/9pUaNGkmS5azl9a6/T5C9vb3q1asnwzDUp08fy7iXl5eqVKmio0eP3rQW6d99lZTl7OaIESMkSdHR0bf8/K04ODiof//+lvdOTk7q37+/zpw5o9jY2Fyt09nZWb169cp1TZLUo0cPffvtt1aXaS9fvlz+/v5q2rRptp957rnntGbNGsvZQnt7e8sl3HeqevXqlkvEJcnb2/umP6PevXvL29tbZcqUUevWrZWYmKj333/fcsn9zaxYsUKLFy/WiBEjbvpEHABAwUePRI90o3u9RzIMQ5988omefPJJGYZhdYVfaGioEhMTLT9Te3t7y72wMjIy9M8//ygtLU316tXL9ufeuXNnqyudMvut7H7GVapUkbe3t8qXL6/evXsrKChI69evV9GiRW95bAYOHKhDhw5p3rx5cnDgNtTIX4RSwD2iQYMGatmyZZbXnVxeO2nSJJ0/f16VK1dWrVq1NGrUKP344493tN3ff/9dZcqUkbu7u9V4tWrVLMsz/1mkSBEFBgZazbvVjQ9vnCtJ//zzj4YMGSJfX1+5urrK29vbMi8xMTHL/HLlylm99/T0lIuLS5ZLiT09PXXu3Lmb1nL9PtxYs5+fn7y8vCz7mhtlypTJcpPSypUrS1KW+yPcqQceeOCub9jZuXNnOTs7a/ny5ZL+PcZr165V165dre6Zcb0uXbooMTFR69ev1/Lly/XEE09k+f24nRt/btK/l4pn9zMaN26cYmJi9Omnn6pHjx5KTEy87ZOTvv76a/Xp00ehoaF67bXXclQbAKBgoUeiR7rRvd4jnT17VufPn9fChQvl7e1t9coM086cOWOZv3TpUtWuXdty3zNvb29FR0ff0c8989+D7H7Gn3zyiWJiYrRixQo1atTIcpP6W5kxY4beffddTZ48WW3btr3lXCAvEHsChUCTJk105MgRff7559q0aZMWLVqk2bNna8GCBTZ9Ill2f+k988wz2rVrl0aNGqU6derIzc1NGRkZat26tdWNIzNl92S3mz3tzcjmfgvZuVmjkd9utt309PRsx2/XNNyJ4sWL64knntDy5cs1btw4ffzxx0pOTla3bt1u+pnSpUurWbNmmjlzpnbu3Jmrp8nk5GdUq1YttWzZUpLUrl07Xb58Wc8//7waN24sf3//LPN/+OEHPfXUU6pZs6Y+/vhjzuABAG6KHulf9EhZ5WePlPnz6tatm8LDw7Odk3ml3wcffKCePXuqXbt2GjVqlHx8fGRvb6+pU6dmeeiLlLOfcZMmTSwh5ZNPPqlatWqpa9euio2NzfYEYFRUlF566SUNGDBAY8eOzXY7QF7jSimgkChRooR69eqlDz/8UCdPnlTt2rWtnvZys7/sAwICdOrUKV24cMFq/Oeff7Ysz/xnRkaGjh07ZjXvt99+u+Maz507p82bN+vll1/WxIkT1b59ez3++OOqUKHCHa/jbmTuw42X8CckJOj8+fOWfc2NU6dO6dKlS1Zjv/zyi6R/nwQkXTuTdf78eat5d3P2Ubp9A9mjRw/98ssv2rNnj5YvX66HHnoo26esXO+5557T119/LQ8PD9PPkk2bNk1Xr17N9gqoI0eOqHXr1vLx8dG6devk5uZmam0AgIKHHun26JHytkfy9vaWu7u70tPTs73Kr2XLlpabo3/88ceqUKGC1qxZo+7duys0NFQtW7bM8vS/u+Xm5qbx
48crLi5Oq1atyrL8888/V9++fdWhQwfLEykBMxBKAYXAjY8KdnNzU1BQkNUjfDMvm77xL/u2bdsqPT1d8+bNsxqfPXu27Ozs1KZNG0lSaGioJOntt9+2mvfWW2/dcZ2ZZ3ZuPJNz/RNI8lNm43Dj9mbNmiVJt3xKzu2kpaXpnXfesbxPSUnRO++8I29vbwUHB0uSKlasKElWT8BJT0/XwoULc71d6eY/20xt2rRRqVKl9MYbb2j79u23PAOYqVOnTho/frzefvvtu748PqcqVqyojh07KioqyvLkF+nfp8C0atVKRYoU0caNG+Xt7W1qXQCAgoce6c7QI+Vtj2Rvb6+OHTvqk08+0YEDB7IsP3v2rNVcyfpn/+2332r37t23rSWnunbtqrJly+qNN96wGv/qq6/UpUsXNWnSRMuXL7/tbRSAvMR3HoBCoHr16mrWrJmCg4NVokQJff/99/r44481cOBAy5zMv/QHDx6s0NBQ2dvbq0uXLnryySfVvHlzvfrqqzp+/LgefPBBbdq0SZ9//rmGDh1qaRKCg4PVsWNHzZkzR3///bcaNWqk7du3W8503cnl3h4eHmrSpImmT5+u1NRUPfDAA9q0aVOWM4v55cEHH1R4eLgWLlyo8+fPq2nTpvruu++0dOlStWvXLtvHCd+pMmXK6I033tDx48dVuXJlffTRR4qLi9PChQvl6OgoSapRo4YaNWqkMWPG6J9//lGJEiW0cuVKpaWl3dV+VaxYUV5eXlqwYIHc3d1VrFgxNWzY0HIfCkdHR3Xp0kXz5s2Tvb29nn322duu09PT0+osstlGjRqlVatWac6cOZo2bZokqXXr1jp69KhGjx6tHTt2aMeOHZb5vr6+lkd6AwCQiR7pztAj5X2PNG3aNG3dulUNGzbU888/r+rVq+uff/7R3r179eWXX+qff/6RJD3xxBNas2aN2rdvr7CwMB07dkwLFixQ9erVdfHixbva/xs5OjpqyJAhGjVqlDZs2KDWrVvr999/11NPPSU7Ozt16tRJq1evtvpM7dq17+ihAkCu2eSZfwAsMh+Bu2fPnmyXX/8Y2kw3Pu54ypQpRoMGDQwvLy/D1dXVqFq1qvHaa69ZHrNrGP8+bnbQoEGGt7e3YWdnZ/Xo4wsXLhjDhg0zypQpYzg6OhqVKlUyZsyYYWRkZFht99KlS0ZERIRRokQJw83NzWjXrp1x+PBhQ5LV44czH0N79uzZLPvzxx9/GO3btze8vLwMT09P4z//+Y9x6tSpmz4y+cZ13OwxxNkdp+ykpqYaEydONAIDAw1HR0fD39/fGDNmjHH16tU72k52Mrf9/fffGyEhIYaLi4sREBBgzJs3L8vcI0eOGC1btjScnZ0NX19f45VXXjFiYmJu+fjh7LZ3/eOODePfxwFXr17dcHBwyPbRx999950hyWjVqtUt9+FWsnvccXaPcA4ICMj28ds31p3d+q7XrFkzw8PDwzh//rxhGMYtHwl+4/EAABR89Ej0SAW5RzIMw0hISDAiIiIMf39/w9HR0fDz8zMee+wxY+HChZY5GRkZxuuvv24EBAQYzs7OxkMPPWSsXbvWCA8PNwICAizzjh07ZkgyZsyYkWX7d/o7YhiGkZiYaHh6elqOU2btN3tdv14gP9gZxh3e9Q4AshEXF6eHHnpIH3zwgbp27WrrcmyiWbNm+uuvv7K9PPte8cMPP6hOnTpatmyZunfvbutyAAAo9OiR6JEA3B5fFgVwx65cuZJlbM6cOSpSpIiaNGlig4pwp9599125ubmpQ4cOti4FAIBChx6p4KJHAmyLe0oBuGPTp09XbGysmjdvLgcHB61fv17r169Xv3795O/vb+vykI0vvvhChw4d0sKFCzVw4EDLDT8BAEDeoUcqeOiRgHsDX98DcMdiYmI0ceJEHTp0SBcvXlS5cuXUvXt3vfrqq3JwuH8z7nv50vTy5csrISFBoaGhev/99+Xu7m7rkgAAKHTokbJHjwTg
dgilAAAAAAAAYDruKQUAAAAAAADT3b/XkuZARkaGTp06JXd3d9nZ2dm6HAAAYCLDMHThwgWVKVNGRYpwPi8n6KEAALg/3Wn/RCh1B06dOsUNCgEAuM+dPHlSZcuWtXUZBQo9FAAA97fb9U+EUncg86Z3J0+elIeHh42rAQAAZkpKSpK/vz83wc0FeigAAO5Pd9o/EUrdgczLzT08PGioAAC4T/H1s5yjhwIA4P52u/6JGyMAAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAFCATJgwQXZ2dlavqlWrWpZfvXpVERERKlmypNzc3NSxY0clJCRYrePEiRMKCwtT0aJF5ePjo1GjRiktLc1qzrZt21S3bl05OzsrKChIUVFRZuweAAC4jxBKAQAAFDA1atTQ6dOnLa8dO3ZYlg0bNkxffPGFVq9ere3bt+vUqVPq0KGDZXl6errCwsKUkpKiXbt2aenSpYqKitK4ceMsc44dO6awsDA1b95ccXFxGjp0qPr27auNGzeaup8AAKBwc7B1AQAAAMgZBwcH+fn5ZRlPTEzU4sWLtWLFCrVo0UKStGTJElWrVk3ffPONGjVqpE2bNunQoUP68ssv5evrqzp16mjy5Ml66aWXNGHCBDk5OWnBggUKDAzUzJkzJUnVqlXTjh07NHv2bIWGht60ruTkZCUnJ1veJyUl5fGeAwCAwoQrpQAAAAqYX3/9VWXKlFGFChXUtWtXnThxQpIUGxur1NRUtWzZ0jK3atWqKleunHbv3i1J2r17t2rVqiVfX1/LnNDQUCUlJengwYOWOdevI3NO5jpuZurUqfL09LS8/P3982R/AQBA4UQoBQAAUIA0bNhQUVFR2rBhg+bPn69jx47p0Ucf1YULFxQfHy8nJyd5eXlZfcbX11fx8fGSpPj4eKtAKnN55rJbzUlKStKVK1duWtuYMWOUmJhoeZ08efJudxcAABRifH0PAACgAGnTpo3lz7Vr11bDhg0VEBCgVatWydXV1YaVSc7OznJ2drZpDQAAoODgSikAAIACzMvLS5UrV9Zvv/0mPz8/paSk6Pz581ZzEhISLPeg8vPzy/I0vsz3t5vj4eFh8+ALAAAUHlwpBQAA7gl9ovbk+rOLe9bPw0oKlosXL+rIkSPq3r27goOD5ejoqM2bN6tjx46SpMOHD+vEiRMKCQmRJIWEhOi1117TmTNn5OPjI0mKiYmRh4eHqlevbpmzbt06q+3ExMRY1gEAAO4Nd9M/SbbvobhSCgAAoAAZOXKktm/fruPHj2vXrl1q37697O3t9eyzz8rT01N9+vTR8OHDtXXrVsXGxqpXr14KCQlRo0aNJEmtWrVS9erV1b17d/3www/auHGjxo4dq4iICMtX7wYMGKCjR49q9OjR+vnnn/X2229r1apVGjZsmC13HQAAFDJcKQUAAFCA/PHHH3r22Wf1999/y9vbW40bN9Y333wjb29vSdLs2bNVpEgRdezYUcnJyQoNDdXbb79t+by9vb3Wrl2rF154QSEhISpWrJjCw8M1adIky5zAwEBFR0dr2LBhmjt3rsqWLatFixYpNDTU9P0FAACFF6EUAABAAbJy5cpbLndxcVFkZKQiIyNvOicgICDL1/Nu1KxZM+3bty9XNQIAANwJvr4HAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAA
AAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUgAAAAAAADAdoRQAAAAAAABMd8+EUtOmTZOdnZ2GDh1qGbt69aoiIiJUsmRJubm5qWPHjkpISLD63IkTJxQWFqaiRYvKx8dHo0aNUlpamtWcbdu2qW7dunJ2dlZQUJCioqJM2CMAAAAAAADczD0RSu3Zs0fvvPOOateubTU+bNgwffHFF1q9erW2b9+uU6dOqUOHDpbl6enpCgsLU0pKinbt2qWlS5cqKipK48aNs8w5duyYwsLC1Lx5c8XFxWno0KHq27evNm7caNr+AQAAAAAAwJrNQ6mLFy+qa9euevfdd1W8eHHLeGJiohYvXqxZs2apRYsWCg4O1pIlS7Rr1y598803kqRNmzbp0KFD+uCDD1SnTh21adNGkydPVmRkpFJSUiRJCxYsUGBgoGbOnKlq1app4MCB6tSpk2bPnm2T/QUAAAAAAMA9EEpFREQoLCxMLVu2tBqPjY1Vamqq1XjVqlVVrlw57d69W5K0e/du1apVS76+vpY5oaGhSkpK0sGDBy1zblx3aGioZR3ZSU5OVlJSktULAAAAAAAAecfBlhtfuXKl9u7dqz179mRZFh8fLycnJ3l5eVmN+/r6Kj4+3jLn+kAqc3nmslvNSUpK0pUrV+Tq6ppl21OnTtXEiRNzvV8AAAAAAAC4NZtdKXXy5EkNGTJEy5cvl4uLi63KyNaYMWOUmJhoeZ08edLWJQEAAAAAABQqNgulYmNjdebMGdWtW1cODg5ycHDQ9u3b9b///U8ODg7y9fVVSkqKzp8/b/W5hIQE+fn5SZL8/PyyPI0v8/3t5nh4eGR7lZQkOTs7y8PDw+oFAAAAAACAvGOzUOqxxx7T/v37FRcXZ3nVq1dPXbt2tfzZ0dFRmzdvtnzm8OHDOnHihEJCQiRJISEh2r9/v86cOWOZExMTIw8PD1WvXt0y5/p1ZM7JXAcAAAAAAADMZ7N7Srm7u6tmzZpWY8WKFVPJkiUt43369NHw4cNVokQJeXh4aNCgQQoJCVGjRo0kSa1atVL16tXVvXt3TZ8+XfHx8Ro7dqwiIiLk7OwsSRowYIDmzZun0aNHq3fv3tqyZYtWrVql6Ohoc3cYAAAAAAAAFja90fntzJ49W0WKFFHHjh2VnJys0NBQvf3225bl9vb2Wrt2rV544QWFhISoWLFiCg8P16RJkyxzAgMDFR0drWHDhmnu3LkqW7asFi1apNDQUFvsEgAAAAAAAHSPhVLbtm2zeu/i4qLIyEhFRkbe9DMBAQFat27dLdfbrFkz7du3Ly9KBAAAAAAAQB6w2T2lAAAAAAAAcP8ilAIAAAAAAIDpCKUAAAAAAABg
OkIpAAAAAAAAmI5QCgAAAAAAAKYjlAIAAAAAAIDpCKUAAAAAAABgOkIpAAAAAAAAmI5QCgAAoACbNm2a7OzsNHToUMvY1atXFRERoZIlS8rNzU0dO3ZUQkKC1edOnDihsLAwFS1aVD4+Pho1apTS0tKs5mzbtk1169aVs7OzgoKCFBUVZcIeAQCA+wWhFAAAQAG1Z88evfPOO6pdu7bV+LBhw/TFF19o9erV2r59u06dOqUOHTpYlqenpyssLEwpKSnatWuXli5dqqioKI0bN84y59ixYwoLC1Pz5s0VFxenoUOHqm/fvtq4caNp+wcAAAo3QikAAIAC6OLFi+rataveffddFS9e3DKemJioxYsXa9asWWrRooWCg4O1ZMkS7dq1S998840kadOmTTp06JA++OAD1alTR23atNHkyZMVGRmplJQUSdKCBQsUGBiomTNnqlq1aho4cKA6deqk2bNn37Sm5ORkJSUlWb0AAABuhlAKAACgAIqIiFBYWJhatmxpNR4bG6vU1FSr8apVq6pcuXLavXu3JGn37t2qVauWfH19LXNCQ0OVlJSkgwcPWubcuO7Q0FDLOrIzdepUeXp6Wl7+/v53vZ8AAKDwIpQCAAAoYFauXKm9e/dq6tSpWZbFx8fLyclJXl5eVuO+vr6Kj4+3zLk+kMpcnrnsVnOSkpJ05cqVbOsaM2aMEhMTLa+TJ0/mav8AAMD9wcHWBQAAAODOnTx5UkOGDFFMTIxcXFxsXY4VZ2dnOTs727oMAABQQHClFAAAQAESGxurM2fOqG7dunJwcJCDg4O2b9+u//3vf3JwcJCvr69SUlJ0/vx5q88lJCTIz89PkuTn55flaXyZ7283x8PDQ66urvm0dwAA4H5CKAUAAFCAPPbYY9q/f7/i4uIsr3r16qlr166WPzs6Omrz5s2Wzxw+fFgnTpxQSEiIJCkkJET79+/XmTNnLHNiYmLk4eGh6tWrW+Zcv47MOZnrAAAAuFt8fQ8AAKAAcXd3V82aNa3GihUrppIlS1rG+/Tpo+HDh6tEiRLy8PDQoEGDFBISokaNGkmSWrVqperVq6t79+6aPn264uPjNXbsWEVERFi+fjdgwADNmzdPo0ePVu/evbVlyxatWrVK0dHR5u4wAAAotAilAAAACpnZs2erSJEi6tixo5KTkxUaGqq3337bstze3l5r167VCy+8oJCQEBUrVkzh4eGaNGmSZU5gYKCio6M1bNgwzZ07V2XLltWiRYsUGhpqi10CAACFEKEUAABAAbdt2zar9y4uLoqMjFRkZORNPxMQEKB169bdcr3NmjXTvn378qJEAACALLinFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRS
AAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExn01Bq/vz5ql27tjw8POTh4aGQkBCtX7/esvzq1auKiIhQyZIl5ebmpo4dOyohIcFqHSdOnFBYWJiKFi0qHx8fjRo1SmlpaVZztm3bprp168rZ2VlBQUGKiooyY/cAAAAAAABwEzYNpcqWLatp06YpNjZW33//vVq0aKGnn35aBw8elCQNGzZMX3zxhVavXq3t27fr1KlT6tChg+Xz6enpCgsLU0pKinbt2qWlS5cqKipK48aNs8w5duyYwsLC1Lx5c8XFxWno0KHq27evNm7caPr+AgAAAAAA4F92hmEYti7ieiVKlNCMGTPUqVMneXt7a8WKFerUqZMk6eeff1a1atW0e/duNWrUSOvXr9cTTzyhU6dOydfXV5K0YMECvfTSSzp79qycnJz00ksvKTo6WgcOHLBso0uXLjp//rw2bNiQbQ3JyclKTk62vE9KSpK/v78SExPl4eGRj3sPAMD9q0/Unlx/dnHP+nlYibWkpCR5enrSB+QCxw4AgPx1N/2TlH891J32APfMPaXS09O1cuVKXbp0SSEhIYqNjVVqaqpatmxpmVO1alWVK1dOu3fvliTt3r1btWrVsgRSkhQaGqqkpCTL1Va7d++2WkfmnMx1ZGfq1Kny9PS0vPz9/fNyVwEAAAAAAO57Ng+l9u/fLzc3Nzk7O2vAgAH69NNPVb16dcXHx8vJyUleXl5W8319fRUfHy9Jio+PtwqkMpdnLrvVnKSkJF25ciXbmsaMGaPExETL6+TJk3mxqwAAAAAAAPj/HGxdQJUqVRQXF6fExER9/PHHCg8P1/bt221ak7Ozs5ydnW1aAwAAAAAAQGFm81DKyclJQUFBkqTg4GDt2bNHc+fOVefOnZWSkqLz589bXS2VkJAgPz8/SZKfn5++++47q/VlPp3v+jk3PrEvISFBHh4ecnV1za/dAgAAAAAAwC3Y/Ot7N8rIyFBycrKCg4Pl6OiozZs3W5YdPnxYJ06cUEhIiCQpJCRE+/fv15kzZyxzYmJi5OHhoerVq1vmXL+OzDmZ6wAAAAAAAID5bHql1JgxY9SmTRuVK1dOFy5c0IoVK7Rt2zZt3LhRnp6e6tOnj4YPH64SJUrIw8NDgwYNUkhIiBo1aiRJatWqlapXr67u3btr+vTpio+P19ixYxUREWH5+t2AAQM0b948jR49Wr1799aWLVu0atUqRUdH23LXAQAAAAAA7ms2DaXOnDmjHj166PTp0/L09FTt2rW1ceNGPf7445Kk2bNnq0iRIurYsaOSk5MVGhqqt99+2/J5e3t7rV27Vi+88IJCQkJUrFgxhYeHa9KkSZY5gYGBio6O1rBhwzR37lyVLVtWixYtUmhoqOn7CwAAAAAAgH/ZNJRavHjxLZe7uLgoMjJSkZGRN50TEBCgdevW3XI9zZo10759+3JVIwAAAAAAAPLePXdPKQAAAAAAABR+hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAUIDMnz9ftWvXloeHhzw8PBQSEqL169dbll+9elUREREqWbKk3Nzc1LFjRyUkJFit48SJEwoLC1PRokXl4+OjUaNGKS0tzWrOtm3bVLduXTk7OysoKEhRUVFm7B4AALiPEEoBAAAUIGXLltW0adMUGxur77//Xi1atNDTTz+tgwcPSpKGDRumL774QqtXr9b27dt16tQpdejQwfL59PR0hYWFKSUlRbt27dLSpUsVFRWl
cePGWeYcO3ZMYWFhat68ueLi4jR06FD17dtXGzduNH1/AQBA4WVnGIZh6yLudUlJSfL09FRiYqI8PDxsXQ4AAIVSn6g9uf7s4p7187ASawWhDyhRooRmzJihTp06ydvbWytWrFCnTp0kST///LOqVaum3bt3q1GjRlq/fr2eeOIJnTp1Sr6+vpKkBQsW6KWXXtLZs2fl5OSkl156SdHR0Tpw4IBlG126dNH58+e1YcOGO66rIBw7AAAKsrvpn6T866HutAfgSikAAIACKj09XStXrtSlS5cUEhKi2NhYpaamqmXLlpY5VatWVbly5bR7925J0u7du1WrVi1LICVJoaGhSkpKslxttXv3bqt1ZM7JXMfNJCcnKykpyeoFAABwM4RSAAAABcz+/fvl5uYmZ2dnDRgwQJ9++qmqV6+u+Ph4OTk5ycvLy2q+r6+v4uPjJUnx8fFWgVTm8sxlt5qTlJSkK1eu3LSuqVOnytPT0/Ly9/e/210FAACFGKEUAABAAVOlShXFxcXp22+/1QsvvKDw8HAdOnTI1mVpzJgxSkxMtLxOnjxp65IAAMA9zMHWBQAAACBnnJycFBQUJEkKDg7Wnj17NHfuXHXu3FkpKSk6f/681dVSCQkJ8vPzkyT5+fnpu+++s1pf5tP5rp9z4xP7EhIS5OHhIVdX15vW5ezsLGdn57vePwAAcH/gSikAAIACLiMjQ8nJyQoODpajo6M2b95sWXb48GGdOHFCISEhkqSQkBDt379fZ86cscyJiYmRh4eHqlevbplz/Toy52SuAwAAIC9wpRQAAEABMmbMGLVp00blypXThQsXtGLFCm3btk0bN26Up6en+vTpo+HDh6tEiRLy8PDQoEGDFBISokaNGkmSWrVqperVq6t79+6aPn264uPjNXbsWEVERFiuchowYIDmzZun0aNHq3fv3tqyZYtWrVql6OhoW+46AAAoZAilAAAACpAzZ86oR48eOn36tDw9PVW7dm1t3LhRjz/+uCRp9uzZKlKkiDp27Kjk5GSFhobq7bfftnze3t5ea9eu1QsvvKCQkBAVK1ZM4eHhmjRpkmVOYGCgoqOjNWzYMM2dO1dly5bVokWLFBoaavr+AgCAwitXodTRo0dVoUKFvK4FAACgUMuLHmrx4sW3XO7i4qLIyEhFRkbedE5AQIDWrVt3y/U0a9ZM+/bty1WNAAAAdyJX95QKCgpS8+bN9cEHH+jq1at5XRMAAEChRA8FAABwTa5Cqb1796p27doaPny4/Pz81L9//yxPcQEAAIA1eigAAIBrchVK1alTR3PnztWpU6f03nvv6fTp02rcuLFq1qypWbNm6ezZs3ldJwAAQIFHDwUAAHBNrkKpTA4ODurQoYNWr16tN954Q7/99ptGjhwpf39/yw04AQAAYI0eCgAA4C5Dqe+//14vvviiSpcurVmzZmnkyJE6cuSIYmJidOrUKT399NN5VScAAEChQQ8FAACQy6fvzZo1S0uWLNHhw4fVtm1bLVu2TG3btlWRIv9mXIGBgYqKilL58uXzslYAAIACjR4KAADgmlyFUvPnz1fv3r3Vs2dPlS5dOts5Pj4+t31kMQAAwP2EHgoAAOCaXIVSv/76623nODk5KTw8PDerBwAAKJTooQAAAK7J1T2llixZotWrV2cZX716tZYuXXrXRQEAABRG9FAAAADX5CqUmjp1qkqVKpVl3MfHR6+//vpdFwUAAFAY0UMBAABck6tQ6sSJEwoMDMwyHhAQoBMnTtx1UQAAAIURPRQAAMA1uQqlfHx89OOPP2YZ/+GHH1SyZMm7LgoAAKAwoocCAAC4Jleh1LPPPqvBgwdr69atSk9PV3p6urZs2aIhQ4aoS5cueV0jAABAoUAPBQAAcE2unr43efJkHT9+XI899pgcHP5dRUZGhnr06MH9EAAAAG6CHgoAAOCaXIVSTk5O+uijjzR58mT98MMPcnV1Va1atRQQEJDX9QEAABQa9FAAAADX
5CqUylS5cmVVrlw5r2oBAAC4L9BDAQAA5DKUSk9PV1RUlDZv3qwzZ84oIyPDavmWLVvypDgAAIDChB4KAADgmlyFUkOGDFFUVJTCwsJUs2ZN2dnZ5XVdAAAAhQ49FAAAwDW5CqVWrlypVatWqW3btnldDwAAQKFFDwUAAHBNkdx8yMnJSUFBQXldCwAAQKFGDwUAAHBNrkKpESNGaO7cuTIMI6/rAQAAKLTooQAAAK7J1df3duzYoa1bt2r9+vWqUaOGHB0drZavWbMmT4oDAAAoTOihAAAArslVKOXl5aX27dvndS0AAACFGj0UAADANbkKpZYsWZLXdQAAABR69FAAAADX5OqeUpKUlpamL7/8Uu+8844uXLggSTp16pQuXryYZ8UBAAAUNvRQAAAA/8rVlVK///67WrdurRMnTig5OVmPP/643N3d9cYbbyg5OVkLFizI6zoBAAAKPHooAACAa3J1pdSQIUNUr149nTt3Tq6urpbx9u3ba/PmzXlWHAAAQGFCDwUAAHBNrq6U+vrrr7Vr1y45OTlZjZcvX15//vlnnhQGAABQ2NBDAQAAXJOrK6UyMjKUnp6eZfyPP/6Qu7v7XRcFAABQGNFDAQAAXJOrUKpVq1aaM2eO5b2dnZ0uXryo8ePHq23btnlVGwAAQKFCDwUAAHBNrr6+N3PmTIWGhqp69eq6evWqnnvuOf36668qVaqUPvzww7yu8b7QJ2pPrj+7uGf9PKwEAADkF3ooAACAa3IVSpUtW1Y//PCDVq5cqR9//FEXL15Unz591LVrV6ubdgIAAOAaeigAAIBrchVKSZKDg4O6deuWl7UAAAAUevRQAAAA/8pVKLVs2bJbLu/Ro0euigEAACjM6KEAAACuyVUoNWTIEKv3qampunz5spycnFS0aFEaKgAAgGzQQwEAAFyTq6fvnTt3zup18eJFHT58WI0bN+YmnQAAADdBDwUAAHBNrkKp7FSqVEnTpk3LcgYQAAAAN0cPBQAA7ld5FkpJ/96489SpU3m5SgAAgEKPHgoAANyPcnVPqf/7v/+zem8Yhk6fPq158+bpkUceyZPCAAAACht6KAAAgGtyFUq1a9fO6r2dnZ28vb3VokULzZw5My/qAgAAKHTooQAAAK7JVSiVkZGR13UAAAAUevRQAAAA1+TpPaUAAAAAAACAO5GrK6WGDx9+x3NnzZqVm00AAAAUOvRQAAAA1+QqlNq3b5/27dun1NRUValSRZL0yy+/yN7eXnXr1rXMs7Ozy5sqAQAACgF6KAAAgGtyFUo9+eSTcnd319KlS1W8eHFJ0rlz59SrVy89+uijGjFiRJ4WCQAAUBjQQwEAAFyTq3tKzZw5U1OnTrU0U5JUvHhxTZkyhSfHAAAA3AQ9FAAAwDW5CqWSkpJ09uzZLONnz57VhQsX7rooAACAwogeCgAA4JpchVLt27dXr169tGbNGv3xxx/6448/9Mknn6hPnz7q0KFDXtcIAABQKNBDAQAAXJOre0otWLBAI0eO1HPPPafU1NR/V+TgoD59+mjGjBl5WiAAAEBhQQ8FAABwTa5CqaJFi+rtt9/WjBkzdOTIEUlSxYoVVaxYsTwtDgAAoDChhwIAALgmV1/fy3T69GmdPn1alSpVUrFixWQYRl7VBQAAUGjRQwEAAOQylPr777/12GOPqXLlymrbtq1Onz4tSerTpw+PMgYAALgJeigAAIBrchVKDRs2TI6Ojjpx4oSKFi1qGe/cubM2bNiQZ8UBAAAUJvRQAAAA1+TqnlKbNm3Sxo0bVbZsWavxSpUq6ffff8+TwgAAAAobeigAAIBrcnWl1KVLl6zO7mX6559/5OzsfNdFAQAAFEb0UAAAANfkKpR69NFHtWzZMst7Ozs7ZWRkaPr06WrevHmeFQcAAFCY0EMBAABck6uv702fPl2PPfaYvv/+e6WkpGj06NE6ePCg/vnnH+3cuTOvawQAACgU6KEAAACuydWVUjVr1tQvv/yi
xo0b6+mnn9alS5fUoUMH7du3TxUrVszrGgEAAAoFeigAAIBrcnylVGpqqlq3bq0FCxbo1VdfzY+aAAAACh16KAAAAGs5vlLK0dFRP/74Y37UAgAAUGjRQwEAAFjL1df3unXrpsWLF+d1LQAAAIUaPRQAAMA1ubrReVpamt577z19+eWXCg4OVrFixayWz5o1K0+KAwAAKEzooQAAAK7JUSh19OhRlS9fXgcOHFDdunUlSb/88ovVHDs7u7yrDgAAoBCghwIAAMgqR6FUpUqVdPr0aW3dulWS1LlzZ/3vf/+Tr69vvhQHAABQGNBDAQAAZJWje0oZhmH1fv369bp06VKeFgQAAFDY0EMBAABklasbnWe6scECAADA7dFDAQAA5DCUsrOzy3K/A+5/AAAAcGv0UAAAAFnl6J5ShmGoZ8+ecnZ2liRdvXpVAwYMyPLkmDVr1uRdhQAAAAUcPRQAAEBWOQqlwsPDrd5369YtT4sBAAAojOihAAAAsspRKLVkyZL8qgMAAKDQoocCAADI6q5udA4AAAAAAADkBqEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0Ng2lpk6dqvr168vd3V0+Pj5q166dDh8+bDXn6tWrioiIUMmSJeXm5qaOHTsqISHBas6JEycUFhamokWLysfHR6NGjVJaWprVnG3btqlu3bpydnZWUFCQoqKi8nv3AAAAAAAAcBM2DaW2b9+uiIgIffPNN4qJiVFqaqpatWqlS5cuWeYMGzZMX3zxhVavXq3t27fr1KlT6tChg2V5enq6wsLClJKSol27dmnp0qWKiorSuHHjLHOOHTumsLAwNW/eXHFxcRo6dKj69u2rjRs3mrq/AAAAAAAA+JeDLTe+YcMGq/dRUVHy8fFRbGysmjRposTERC1evFgrVqxQixYtJElLlixRtWrV9M0336hRo0batGmTDh06pC+//FK+vr6qU6eOJk+erJdeekkTJkyQk5OTFixYoMDAQM2cOVOSVK1aNe3YsUOzZ89WaGholrqSk5OVnJxseZ+UlJSPRwEAAAAAAOD+c0/dUyoxMVGSVKJECUlSbGysUlNT1bJlS8ucqlWrqly5ctq9e7ckaffu3apVq5Z8fX0tc0JDQ5WUlKSDBw9a5ly/jsw5meu40dSpU+Xp6Wl5+fv7591OAgAAAAAA4N4JpTIyMjR06FA98sgjqlmzpiQpPj5eTk5O8vLysprr6+ur+Ph4y5zrA6nM5ZnLbjUnKSlJV65cyVLLmDFjlJiYaHmdPHkyT/YRAAAAAAAA/7Lp1/euFxERoQMHDmjHjh22LkXOzs5ydna2dRkAAAAAAACF1j1xpdTAgQO1du1abd26VWXLlrWM+/n5KSUlRefPn7ean5CQID8/P8ucG5/Gl/n+dnM8PDzk6uqa17sDAAAAAACA27BpKGUYhgYOHKhPP/1UW7ZsUWBgoNXy4OBgOTo6avPmzZaxw4cP68SJEwoJCZEkhYSEaP/+/Tpz5oxlTkxMjDw8PFS9enXLnOvXkTkncx0AAAAAAAAwl02/vhcREaEVK1bo888/l7u7u+UeUJ6ennJ1dZWnp6f69Omj4cOHq0SJEvLw8NCgQYMUEhKiRo0aSZJatWql6tWrq3v37po+fbri4+M1duxYRUREWL6CN2DAAM2bN0+jR49W7969tWXLFq1atUrR0dE223cAAAAAAID7mU2vlJo/f74SExPVrFkzlS5d2vL66KOPLHNmz56tJ554Qh07dlSTJk3k5+enNWvWWJbb29tr7dq1sre3V0hIiLp166YePXpo0qRJljmBgYGKjo5WTEyMHnzwQc2cOVOLFi1SaGioqfsLAABwt6ZOnar69evL3d1dPj4+ateunQ4fPmw15+rVq4qIiFDJkiXl5uamjh07ZrmVwYkTJxQWFqaiRYvKx8dHo0aNUlpamtWc
bdu2qW7dunJ2dlZQUJCioqLye/cAAMB9xKZXShmGcds5Li4uioyMVGRk5E3nBAQEaN26dbdcT7NmzbRv374c1wgAAHAv2b59uyIiIlS/fn2lpaXplVdeUatWrXTo0CEVK1ZMkjRs2DBFR0dr9erV8vT01MCBA9WhQwft3LlTkpSenq6wsDD5+flp165dOn36tHr06CFHR0e9/vrrkqRjx44pLCxMAwYM0PLly7V582b17dtXpUuX5sQeAADIE/fM0/cAAABwexs2bLB6HxUVJR8fH8XGxqpJkyZKTEzU4sWLtWLFCrVo0UKStGTJElWrVk3ffPONGjVqpE2bNunQoUP68ssv5evrqzp16mjy5Ml66aWXNGHCBDk5OWnBggUKDAzUzJkzJUnVqlXTjh07NHv2bEIpAACQJ+6Jp+8BAAAgdxITEyVJJUqUkCTFxsYqNTVVLVu2tMypWrWqypUrp927d0uSdu/erVq1asnX19cyJzQ0VElJSTp48KBlzvXryJyTuY7sJCcnKykpyeoFAABwM4RSAAAABVRGRoaGDh2qRx55RDVr1pQkxcfHy8nJSV5eXlZzfX19LQ+ViY+PtwqkMpdnLrvVnKSkJF25ciXbeqZOnSpPT0/Ly9/f/673EQAAFF6EUgAAAAVURESEDhw4oJUrV9q6FEnSmDFjlJiYaHmdPHnS1iUBAIB7GPeUAgAAKIAGDhyotWvX6quvvlLZsmUt435+fkpJSdH58+etrpZKSEiQn5+fZc53331ntb7Mp/NdP+fGJ/YlJCTIw8NDrq6u2dbk7OwsZ2fnu943AABwf+BKKQAAgALEMAwNHDhQn376qbZs2aLAwECr5cHBwXJ0dNTmzZstY4cPH9aJEycUEhIiSQoJCdH+/ft15swZy5yYmBh5eHioevXqljnXryNzTuY6AAAA7hZXSgEAABQgERERWrFihT7//HO5u7tb7gHl6ekpV1dXeXp6qk+fPho+fLhKlCghDw8PDRo0SCEhIWrUqJEkqVWrVqpevbq6d++u6dOnKz4+XmPHjlVERITlSqcBAwZo3rx5Gj16tHr37q0tW7Zo1apVio6Ottm+AwCAwoUrpQAAAAqQ+fPnKzExUc2aNVPp0qUtr48++sgyZ/bs2XriiSfUsWNHNWnSRH5+flqzZo1lub29vdauXSt7e3uFhISoW7du6tGjhyZNmmSZExgYqOjoaMXExOjBBx/UzJkztWjRIoWGhpq6vwAAoPDiSikAAIACxDCM285xcXFRZGSkIiMjbzonICBA69atu+V6mjVrpn379uW4RgAAgDvBlVIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAw
HaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANMRSgEAAAAAAMB0hFIAAAAAAAAwHaEUAAAAAAAATEcoBQAAAAAAANPZNJT66quv9OSTT6pMmTKys7PTZ599ZrXcMAyNGzdOpUuXlqurq1q2bKlff/3Vas4///yjrl27ysPDQ15eXurTp48uXrxoNefHH3/Uo48+KhcXF/n7+2v69On5vWsAAAAAAAC4BZuGUpcuXdKDDz6oyMjIbJdPnz5d//vf/7RgwQJ9++23KlasmEJDQ3X16lXLnK5du+rgwYOKiYnR2rVr9dVXX6lfv36W5UlJSWrVqpUCAgIUGxurGTNmaMKECVq4cGG+7x8AAAAAAACy52DLjbdp00Zt2rTJdplhGJozZ47Gjh2rp59+WpK0bNky+fr66rPPPlOXLl30008/acOGDdqzZ4/q1asnSXrrrbfUtm1bvfnmmypTpoyWL1+ulJQUvffee3JyclKNGjUUFxenWbNmWYVX10tOTlZycrLlfVJSUh7vOQAAAAAAwP3tnr2n1LFjxxQfH6+WLVtaxjw9PdWwYUPt3r1bkrR79255eXlZAilJatmypYoUKaJvv/3WMqdJkyZycnKyzAkNDdXhw4d17ty5bLc9depUeXp6Wl7+/v75sYsAAAAAAAD3rXs2lIqPj5ck+fr6Wo37+vpalsXHx8vHx8dquYODg0qUKGE1J7t1XL+NG40ZM0aJiYmW18mTJ+9+hwAAAAAAAGBxz4ZStuTs7CwPDw+rFwAAwL2Ch8UAAIDC4J4Npfz8/CRJCQkJVuMJCQmWZX5+fjpz5ozV8rS0NP3zzz9Wc7Jbx/XbAAAAKEh4WAwAACgM7tlQKjAwUH5+ftq8ebNlLCkpSd9++61CQkIkSSEhITp//rxiY2Mtc7Zs2aKMjAw1bNjQMuerr75SamqqZU5MTIyqVKmi4sWLm7Q3AAAAeadNmzaaMmWK2rdvn2XZjQ+LqV27tpYtW6ZTp05ZrqjKfFjMokWL1LBhQzVu3FhvvfWWVq5cqVOnTkmS1cNiatSooS5dumjw4MGaNWvWTetKTk5WUlKS1QsAAOBmbBpKXbx4UXFxcYqLi5P0783N4+LidOLECdnZ2Wno0KGaMmWK/u///k/79+9Xjx49VKZMGbVr106SVK1aNbVu3VrPP/+8vvvuO+3cuVMDBw5Uly5dVKZMGUnSc889JycnJ/Xp00cHDx7URx99pLlz52r48OE22msAAID8w8NiAABAQWHTUOr777/XQw89pIceekiSNHz4cD300EMaN26cJGn06NEaNGiQ+vXrp/r16+vixYvasGGDXFxcLOtYvny5qlatqscee0xt27ZV48aNrS4r9/T01KZNm3Ts2DEFBwdrxIgRGjdunNXl6QAAAIUFD4sBAAAFhYMtN96sWTMZhnHT5XZ2dpo0aZImTZp00zklSpTQihUrbrmd2rVr6+uvv851nQAAALg9Z2dnOTs727oMAABQQNyz95QCAABAzvGwGAAAUFAQSgEAABQiPCwGAAAUFIRSAAAABQwPiwEAAIWBTe8pBQAAgJz7/vvv1bx5c8v7zKAoPDxcUVFRGj16tC5duqR+/frp/Pnzaty4cbYPixk4cKAee+wxFSlSRB07dtT//vc/y/LMh8VEREQoODhYpUqV4mExAAAgTxFKAQAAFDA8LAYAABQGfH0P
AAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYjlAKAAAAAAAApiOUAgAAAAAAgOkIpQAAAAAAAGA6QikAAAAAAACYzsHWBQAAAAC50Sdqz119fnHP+nlUCQAAyA2ulAIAAAAAAIDpuFIKAAAAAADARu72yt+CjFAKAAAA96W7+Z8AvvoHAMDd4+t7AAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAExHKAUAAAAAAADTEUoBAAAAAADAdIRSAAAAAAAAMB2hFAAAAAAAAEznYOsCAAAAAAAACqo+UXtsXUKBRSgFAAAAAMjW3fzP9uKe9fOwEgCFEV/fAwAAAAAAgOkIpQAAAAAAAGA6vr4HAAAAAADuW9wTyna4UgoAAAAAAACmI5QCAAAAAACA6QilAAAAAAAAYDpCKQAAAAAAAJiOUAoAAAAAAACm4+l7AAAAQA7d7ZOaFvesn0eVAABQcHGlFAAAAAAAAEzHlVIAACBP3O2VIwAAALlFH1IwcaUUAAAAAAAATMeVUgAAFCKcJQQAAEBBwZVSAAAAAAAAMB2hFAAAAAAAAEzH1/cAAAAAFHp3+/XmxT3r51ElAIBMhFIAAAAAAMCmuC/m/YlQCgAAADDZ3fzPF1fsAAAKC0IpALjH8XWDgoczfQAAoCCih4HZCKUAALgBDRmAe1lBPllRkGsHAOQ9QikAQKFEsAQA2SMYAgo3eiAUJIRSAFDI2bIx4X9cAAC4fxGAArgdQikA9wWaItvgRr4AAAA5w5VOuJ8QSgEoMGz5FzThCgAAgLlseVKRE5qAOQilAOQI4QzMwllCAABQUNHHAHeGUKoQ4AzC/aegBkP361/O9+t+AwCQ17hqGjlBDwbc+wilYFP8RWE+jjkAALgb9BI5x4lcAMjefRVKRUZGasaMGYqPj9eDDz6ot956Sw0aNLB1WTZHYwEAAG6G/gmwPfp1AIVVEVsXYJaPPvpIw4cP1/jx47V37149+OCDCg0N1ZkzZ2xdGgAAwD2J/gkAAOSn++ZKqVmzZun5559Xr169JEkLFixQdHS03nvvPb388stWc5OTk5WcnGx5n5iYKElKSkrKt/pSrlzMt3UDAFDY5eff0ZnrNgwj37Zxr8pJ/ySZ30PRPwEAcHfy6+/oO+2f7otQKiUlRbGxsRozZoxlrEiRImrZsqV2796dZf7UqVM1ceLELOP+/v75WicAAMidD17M/21cuHBBnp6e+b+he0RO+yeJHgoAgIImv3uo2/VP90Uo9ddffyk9PV2+vr5W476+vvr555+zzB8zZoyGDx9ueZ+RkaF//vlHJUuWlJ2dXZ7Xl5SUJH9/f508eVIeHh55vn5kxTE3H8fcNjju5uOYmy+/j7lhGLpw4YLKlCmT5+u+l+W0f5Lyr4fi3yvb4LjbBsfddjj2tsFxt538PPZ32j/dF6FUTjk7O8vZ2dlqzMvLK9+36+Hhwb+EJuOYm49jbhscd/NxzM2X
n8f8frpC6m7kdw/Fv1e2wXG3DY677XDsbYPjbjv5dezvpH+6L250XqpUKdnb2yshIcFqPCEhQX5+fjaqCgAA4N5F/wQAAPLbfRFKOTk5KTg4WJs3b7aMZWRkaPPmzQoJCbFhZQAAAPcm+icAAJDf7puv7w0fPlzh4eGqV6+eGjRooDlz5ujSpUuWp8nYkrOzs8aPH5/lcnfkH465+TjmtsFxNx/H3Hwc8/xzr/RP/Ixtg+NuGxx32+HY2wbH3XbuhWNvZ9xHzzeeN2+eZsyYofj4eNWpU0f/+9//1LBhQ1uXBQAAcM+ifwIAAPnlvgqlAAAAAAAAcG+4L+4pBQAAAAAAgHsLoRQAAAAAAABMRygFAAAAAAAA0xFKAQAAAAAAwHSEUiaJjIxU+fLl5eLiooYNG+q777675fzVq1eratWqcnFxUa1atbRu3TqTKi08cnLM3333XT366KMqXry4ihcvrpYtW972Z4Sscvp7nmnlypWys7NTu3bt8rfAQiinx/z8+fOKiIhQ6dKl5ezsrMqVK/Pfl1zI6XGfM2eOqlSpIldXV/n7+2vYsGG6evWqSdUWfF999ZWefPJJlSlTRnZ2dvrss89u+5lt27apbt26cnZ2VlBQkKKiovK9TtwdeiXboF+yDXom26F3sg16J/MVmP7JQL5buXKl4eTkZLz33nvGwYMHjeeff97w8vIyEhISsp2/c+dOw97e3pg+fbpx6NAhY+zYsYajo6Oxf/9+kysvuHJ6zJ977jkjMjLS2Ldvn/HTTz8ZPXv2NDw9PY0//vjD5MoLrpwe80zHjh0zHnjgAePRRx81nn76aXOKLSRyesyTk5ONevXqGW3btjV27NhhHDt2zNi2bZsRFxdncuUFW06P+/Llyw1nZ2dj+fLlxrFjx4yNGzcapUuXNoYNG2Zy5QXXunXrjFdffdVYs2aNIcn49NNPbzn/6NGjRtGiRY3hw4cbhw4dMt566y3D3t7e2LBhgzkFI8folWyDfsk26Jlsh97JNuidbKOg9E+EUiZo0KCBERERYXmfnp5ulClTxpg6dWq285955hkjLCzMaqxhw4ZG//7987XOwiSnx/xGaWlphru7u7F06dL8KrHQyc0xT0tLMx5++GFj0aJFRnh4OA1WDuX0mM+fP9+oUKGCkZKSYlaJhVJOj3tERITRokULq7Hhw4cbjzzySL7WWVjdSVM1evRoo0aNGlZjnTt3NkJDQ/OxMtwNeiXboF+yDXom26F3sg16J9u7l/snvr6Xz1JSUhQbG6uWLVtaxooUKaKWLVtq9+7d2X5m9+7dVvMlKTQ09KbzYS03x/xGly9fVmpqqkqUKJFfZRYquT3mkyZNko+Pj/r06WNGmYVKbo75//3f/ykkJEQRERHy9fVVzZo19frrrys9Pd2ssgu83Bz3hx9+WLGxsZbL1I8ePap169apbdu2ptR8P+Lv0YKFXsk26Jdsg57JduidbIPeqeCw1d+tDvm6duivv/5Senq6fH19rcZ9fX31888/Z/uZ+Pj4bOfHx8fnW52FSW6O+Y1eeukllSlTJsu/lMhebo75jh07tHjxYsXFxZlQYeGTm2N+9OhRbdmyRV27dtW6dev022+/6cUXX1RqaqrGjx9vRtkFXm6O+3PPPae//vpLjRs3lmEYSktL04ABA/TKK6+YUfJ96WZ/jyYlJenKlStydXW1UWXIDr2SbdAv2QY9k+3QO9kGvVPBYav+iSulgBtMmzZNK1eu1KeffioXFxdbl1MoXbhwQd27d9e7776rUqVK2bqc+0ZGRoZ8fHy0cOFCBQcHq3Pnznr11Ve1YMECW5dWqG3btk2vv/663n77be3du1dr1qxRdHS0Jk+ebOvSACDX6JfMQc9kW/ROtkHvdH/hSql8VqpUKdnb2yshIcFqPCEhQX5+ftl+xs/PL0fzYS03xzzTm2++qWnTpunLL79U7dq187PMQiWnx/zIkSM6fvy4nnzySctYRkaGJMnBwUGH
Dx9WxYoV87foAi43v+elS5eWo6Oj7O3tLWPVqlVTfHy8UlJS5OTklK81Fwa5Oe7//e9/1b17d/Xt21eSVKtWLV26dEn9+vXTq6++qiJFOD+U127296iHhwdXSd2D6JVsg37JNuiZbIfeyTbonQoOW/VP/DTzmZOTk4KDg7V582bLWEZGhjZv3qyQkJBsPxMSEmI1X5JiYmJuOh/WcnPMJWn69OmaPHmyNmzYoHr16plRaqGR02NetWpV7d+/X3FxcZbXU089pebNmysuLk7+/v5mll8g5eb3/JFHHtFvv/1maWYl6ZdfflHp0qVpqu5Qbo775cuXszRPmc2tYRj5V+x9jL9HCxZ6JdugX7INeibboXeyDXqngsNmf7fm623UYRjGv4/AdHZ2NqKiooxDhw4Z/fr1M7y8vIz4+HjDMAyje/fuxssvv2yZv3PnTsPBwcF48803jZ9++skYP348jznOoZwe82nTphlOTk7Gxx9/bJw+fdryunDhgq12ocDJ6TG/EU+SybmcHvMTJ04Y7u7uxsCBA43Dhw8ba9euNXx8fIwpU6bYahcKpJwe9/Hjxxvu7u7Ghx9+aBw9etTYtGmTUbFiReOZZ56x1S4UOBcuXDD27dtn7Nu3z5BkzJo1y9i3b5/x+++/G4ZhGC+//LLRvXt3y/zMRxqPGjXK+Omnn4zIyEhTHmmM3KNXsg36JdugZ7IdeifboHeyjYLSPxFKmeStt94yypUrZzg5ORkNGjQwvvnmG8uypk2bGuHh4VbzV61aZVSuXNlwcnIyatSoYURHR5tcccGXk2MeEBBgSMryGj9+vPmFF2A5/T2/Hg1W7uT0mO/atcto2LCh4ezsbFSoUMF47bXXjLS0NJOrLvhyctxTU1ONCRMmGBUrVjRcXFwMf39/48UXXzTOnTtnfuEF1NatW7P9b3TmcQ4PDzeaNm2a5TN16tQxnJycjAoVKhhLliwxvW7kDL2SbdAv2QY9k+3QO9kGvZP5Ckr/ZGcYXP8GAAAAAAAAc3FPKQAAAAAAAJiOUAoAAAAAAACmI5QCAAAAAACA6QilAAAAAAAAYDpCKQAAAAAAAJiOUAoAAAAAAACmI5QCAAAAAACA6QilAAAAAAAAYDpCKQCFVlRUlLy8vEzfbrNmzTR06FDTtwsAAHCnbNUnAcD1CKUAFFqdO3fWL7/8Ynk/YcIE1alTJ8fr6dmzp+zs7DRgwIAsyyIiImRnZ6eePXtaxtasWaPJkyfnaBt2dnaWl4eHh+rXr6/PP//cas6aNWv0+OOPy9vbWx4eHgoJCdHGjRtzvD8AAAC26pPM0qxZM0tv5eLiosqVK2vq1KkyDMMy54cfftCzzz4rf39/ubq6qlq1apo7d67ptQL3M0IpAIVSamqqXF1d5ePjkyfr8/f318qVK3XlyhXL2NWrV7VixQqVK1fOam6JEiXk7u6e420sWbJEp0+f1vfff69HHnlEnTp10v79+y3Lv/rqKz3++ONat26dYmNj1bx5cz355JPat29f7ncMAADcd2zZJ5np+eef1+nTp3X48GGNGTNG48aN04IFCyzLY2Nj5ePjow8++EAHDx7Uq6++qjFjxmjevHk2qxm43xBKAbC5Zs2aaeDAgRo4cKA8PT1VqlQp/fe//7WcybKzs9Nnn31m9RkvLy9FRUVJko4fPy47Ozt99NFHatq0qVxcXLR8+XKry9KjoqI0ceJE/fDDD5azZlFRUerdu7eeeOIJq3WnpqbKx8dHixcvtozVrVtX/v7+WrNmjWVszZo1KleunB566KEs+3P91/fKly+v119/Xb1795a7u7vKlSunhQsXZjkOXl5e8vPzU+XKlTV58mSlpaVp69atluVz5szR6NGjVb9+fVWqVEmvv/66KlWqpC+++OKOjzUAAChYCluflJGRoalTpyowMFCurq568MEH9fHHH1uWp6enq0+fPpblVapUyXL1Us+ePdWuXTu9+eabKl26tEqWLKmIiAilpqZa
zStatKj8/PwUEBCgXr16qXbt2oqJibEs7927t+bOnaumTZuqQoUK6tatm3r16mW1HwDyF6EUgHvC0qVL5eDgoO+++05z587VrFmztGjRohyt4+WXX9aQIUP0008/KTQ01GpZ586dNWLECNWoUUOnT5/W6dOn1blzZ/Xt21cbNmzQ6dOnLXPXrl2ry5cvq3Pnzlbr6N27t5YsWWJ5/95776lXr153VNvMmTNVr1497du3Ty+++KJeeOEFHT58ONu5aWlplkbPycnppuvMyMjQhQsXVKJEiTuqAQAAFEyFqU+aOnWqli1bpgULFujgwYMaNmyYunXrpu3bt0v6t78pW7asVq9erUOHDmncuHF65ZVXtGrVKqv1bN26VUeOHNHWrVu1dOlSRUVFWYK4GxmGoa+//lo///zzLXsrSUpMTKS3AkzkYOsCAED697Lv2bNny87OTlWqVNH+/fs1e/ZsPf/883e8jqFDh6pDhw7ZLnN1dZWbm5scHBzk5+dnGX/44YdVpUoVvf/++xo9erSkf79G95///Edubm5W6+jWrZvGjBmj33//XZK0c+dOrVy5Utu2bbttbW3bttWLL74oSXrppZc0e/Zsbd26VVWqVLHMefbZZ2Vvb68rV64oIyND5cuX1zPPPHPTdb755pu6ePHiLecAAICCr7D0ScnJyXr99df15ZdfKiQkRJJUoUIF7dixQ++8846aNm0qR0dHTZw40fKZwMBA7d69W6tWrbLqeYoXL6558+bJ3t5eVatWVVhYmDZv3mx1TN5++20tWrRIKSkpSk1NlYuLiwYPHnzTY7Rr1y599NFHio6Ovt3hBJBHuFIKwD2hUaNGsrOzs7wPCQnRr7/+qvT09DteR7169XK17b59+1rO7CUkJGj9+vXq3bt3lnne3t4KCwtTVFSUlixZorCwMJUqVeqOtlG7dm3Ln+3s7OTn56czZ85YzZk9e7bi4uK0fv16Va9eXYsWLbrpmboVK1Zo4sSJWrVqVZ7dDwIAANybCkuf9Ntvv+ny5ct6/PHH5ebmZnktW7ZMR44cscyLjIxUcHCwvL295ebmpoULF+rEiRNW66pRo4bs7e0t70uXLp2lt+ratavi4uK0c+dOtWnTRq+++qoefvjhbPfzwIEDevrppzV+/Hi1atUqZwcJQK5xpRSAe56dnZ3Vk1IkZblngCQVK1YsV+vv0aOHXn75Ze3evVu7du1SYGCgHn300Wzn9u7dWwMHDpT0b8N0pxwdHa3e29nZKSMjw2rMz89PQUFBCgoK0pIlS9S2bVsdOnQoS+i0cuVK9e3bV6tXr1bLli3vuAYAAFD4FKQ+6eLFi5Kk6OhoPfDAA1bLnJ2dJf3b54wcOVIzZ85USEiI3N3dNWPGDH377bdW8++kt/L09FRQUJAkadWqVQoKClKjRo2y9E+HDh3SY489pn79+mns2LG3PB4A8hahFIB7wo2NxjfffKNKlSrJ3t5e3t7eVvcy+PXXX3X58uUcb8PJySnbM4olS5ZUu3bttGTJEu3evfuW94lq3bq1UlJSZGdnl+V+DHmpQYMGCg4O1muvvWZ1c88PP/xQvXv31sqVKxUWFpZv2wcAAPeOwtInVa9eXc7Ozjpx4oSaNm2a7Tp27typhx9+2HLbA0lWV1Hllpubm4YMGaKRI0dq3759livPDh48qBYtWig8PFyvvfbaXW8HQM4QSgG4J5w4cULDhw9X//79tXfvXr311luaOXOmJKlFixaaN2+eQkJClJ6erpdeeinL2bE7Ub58eR07dkxxcXEqW7as3N3dLWfl+vbtqyeeeELp6ekKDw+/6Trs7e31008/Wf6cn4YOHar27dtr9OjReuCBB7RixQqFh4dr7ty5atiwoeLj4yX9ex8IT0/PfK0FAADYTmHpk9zd3TVy5EgNGzZMGRkZaty4sRITE7Vz5055eHgoPDxclSpV0rJly7Rx40YFBgbq/fff1549exQYGJjjfbpR//79NXnyZH3y
ySfq1KmTDhw4oBYtWig0NFTDhw+39FaZYR+A/Mc9pQDcE3r06KErV66oQYMGioiI0JAhQ9SvXz9J/z65zt/fX48++qiee+45jRw5UkWLFs3xNjp27KjWrVurefPm8vb21ocffmhZ1rJlS5UuXVqhoaEqU6bMLdfj4eEhDw+PHG8/p1q3bq3AwEDLWbuFCxcqLS1NERERKl26tOU1ZMiQfK8FAADYTmHqkyZPnqz//ve/mjp1qqpVq6bWrVsrOjraEjr1799fHTp0UOfOndWwYUP9/fffVldN3Y0SJUqoR48emjBhgjIyMvTxxx/r7Nmz+uCDD6x6q/r16+fJ9gDcnp1x4xeQAcBkzZo1U506dTRnzhyb1XDx4kU98MADWrJkyU2fTAMAAGA2+iQAhRlf3wNwX8vIyNBff/2lmTNnysvLS0899ZStSwIAALgn0CcByG+EUgDuaydOnFBgYKDKli2rqKgoOTjwn0UAAACJPglA/uPrewAAAAAAADAdNzoHAAAAAACA6QilAAAAAAAAYDpCKQAAAAAAAJiOUAoAAAAAAACmI5QCAAAAAACA6QilAAAAAAAAYDpCKQAAAAAAAJiOUAoAAAAAAACmI5QCAAAAAACA6QilAAAAAAAAYDpCKQAAAAAAAJiOUAoAAAAAAACmI5QCAAAAAACA6QilANxS+fLl1bNnT1uXUejNmDFDFSpUkL29verUqWPrcgAAKLTobcxRUHqbgvL7MGHCBNnZ2dm6DCDPEUoB95GoqCjZ2dnp+++/z3Z5s2bNVLNmzbvezrp16zRhwoS7Xs/9YtOmTRo9erQeeeQRLVmyRK+//vpN565YsUJz5swxpa7M5ifzVbRoUVWvXl1jx45VUlKSZV7m71Xmy8XFRZUrV9bAgQOVkJBgSq0AgPsTvc29KSe9zb3o+PHj6tWrlypWrCgXFxf5+fmpSZMmGj9+fK7Wd6vfn4sXL2r8+PGqWbOmihUrppIlS6pOnToaMmSITp06dRd7ARQMDrYuAMC97fDhwypSJGf59bp16xQZGUnzdoe2bNmiIkWKaPHixXJycrrl3BUrVujAgQMaOnSoOcVJmj9/vtzc3HTx4kVt2rRJr732mrZs2aKdO3danbGbNGmSAgMDdfXqVe3YsUPz58/XunXrdODAARUtWtS0egEAuBV6m/yXk97mXvPbb7+pfv36cnV1Ve/evVW+fHmdPn1ae/fu1RtvvKGJEyfmeJ03+/1JTU1VkyZN9PPPPys8PFyDBg3SxYsXdfDgQa1YsULt27dXmTJlJEljx47Vyy+/nBe7CNxTCKUA3JKzs7OtS8ixS5cuqVixYrYu446dOXNGrq6u92zT1qlTJ5UqVUqSNGDAAHXs2FFr1qzRN998o5CQEMu8Nm3aqF69epKkvn37qmTJkpo1a5Y+//xzPfvsszapHQCAG9Hb5D8zepv8OiazZ8/WxYsXFRcXp4CAAKtlZ86cydNtffbZZ9q3b5+WL1+u5557zmrZ1atXlZKSYnnv4OAgBwf+9x2FD1/fA3BLN37PPjU1VRMnTlSlSpXk4uKikiVLqnHjxoqJiZEk9ezZU5GRkZJk9ZWuTJcuXdKIESPk7+8vZ2dnValSRW+++aYMw7Da7pUrVzR48GCVKlVK7u7ueuqpp/Tnn3/Kzs7O6ixT5lfMDh06pOeee07FixdX48aNJUk//vijevbsqQoVKlguve7du7f+/vtvq21lruOXX35Rt27d5OnpKW9vb/33v/+VYRg6efKknn76aXl4eMjPz08zZ868o2OXlpamyZMnq2LFinJ2dlb58uX1yiuvKDk52TLHzs5OS5Ys0aVLlyzHKioqKtv1NWvWTNHR0fr9998tc8uXL29ZfubMGfXp00e+vr5ycXHRgw8+qKVLl1qt4/jx47Kzs9Obb76p2bNnKyAgQK6urmratKkOHDhwR/vVokULSdKxY8fyZB4AAGait7l3eptM3377rdq2bavixYurWLFiql27
tubOnWtZ3rNnT7m5uenIkSNq27at3N3d1bVrV0lSRkaG5syZoxo1asjFxUW+vr7q37+/zp07Z7UNwzA0ZcoUlS1bVkWLFlXz5s118ODBLLUcOXJEZcuWzRJISZKPj0+WsfXr1+vRRx9VsWLF5O7urrCwMKv13ur358iRI5KkRx55JMt6XVxc5OHhYXl/4z2levbsabW+61/X/z4lJydr/PjxCgoKkrOzs/z9/TV69GirnxlgS0StwH0oMTFRf/31V5bx1NTU2352woQJmjp1qvr27asGDRooKSlJ33//vfbu3avHH39c/fv316lTpxQTE6P333/f6rOGYeipp57S1q1b1adPH9WpU0cbN27UqFGj9Oeff2r27NmWuT179tSqVavUvXt3NWrUSNu3b1dYWNhN6/rPf/6jSpUq6fXXX7c0gTExMTp69Kh69eolPz8/HTx4UAsXLtTBgwf1zTffZLlZZOfOnVWtWjVNmzZN0dHRmjJlikqUKKF33nlHLVq00BtvvKHly5dr5MiRql+/vpo0aXLLY9W3b18tXbpUnTp10ogRI/Ttt99q6tSp+umnn/Tpp59Kkt5//30tXLhQ3333nRYtWiRJevjhh7Nd36uvvqrExET98ccflmPl5uYm6d9Gt1mzZvrtt980cOBABQYGavXq1erZs6fOnz+vIUOGWK1r2bJlunDhgiIiInT16lXNnTtXLVq00P79++Xr63vL/cpsoEqWLJkn8wAAuFv0NgWzt8ncpyeeeEKlS5fWkCFD5Ofnp59++klr16616l/S0tIUGhqqxo0b680337TcGqB///6KiopSr169NHjwYB07dkzz5s3Tvn37tHPnTjk6OkqSxo0bpylTpqht27Zq27at9u7dq1atWlldjSRJAQEB+vLLL7VlyxbLCbabef/99xUeHq7Q0FC98cYbunz5subPn6/GjRtr3759Kl++/C1/fzKDr2XLlmns2LE5upF5//791bJlS6uxDRs2aPny5ZbwLCMjQ0899ZR27Nihfv36qVq1atq/f79mz56tX375RZ999tkdbw/INwaA+8aSJUsMSbd81ahRw+ozAQEBRnh4uOX9gw8+aISFhd1yOxEREUZ2/3n57LPPDEnGlClTrMY7depk2NnZGb/99pthGIYRGxtrSDKGDh1qNa9nz56GJGP8+PGWsfHjxxuSjGeffTbL9i5fvpxl7MMPPzQkGV999VWWdfTr188ylpaWZpQtW9aws7Mzpk2bZhk/d+6c4erqanVMshMXF2dIMvr27Ws1PnLkSEOSsWXLFstYeHi4UaxYsVuuL1NYWJgREBCQZXzOnDmGJOODDz6wjKWkpBghISGGm5ubkZSUZBiGYRw7dsyQZLi6uhp//PGHZe63335rSDKGDRtmGcs8LocPHzbOnj1rHDt2zHjnnXcMZ2dnw9fX17h06ZJhGNd+r7788kvj7NmzxsmTJ42VK1caJUuWzLIdAADyEr1Nwe5t0tLSjMDAQCMgIMA4d+6c1bKMjAyr9UkyXn75Zas5X3/9tSHJWL58udX4hg0brMbPnDljODk5GWFhYVbrfeWVVwxJVvt+4MABw9XV1ZBk1KlTxxgyZIjx2WefWfqeTBcuXDC8vLyM559/3mo8Pj7e8PT0tBq/2e/P5cuXjSpVqhiSjICAAKNnz57G4sWLjYSEhCxzM3+mN/Prr78anp6exuOPP26kpaUZhmEY77//vlGkSBHj66+/tpq7YMECQ5Kxc+fOm64PMAtf3wPuQ5GRkYqJicnyql279m0/6+XlpYMHD+rXX3/N8XbXrVsne3t7DR482Gp8xIgRMgxD69evl/TvWR5JevHFF63mDRo06KbrHjBgQJYxV1dXy5+vXr2qv/76S40aNZIk7d27N8v8vn37Wv5sb2+vevXqyTAM9enTxzLu5eWlKlWq6OjRozetRfp3XyVp+PDhVuMjRoyQJEVHR9/y8zm1bt06+fn5Wd27ydHRUYMHD9bFixe1
fft2q/nt2rXTAw88YHnfoEEDNWzY0FL39apUqSJvb28FBgaqf//+CgoKUnR0dJabl7ds2VLe3t7y9/dXly5d5Obmpk8//dRqOwAA5Ad6m4LZ2+zbt0/Hjh3T0KFD5eXlZbUsu6uGXnjhBav3q1evlqenpx5//HH99ddflldwcLDc3Ny0detWSdKXX36plJQUDRo0yGq92T04pkaNGoqLi1O3bt10/PhxzZ07V+3atZOvr6/effddy7yYmBidP39ezz77rNW27e3t1bBhQ8u2b8XV1VXffvutRo0aJenfp0n26dNHpUuX1qBBg+74K3aXLl1S+/btVbx4cX344Yeyt7e3HJ9q1aqpatWqVjVmXgF2JzUC+Y2v7wH3oQYNGlhuSH294sWLZ3vp+/UmTZqkp59+WpUrV1bNmjXVunVrde/e/Y6avt9//11lypSRu7u71Xi1atUsyzP/WaRIEQUGBlrNCwoKuum6b5wrSf/8848mTpyolStXZrkxZWJiYpb55cqVs3rv6ekpFxcXy02+rx+/8d4NN8rchxtr9vPzk5eXl2Vf88rvv/+uSpUqZXma0I3HNlOlSpWyrKNy5cpatWpVlvFPPvlEHh4ecnR0VNmyZVWxYsVsa4iMjFTlypXl4OAgX19fValSJcdPNwIAIDfobQpmb5P5Vf+aNWvedq6Dg4PKli1rNfbrr78qMTEx23s9SdduTJ5Z2439j7e3t4oXL57lc5UrV9b777+v9PR0HTp0SGvXrtX06dPVr18/BQYGqmXLlpYQ82Zf8bv+flC34unpqenTp2v69On6/ffftXnzZr355puaN2+ePD09NWXKlNuu4/nnn9eRI0e0a9cuq9sm/Prrr/rpp5/k7e2d7efy+sbtQG4QSgHIkSZNmujIkSP6/PPPtWnTJi1atEizZ8/WggULrM7Gme36M4eZnnnmGe3atUujRo1SnTp15ObmpoyMDLVu3VoZGRlZ5meeVbrdmKQsNy+9mZzcG+Be1aRJkyzNa3Zu9j8EAADcy+ht/nWv9zbOzs5ZTnZlZGTIx8dHy5cvz/YzNwtj7pS9vb1q1aqlWrVqKSQkRM2bN9fy5cvVsmVLy/F+//335efnl+WzuXlSXkBAgHr37q327durQoUKWr58+W1Dqblz5+rDDz/UBx98oDp16lgty8jIUK1atTRr1qxsP+vv75/jGoG8RigFIMdKlCihXr16qVevXrp48aKaNGmiCRMmWBq3mzUrmTeOvHDhgtUZxZ9//tmyPPOfGRkZOnbsmNUZrd9+++2Oazx37pw2b96siRMnaty4cZbx3FyanxuZ+/Drr79azpZKUkJCgs6fP5/tE13uxK2O7Y8//qiMjAyrhu3GY5spu+Pwyy+/WD3NDwCA+wW9ze3lR2+TefX1gQMHsty0+04//+WXX+qRRx7JNsS7vnbp32NVoUIFy/jZs2ezPKXvZjJPvJ0+fdqqdh8fn9vWntMgr3jx4qpYseJtn4z89ddfa+TIkRo6dKjlaYTXq1ixon744Qc99thjheJEKQonvlcBIEduvLTbzc1NQUFBVt95L1asmCTp/PnzVnPbtm2r9PR0zZs3z2p89uzZsrOzU5s2bSRJoaGhkqS3337bat5bb711x3VmngW88azfnDlz7ngdd6Nt27bZbi/zTNWtnrZzK8WKFcv28vy2bdsqPj5eH330kWUsLS1Nb731ltzc3NS0aVOr+Z999pn+/PNPy/vvvvtO3377reVnAADA/YLe5s7kR29Tt25dBQYGas6cOVmO7Z1cufXMM88oPT1dkydPzrIsLS3Nss6WLVvK0dFRb731ltV6szt2X3/9dbZPbcy8p1aVKlUk/fsz9fDw0Ouvv57t/LNnz1r+fLPfnx9++CHbr5f+/vvvOnTokGVb2Tl9+rSeeeYZNW7cWDNmzMh2zjPPPKM///zT6l5Yma5cuaJLly7ddP2AWbhSCkCOVK9eXc2aNVNwcLBKlCih77//Xh9//LEG
DhxomRMcHCxJGjx4sEJDQ2Vvb68uXbroySefVPPmzfXqq6/q+PHjevDBB7Vp0yZ9/vnnGjp0qOWMU3BwsDp27Kg5c+bo77//tjw2+ZdffpF0Z2ebPDw81KRJE02fPl2pqal64IEHtGnTJh07diwfjkpWDz74oMLDw7Vw4UKdP39eTZs21XfffaelS5eqXbt2at68ea7WGxwcrI8++kjDhw9X/fr15ebmpieffFL9+vXTO++8o549eyo2Nlbly5fXxx9/rJ07d2rOnDlZ7nURFBSkxo0b64UXXlBycrLmzJmjkiVLavTo0Xmx+wAAFBj0NncmP3qbIkWKaP78+XryySdVp04d9erVS6VLl9bPP/+sgwcPauPGjbf8fNOmTdW/f39NnTpVcXFxatWqlRwdHfXrr79q9erVmjt3rjp16iRvb2+NHDlSU6dO1RNPPKG2bdtq3759Wr9+fZZbFLzxxhuKjY1Vhw4dLPcV27t3r5YtW6YSJUpYbo7u4eGh+fPnq3v37qpbt666dOkib29vnThxQtHR0XrkkUcsYeXNfn9iYmI0fvx4PfXUU2rUqJHc3Nx09OhRvffee0pOTtaECRNuuu+DBw/W2bNnNXr0aK1cudJqWe3atVW7dm11795dq1at0oABA7R161Y98sgjSk9P188//6xVq1Zp48aN3HoBtmerx/4BMF/mY5P37NmT7fKmTZve9rHJU6ZMMRo0aGB4eXkZrq6uRtWqVY3XXnvNSElJscxJS0szBg0aZHh7ext2dnZWj6+9cOGCMWzYMKNMmTKGo6OjUalSJWPGjBlWj+c1DMO4dOmSERERYZQoUcJwc3Mz2rVrZxw+fNiQZPUY48zH4549ezbL/vzxxx9G+/btjf/X3r2HVVXm//9/AcoGD4CHOI1IpKZ4ylMpaWbJiMo0lc5MJqkZ6VhYKpOWn8xMK83yLGmZiU06pt+pxtRUwtQx8URinsJKG5wUnBmFraaAcP/+6MfSnVqKsLbg83Fd+7rc637vtd/3Hcbdq7XXDggIMP7+/uaPf/yjOXLkyGW/evnn57jc1xlfap0upbCw0Lz00ksmIiLCVK1a1YSFhZnRo0ebs2fPXtH7XMqpU6dM3759TUBAgPX1wSVycnLMwIEDTd26dY23t7dp0aKFWbBggcvrDx06ZCSZ119/3UyZMsWEhYUZh8Nh7rrrLrNr1y6X2l9a2wv92s8VAADlhb1Nxd/bGGPMpk2bzG9/+1tTs2ZNU716ddOyZUsza9asKz7f22+/bdq2bWt8fX1NzZo1TYsWLcyoUaPMkSNHrJqioiLz0ksvmZCQEOPr62u6dOli9uzZc9HPwxdffGESEhJM8+bNjb+/v6lataqpX7++efTRR81333130Xt//vnnJiYmxvj7+xsfHx/ToEED8+ijj5odO3ZYNZf7+Tl48KAZO3as6dChgwkMDDRVqlQxN910k4mNjTXr1q1zeZ+Sf6Yl7r77biPpko8LfxYKCgrMa6+9Zpo1a2YcDoepVauWadu2rXnppZdMXl7er//DAcqZhzFXeEc7AHCzjIwMtW7dWu+///4lPzePX/f9998rIiJCr7/+up555hl3twMAwA2NvQ2AGx33lAJwXTpz5sxFx6ZPny5PT0917tzZDR0BAACUHnsbALgY95QCcF2aPHmy0tPTdc8996hKlSr69NNP9emnn2rw4MF8fS0AAKhw2NsAwMUIpQBcl+68806lpKRowoQJOnXqlOrXr69x48bp+eefd3drAAAAV429DQBcjHtKAQAAAAAAwHbcUwoAAAAAAAC2I5QCAAAAAACA7bin1BUoLi7WkSNHVLNmTXl4eLi7HQAAYCNjjE6ePKnQ0FB5evL/864GeygAAG5MV7p/IpS6AkeOHOEbMQAAuMEdPnxY9erVc3cbFQp7KAAAbmy/tn8ilLoCNWvWlPTTYvr5+bm5GwAAYCen06mwsDBrP4Arxx4KAIAb05XunwilrkDJ5eZ+fn5s
qAAAuEHx8bOrxx4KAIAb26/tn7gxAgAAAAAAAGxHKAUAAAAAAADbEUoBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxHKAUAAAAAAADbEUoBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxHKAUAAAAAAADbEUoBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxHKAUAAAAAAADbEUoBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxHKAUAAAAAAADbEUoBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxXxd0N/PDDD3r22Wf16aef6scff1TDhg21YMECtWvXTpJkjNGLL76oefPmKTc3Vx07dtScOXPUqFEj6xzHjx/XU089pU8++USenp7q3bu3ZsyYoRo1alg1X331lRISErR9+3bddNNNeuqppzRq1Cjb5wsAAC4tPnl7qV87/9Hby7ATVBTX8jMj8XMDAIC7ufVKqRMnTqhjx46qWrWqPv30U+3bt09TpkxRrVq1rJrJkydr5syZmjt3rrZu3arq1asrJiZGZ8+etWri4uK0d+9epaSkaMWKFdq4caMGDx5sjTudTnXr1k3h4eFKT0/X66+/rnHjxuntt9+2db4AAAAAAAD4iVuvlHrttdcUFhamBQsWWMciIiKsPxtjNH36dI0ZM0b333+/JOm9995TUFCQPv74Y/Xp00f79+/X6tWrtX37duvqqlmzZqlnz5564403FBoaqkWLFqmgoEDvvvuuvL291axZM2VkZGjq1Kku4RUAAAAAAADs4dYrpZYvX6527drpj3/8owIDA9W6dWvNmzfPGj906JCys7MVHR1tHfP391f79u2VlpYmSUpLS1NAQIAVSElSdHS0PD09tXXrVqumc+fO8vb2tmpiYmKUmZmpEydOXNRXfn6+nE6nywMAAAAAAABlx62h1MGDB637Q61Zs0ZPPPGEnn76aS1cuFCSlJ2dLUkKCgpyeV1QUJA1lp2drcDAQJfxKlWqqHbt2i41lzrHhe9xoYkTJ8rf3996hIWFlcFsAQAAAAAAUMKtoVRxcbHatGmjV199Va1bt9bgwYM1aNAgzZ07151tafTo0crLy7Mehw8fdms/AAAAAAAAlY1bQ6mQkBA1bdrU5VhkZKSysrIkScHBwZKknJwcl5qcnBxrLDg4WMeOHXMZP3funI4fP+5Sc6lzXPgeF3I4HPLz83N5AAAAAAAAoOy4NZTq2LGjMjMzXY4dOHBA4eHhkn666XlwcLBSU1OtcafTqa1btyoqKkqSFBUVpdzcXKWnp1s169atU3Fxsdq3b2/VbNy4UYWFhVZNSkqKGjdu7PJNfwAAAAAAALCHW0OpESNGaMuWLXr11Vf17bffavHixXr77beVkJAgSfLw8NDw4cP18ssva/ny5dq9e7f69++v0NBQPfDAA5J+urKqe/fuGjRokLZt26YvvvhCQ4cOVZ8+fRQaGipJ6tu3r7y9vRUfH6+9e/fqgw8+0IwZM5SYmOiuqQMAAAAAANzQqrjzzW+//XZ99NFHGj16tMaPH6+IiAhNnz5dcXFxVs2oUaN0+vRpDR48WLm5uerUqZNWr14tHx8fq2bRokUaOnSounbtKk9PT/Xu3VszZ860xv39/bV27VolJCSobdu2qlu3rsaOHavBgwfbOl8AAAAAAAD8xMMYY9zdxPXO6XTK399feXl53F8KAIByEp+8vdSvnf/o7WXYiSv2AaVX3mt3LT8zUvn+3AAAcCO70j2AWz++BwAAAAAAgBsToRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQA
AAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAEAFUlRUpBdeeEERERHy9fVVgwYNNGHCBBljrBpjjMaOHauQkBD5+voqOjpa33zzjct5jh8/rri4OPn5+SkgIEDx8fE6deqUS81XX32lu+66Sz4+PgoLC9PkyZNtmSMAALgxEEoBAABUIK+99prmzJmj2bNna//+/Xrttdc0efJkzZo1y6qZPHmyZs6cqblz52rr1q2qXr26YmJidPbsWasmLi5Oe/fuVUpKilasWKGNGzdq8ODB1rjT6VS3bt0UHh6u9PR0vf766xo3bpzefvttW+cLAAAqryrubgAAAABXbvPmzbr//vsVGxsrSbr55pv1t7/9Tdu2bZP001VS06dP15gxY3T//fdLkt577z0FBQXp448/Vp8+fbR//36tXr1a27dvV7t27SRJs2bNUs+ePfXGG28oNDRUixYtUkFBgd599115e3urWbNmysjI0NSpU13CKwAAgNLiSikAAIAK5M4771RqaqoOHDggSdq1a5c2bdqkHj16SJIOHTqk7OxsRUdHW6/x9/dX+/btlZaWJklKS0tTQECAFUhJUnR0tDw9PbV161arpnPnzvL29rZqYmJilJmZqRMnTlyyt/z8fDmdTpcHAADA5XClFAAAQAXy3HPPyel0qkmTJvLy8lJRUZFeeeUVxcXFSZKys7MlSUFBQS6vCwoKssays7MVGBjoMl6lShXVrl3bpSYiIuKic5SM1apV66LeJk6cqJdeeqkMZgkAAG4EXCkFAABQgSxdulSLFi3S4sWL9eWXX2rhwoV64403tHDhQne3ptGjRysvL896HD582N0tAQCA6xhXSgEAAFQgI0eO1HPPPac+ffpIklq0aKF//etfmjhxogYMGKDg4GBJUk5OjkJCQqzX5eTkqFWrVpKk4OBgHTt2zOW8586d0/Hjx63XBwcHKycnx6Wm5HlJzc85HA45HI5rnyQAALghcKUUAABABfLjjz/K09N1C+fl5aXi4mJJUkREhIKDg5WammqNO51Obd26VVFRUZKkqKgo5ebmKj093apZt26diouL1b59e6tm48aNKiwstGpSUlLUuHHjS350DwAA4GoRSgEAAFQg9913n1555RWtXLlS33//vT766CNNnTpVDz74oCTJw8NDw4cP18svv6zly5dr9+7d6t+/v0JDQ/XAAw9IkiIjI9W9e3cNGjRI27Zt0xdffKGhQ4eqT58+Cg0NlST17dtX3t7eio+P1969e/XBBx9oxowZSkxMdNfUAQBAJcPH9wAAACqQWbNm6YUXXtCTTz6pY8eOKTQ0VH/+8581duxYq2bUqFE6ffq0Bg8erNzcXHXq1EmrV6+Wj4+PVbNo0SINHTpUXbt2laenp3r37q2ZM2da4/7+/lq7dq0SEhLUtm1b1a1bV2PHjtXgwYNtnS8AAKi8PIwxxt1NXO+cTqf8/f2Vl5cnPz8/d7cDAEClFJ+8vdSvnf/o7WXYiSv2AaVX3mt3LT8zUvn+3AAAcCO70j0AH98DAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAA
ANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDu3hlLjxo2Th4eHy6NJkybW+NmzZ5WQkKA6deqoRo0a6t27t3JyclzOkZWVpdjYWFWrVk2BgYEaOXKkzp0751Kzfv16tWnTRg6HQw0bNlRycrId0wMAAAAAAMBluP1KqWbNmuno0aPWY9OmTdbYiBEj9Mknn2jZsmXasGGDjhw5ol69elnjRUVFio2NVUFBgTZv3qyFCxcqOTlZY8eOtWoOHTqk2NhY3XPPPcrIyNDw4cP1+OOPa82aNbbOEwAAAAAAAOdVcXsDVaooODj4ouN5eXmaP3++Fi9erHvvvVeStGDBAkVGRmrLli3q0KGD1q5dq3379umzzz5TUFCQWrVqpQkTJujZZ5/VuHHj5O3trblz5yoiIkJTpkyRJEVGRmrTpk2aNm2aYmJibJ0rAAAAAAAAfuL2K6W++eYbhYaG6pZbblFcXJyysrIkSenp6SosLFR0dLRV26RJE9WvX19paWmSpLS0NLVo0UJBQUFWTUxMjJxOp/bu3WvVXHiOkpqSc1xKfn6+nE6nywMAAAAAAABlx62hVPv27ZWcnKzVq1drzpw5OnTokO666y6dPHlS2dnZ8vb2VkBAgMtrgoKClJ2dLUnKzs52CaRKxkvGfqnG6XTqzJkzl+xr4sSJ8vf3tx5hYWFlMV0AAAAAAAD8/9z68b0ePXpYf27ZsqXat2+v8PBwLV26VL6+vm7ra/To0UpMTLSeO51OgikAAAAAAIAy5PaP710oICBAt956q7799lsFBweroKBAubm5LjU5OTnWPaiCg4Mv+ja+kue/VuPn53fZ4MvhcMjPz8/lAQAAAAAAgLJzXYVSp06d0nfffaeQkBC1bdtWVatWVWpqqjWemZmprKwsRUVFSZKioqK0e/duHTt2zKpJSUmRn5+fmjZtatVceI6SmpJzAAAAAAAAwH5uDaWeeeYZbdiwQd9//702b96sBx98UF5eXnr44Yfl7++v+Ph4JSYm6vPPP1d6eroGDhyoqKgodejQQZLUrVs3NW3aVP369dOuXbu0Zs0ajRkzRgkJCXI4HJKkIUOG6ODBgxo1apS+/vprvfnmm1q6dKlGjBjhzqkDAAAAAADc0Nx6T6l///vfevjhh/W///1PN910kzp16qQtW7bopptukiRNmzZNnp6e6t27t/Lz8xUTE6M333zTer2Xl5dWrFihJ554QlFRUapevboGDBig8ePHWzURERFauXKlRowYoRkzZqhevXp65513FBMTY/t8AQAAAAAA8BO3hlJLliz5xXEfHx8lJSUpKSnpsjXh4eFatWrVL56nS5cu2rlzZ6l6BAAAAAAAQNm7ru4pBQAAAAAAgBsDoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAA
ALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUgAAAAAAALAdoRQAAAAAAABsd92EUpMmTZKHh4eGDx9uHTt79qwSEhJUp04d1ahRQ71791ZOTo7L67KyshQbG6tq1aopMDBQI0eO1Llz51xq1q9frzZt2sjhcKhhw4ZKTk62YUYAAAAAAAC4nOsilNq+fbveeusttWzZ0uX4iBEj9Mknn2jZsmXasGGDjhw5ol69elnjRUVFio2NVUFBgTZv3qyFCxcqOTlZY8eOtWoOHTqk2NhY3XPPPcrIyNDw4cP1+OOPa82aNbbNDwAAAAAAAK7cHkqdOnVKcXFxmjdvnmrVqmUdz8vL0/z58zV16lTde++9atu2rRYsWKDNmzdry5YtkqS1a9dq3759ev/999WqVSv16NFDEyZMUFJSkgoKCiRJc+fOVUREhKZMmaLIyEgNHTpUf/jDHzRt2rTL9pSfny+n0+nyAAAAAAAAQNlxeyiVkJCg2NhYRUdHuxxPT09XYWGhy/EmTZqofv36SktLkySlpaWpRYsWCgoKsmpiYmLkdDq1d+9eq+bn546JibHOcSkTJ06Uv7+/9QgLC7vmeQIAAAAAAOA8t4ZSS5Ys0ZdffqmJEydeNJadnS1vb28FBAS4HA8KClJ2drZVc2EgVTJeMvZLNU6nU2fOnLlkX6NHj1ZeXp71OHz4cKnmBwAAUB5++OEHPfLII6pTp458fX3VokUL7dixwxo3xmjs2LEKCQmRr6+voqOj9c0337ic4/jx44qLi5Ofn58CAgIUHx+vU6dOudR89dVXuuuuu+Tj46OwsDBNnjzZlvkBAIAbg9tCqcOHD2vYsGFatGiRfHx83NXGJTkcDvn5+bk8AAAArgcnTpxQx44dVbVqVX366afat2+fpkyZ4nIbhMmTJ2vmzJmaO3eutm7dqurVqysmJkZnz561auLi4rR3716lpKRoxYoV2rhxowYPHmyNO51OdevWTeHh4UpPT9frr7+ucePG6e2337Z1vgAAoPKq4q43Tk9P17Fjx9SmTRvrWFFRkTZu3KjZs2drzZo1KigoUG5ursvVUjk5OQoODpYkBQcHa9u2bS7nLfl2vgtrfv6NfTk5OfLz85Ovr295TA0AAKDcvPbaawoLC9OCBQusYxEREdafjTGaPn26xowZo/vvv1+S9N577ykoKEgff/yx+vTpo/3792v16tXavn272rVrJ0maNWuWevbsqTfeeEOhoaFatGiRCgoK9O6778rb21vNmjVTRkaGpk6d6hJeAQAAlJbbrpTq2rWrdu/erYyMDOvRrl07xcXFWX+uWrWqUlNTrddkZmYqKytLUVFRkqSoqCjt3r1bx44ds2pSUlLk5+enpk2bWjUXnqOkpuQcAAAAFcny5cvVrl07/fGPf1RgYKBat26tefPmWeOHDh1Sdna2yz01/f391b59e5f7cgYEBFiBlCRFR0fL09NTW7dutWo6d+4sb29vqyYmJkaZmZk6ceLEJXvjy2IAAMDVcFsoVbNmTTVv3tzlUb16ddWpU0fNmzeXv7+/4uPjlZiYqM8//1zp6ekaOHCgoqKi1KFDB0lSt27d1LRpU/Xr10+7du3SmjVrNGbMGCUkJMjhcEiShgwZooMHD2rUqFH6+uuv9eabb2rp0qUaMWKEu6YOAABQagcPHtScOXPUqFEjrVmzRk888YSefvppLVy4UNL5+2pe6p6aF95zMzAw0GW8SpUqql279lXdu/Pn+LIYAABwNdz+7Xu/ZNq0afrd736n3r17q3Pn
zgoODtaHH35ojXt5eWnFihXy8vJSVFSUHnnkEfXv31/jx4+3aiIiIrRy5UqlpKTotttu05QpU/TOO+8oJibGHVMCAAC4JsXFxWrTpo1effVVtW7dWoMHD9agQYM0d+5cd7fGl8UAAICr4rZ7Sl3K+vXrXZ77+PgoKSlJSUlJl31NeHi4Vq1a9Yvn7dKli3bu3FkWLQIAALhVSEiIdZuCEpGRkfr73/8u6fx9NXNychQSEmLV5OTkqFWrVlbNhbc/kKRz587p+PHjv3pfzgvf4+ccDod1tToAAMCvua6vlAIAAICrjh07KjMz0+XYgQMHFB4eLumnq8SDg4Nd7qnpdDq1detWl/ty5ubmKj093apZt26diouL1b59e6tm48aNKiwstGpSUlLUuHFjl2/6AwAAKC1CKQAAgApkxIgR2rJli1599VV9++23Wrx4sd5++20lJCRIkjw8PDR8+HC9/PLLWr58uXbv3q3+/fsrNDRUDzzwgKSfrqzq3r27Bg0apG3btumLL77Q0KFD1adPH4WGhkqS+vbtK29vb8XHx2vv3r364IMPNGPGDCUmJrpr6gAAoJK5rj6+BwAAgF92++2366OPPtLo0aM1fvx4RUREaPr06YqLi7NqRo0apdOnT2vw4MHKzc1Vp06dtHr1avn4+Fg1ixYt0tChQ9W1a1d5enqqd+/emjlzpjXu7++vtWvXKiEhQW3btlXdunU1duxYDR482Nb5AgCAysvDGGPc3cT1zul0yt/fX3l5efLz83N3OwAAVErxydtL/dr5j95ehp24Yh9QeuW9dtfyMyOV788NAAA3sivdA/DxPQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7UoVSh08eLCs+wAAAKj02EMBAACcV6pQqmHDhrrnnnv0/vvv6+zZs2XdEwAAQKXEHgoAAOC8UoVSX375pVq2bKnExEQFBwfrz3/+s7Zt21bWvQEAAFQq7KEAAADOK1Uo1apVK82YMUNHjhzRu+++q6NHj6pTp05q3ry5pk6dqv/85z9l3ScAAECFxx4KAADgvGu60XmVKlXUq1cvLVu2TK+99pq+/fZbPfPMMwoLC1P//v119OjRsuoTAACg0mAPBQAAcI2h1I4dO/Tkk08qJCREU6dO1TPPPKPvvvtOKSkpOnLkiO6///6y6hMAAKDSYA8FAAAgVSnNi6ZOnaoFCxYoMzNTPXv21HvvvaeePXvK0/OnjCsiIkLJycm6+eaby7JXAACACo09FAAAwHmlCqXmzJmjxx57TI8++qhCQkIuWRMYGKj58+dfU3MAAACVCXsoAACA80oVSn3zzTe/WuPt7a0BAwaU5vQAAACVEnsoAACA80p1T6kFCxZo2bJlFx1ftmyZFi5ceM1NAQAAVEbsoQAAAM4rVSg1ceJE1a1b96LjgYGBevXVV6+5KQAAgMqIPRQAAMB5pQqlsrKyFBERcdHx8PBwZWVlXXNTAAAAlRF7KAAAgPNKFUoFBgbqq6++uuj4rl27VKdOnWtuCgAAoDJiDwUAAHBeqUKphx9+WE8//bQ+//xzFRUVqaioSOvWrdOwYcPUp0+fsu4RAACgUmAPBQAAcF6pvn1vwoQJ+v7779W1a1dVqfLTKYqLi9W/f3/uhwAAAHAZ7KEAAADOK1Uo5e3trQ8++EATJkzQrl275OvrqxYtWig8PLys+wMAAKg02EMBAACcV6pQqsStt96qW2+9tax6AQAAuCGw
hwIAAChlKFVUVKTk5GSlpqbq2LFjKi4udhlft25dmTQHAABQmbCHAgAAOK9UodSwYcOUnJys2NhYNW/eXB4eHmXdFwAAQKXDHgoAAOC8UoVSS5Ys0dKlS9WzZ8+y7gcAAKDSYg8FAABwnmdpXuTt7a2GDRuWdS8AAACVGnsoAACA80oVSv3lL3/RjBkzZIwp634AAAAqLfZQAAAA55Xq43ubNm3S559/rk8//VTNmjVT1apVXcY//PDDMmkOAACgMmEPBQAAcF6pQqmAgAA9+OCDZd0LAABApcYeCgAA4LxShVILFiwo6z4AAAAqPfZQAAAA55XqnlKSdO7cOX322Wd66623dPLkSUnSkSNHdOrUqTJrDgAAoLJhDwUAAPCTUl0p9a9//Uvdu3dXVlaW8vPz9dvf/lY1a9bUa6+9pvz8fM2dO7es+wQAAKjw2EMBAACcV6orpYYNG6Z27drpxIkT8vX1tY4/+OCDSk1NLbPmAAAAKhP2UAAAAOeV6kqpf/7zn9q8ebO8vb1djt9888364YcfyqQxAACAyoY9FAAAwHmlulKquLhYRUVFFx3/97//rZo1a15zUwAAAJUReygAAIDzShVKdevWTdOnT7eee3h46NSpU3rxxRfVs2fPsuoNAACgUmEPBQAAcF6pPr43ZcoUxcTEqGnTpjp79qz69u2rb775RnXr1tXf/va3su4RAACgUmAPBQAAcF6pQql69epp165dWrJkib766iudOnVK8fHxiouLc7lpJwAAAM5jDwUAAHBeqUIpSapSpYoeeeSRsuwFAACg0mMPBQAA8JNShVLvvffeL47379+/VM0AAABUZuyhAAAAzitVKDVs2DCX54WFhfrxxx/l7e2tatWqsaECAAC4BPZQAAAA55Xq2/dOnDjh8jh16pQyMzPVqVMnbtIJAABwGeyhAAAAzitVKHUpjRo10qRJky76P4AAAAC4PPZQAADgRlVmoZT00407jxw5UpanBAAAqPTYQwEAgBtRqe4ptXz5cpfnxhgdPXpUs2fPVseOHcukMQAAgMqGPRQAAMB5pQqlHnjgAZfnHh4euummm3TvvfdqypQpZdEXAABApcMeCgAA4LxShVLFxcVl3QcAAEClxx4KAADgvDK9p9TVmjNnjlq2bCk/Pz/5+fkpKipKn376qTV+9uxZJSQkqE6dOqpRo4Z69+6tnJwcl3NkZWUpNjZW1apVU2BgoEaOHKlz58651Kxfv15t2rSRw+FQw4YNlZycbMf0AAAAAAAAcBmlulIqMTHximunTp162bF69epp0qRJatSokYwxWrhwoe6//37t3LlTzZo104gRI7Ry5UotW7ZM/v7+Gjp0qHr16qUvvvhCklRUVKTY2FgFBwdr8+bNOnr0qPr376+qVavq1VdflSQdOnRIsbGxGjJkiBYtWqTU1FQ9/vjjCgkJUUxMTGmmDwAAUCpltYcCAACoDEoVSu3cuVM7d+5UYWGhGjduLEk6cOCAvLy81KZNG6vOw8PjF89z3333uTx/5ZVXNGfOHG3ZskX16tXT/PnztXjxYt17772SpAULFigyMlJbtmxRhw4dtHbtWu3bt0+fffaZgoKC1KpVK02YMEHPPvusxo0bJ29vb82dO1cRERHWfRoiIyO1adMmTZs2jVAKAADYqqz2UAAAAJVBqUKp++67TzVr1tTChQtVq1YtSdKJEyc0cOBA3XXXXfrLX/5y1ecsKirSsmXLdPr0aUVFRSk9PV2FhYWKjo62apo0aaL69esrLS1NHTp0UFpamlq0aKGgoCCrJiYmRk888YT27t2r1q1bKy0tzeUcJTXDhw+/bC/5+fnKz8+3njudzqueDwAAwM+Vxx4KAACgoirVPaWmTJmiiRMnWpspSapVq5Zefvnlq/7mmN27d6tGjRpyOBwaMmSIPvroIzVt2lTZ2dny9vZWQECAS31QUJCys7MlSdnZ2S6BVMl4ydgv1TidTp05c+aSPU2cOFH+/v7W
Iyws7KrmBAAAcClluYcCAACo6EoVSjmdTv3nP/+56Ph//vMfnTx58qrO1bhxY2VkZGjr1q164oknNGDAAO3bt680bZWZ0aNHKy8vz3ocPnzYrf0AAIDKoSz3UAAAABVdqT6+9+CDD2rgwIGaMmWK7rjjDknS1q1bNXLkSPXq1euqzuXt7a2GDRtKktq2bavt27drxowZeuihh1RQUKDc3FyXq6VycnIUHBwsSQoODta2bdtczlfy7XwX1vz8G/tycnLk5+cnX1/fS/bkcDjkcDiuah4AAAC/piz3UAAAABVdqa6Umjt3rnr06KG+ffsqPDxc4eHh6tu3r7p3764333zzmhoqLi5Wfn6+2rZtq6pVqyo1NdUay8zMVFZWlqKioiRJUVFR2r17t44dO2bVpKSkyM/PT02bNrVqLjxHSU3JOQAAAOxSnnsoAACAiqZUV0pVq1ZNb775pl5//XV99913kqQGDRqoevXqV3We0aNHq0ePHqpfv75OnjypxYsXa/369VqzZo38/f0VHx+vxMRE1a5dW35+fnrqqacUFRWlDh06SJK6deumpk2bql+/fpo8ebKys7M1ZswYJSQkWFc6DRkyRLNnz9aoUaP02GOPad26dVq6dKlWrlxZmqkDAACUWlntoQAAACqDUoVSJY4ePaqjR4+qc+fO8vX1lTHmqr7C+NixY+rfv7+OHj0qf39/tWzZUmvWrNFvf/tbSdK0adPk6emp3r17Kz8/XzExMS7/F9HLy0srVqzQE088oaioKFWvXl0DBgzQ+PHjrZqIiAitXLlSI0aM0IwZM1SvXj298847iomJuZapAwAAlNq17qEAAAAqg1KFUv/73//0pz/9SZ9//rk8PDz0zTff6JZbblF8fLxq1ap1xd8eM3/+/F8c9/HxUVJSkpKSki5bEx4erlWrVv3iebp06aKdO3deUU8AAADlpaz2UAAAAJVBqe4pNWLECFWtWlVZWVmqVq2adfyhhx7S6tWry6w5AACAyoQ9FAAAwHmlulJq7dq1WrNmjerVq+dyvFGjRvrXv/5VJo0BAABUNuyhAAAAzivVlVKnT592+b97JY4fP27dYBwAAACu2EMBAACcV6pQ6q677tJ7771nPffw8FBxcbEmT56se+65p8yaAwAAqEzYQwEAAJxXqo/vTZ48WV27dtWOHTtUUFCgUaNGae/evTp+/Li++OKLsu4RAACgUmAPBQAAcF6prpRq3ry5Dhw4oE6dOun+++/X6dOn1atXL+3cuVMNGjQo6x4BAAAqBfZQAAAA5131lVKFhYXq3r275s6dq+eff748egIAAKh02EMBAAC4uuorpapWraqvvvqqPHoBAACotNhDAQAAuCrVx/ceeeQRzZ8/v6x7AQAAqNTYQwEAAJxXqhudnzt3Tu+++64+++wztW3bVtWrV3cZnzp1apk0BwAAUJmwhwIAADjvqkKpgwcP6uabb9aePXvUpk0bSdKBAwdcajw8PMquOwAAgEqAPRQAAMDFriqUatSokY4eParPP/9ckvTQQw9p5syZCgoKKpfmAAAAKgP2UAAAABe7qntKGWNcnn/66ac6ffp0mTYEAABQ2bCHAgAAuFipbnRe4ucbLAAAAPw69lAAAABXGUp5eHhcdL8D7n8AAADwy9hDAQAAXOyq7illjNGjjz4qh8MhSTp79qyGDBly0TfHfPjhh2XXIQAAQAXHHgoAAOBiVxVKDRgwwOX5I488UqbNAAAAVEbsoQAAAC52VaHUggULyqsPAACASos9FAAAwMWu6UbnAAAAAAAAQGkQSgEAAFRgkyZNkoeHh4YPH24dO3v2rBISElSnTh3VqFFDvXv3Vk5OjsvrsrKyFBsbq2rVqikwMFAjR47UuXPnXGrWr1+vNm3ayOFwqGHDhkpOTrZhRgAA4EZBKAUAAFBBbd++XW+99ZZatmzpcnzEiBH65JNPtGzZMm3YsEFHjhxRr169rPGioiLFxsaqoKBAmzdv1sKF
C5WcnKyxY8daNYcOHVJsbKzuueceZWRkaPjw4Xr88ce1Zs0a2+YHAAAqN0IpAACACujUqVOKi4vTvHnzVKtWLet4Xl6e5s+fr6lTp+ree+9V27ZttWDBAm3evFlbtmyRJK1du1b79u3T+++/r1atWqlHjx6aMGGCkpKSVFBQIEmaO3euIiIiNGXKFEVGRmro0KH6wx/+oGnTprllvgAAoPIhlAIAAKiAEhISFBsbq+joaJfj6enpKiwsdDnepEkT1a9fX2lpaZKktLQ0tWjRQkFBQVZNTEyMnE6n9u7da9X8/NwxMTHWOS4lPz9fTqfT5QEAAHA5V/XtewAAAHC/JUuW6Msvv9T27dsvGsvOzpa3t7cCAgJcjgcFBSk7O9uquTCQKhkvGfulGqfTqTNnzsjX1/ei9544caJeeumlUs8LAADcWLhSCgAAoAI5fPiwhg0bpkWLFsnHx8fd7bgYPXq08vLyrMfhw4fd3RIAALiOEUoBAABUIOnp6Tp27JjatGmjKlWqqEqVKtqwYYNmzpypKlWqKCgoSAUFBcrNzXV5XU5OjoKDgyVJwcHBF30bX8nzX6vx8/O75FVSkuRwOOTn5+fyAAAAuBxCKQAAgAqka9eu2r17tzIyMqxHu3btFBcXZ/25atWqSk1NtV6TmZmprKwsRUVFSZKioqK0e/duHTt2zKpJSUmRn5+fmjZtatVceI6SmpJzAAAAXCvuKQUAAFCB1KxZU82bN3c5Vr16ddWpU8c6Hh8fr8TERNWuXVt+fn566qmnFBUVpQ4dOkiSunXrpqZNm6pfv36aPHmysrOzNWbMGCUkJMjhcEiShgwZotmzZ2vUqFF67LHHtG7dOi1dulQrV660d8IAAKDSIpQCAACoZKZNmyZPT0/17t1b+fn5iomJ0ZtvvmmNe3l5acWKFXriiScUFRWl6tWra8CAARo/frxVExERoZUrV2rEiBGaMWOG6tWrp3feeUcxMTHumBIAAKiECKUAAAAquPXr17s89/HxUVJSkpKSki77mvDwcK1ateoXz9ulSxft3LmzLFoEAAC4CPeUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtnNrKDVx4kTdfvvtqlmzpgIDA/XAAw8oMzPTpebs2bNKSEhQnTp1VKNGDfXu3Vs5OTkuNVlZWYqNjVW1atUUGBiokSNH6ty5cy4169evV5s2beRwONSwYUMlJyeX9/QAAAAAAABwGW4NpTZs2KCEhARt2bJFKSkpKiwsVLdu3XT69GmrZsSIEfrkk0+0bNkybdiwQUeOHFGvXr2s8aKiIsXGxqqgoECbN2/WwoULlZycrLFjx1o1hw4dUmxsrO655x5lZGRo+PDhevzxx7VmzRpb5wsAAAAAAICfVHHnm69evdrleXJysgIDA5Wenq7OnTsrLy9P8+fP1+LFi3XvvfdKkhYsWKDIyEht2bJFHTp00Nq1a7Vv3z599tlnCgoKUqtWrTRhwgQ9++yzGjdunLy9vTV37lxFRERo
ypQpkqTIyEht2rRJ06ZNU0xMzEV95efnKz8/33rudDrLcRUAAAAAAABuPNfVPaXy8vIkSbVr15Ykpaenq7CwUNHR0VZNkyZNVL9+faWlpUmS0tLS1KJFCwUFBVk1MTExcjqd2rt3r1Vz4TlKakrO8XMTJ06Uv7+/9QgLCyu7SQIAAAAAAOD6CaWKi4s1fPhwdezYUc2bN5ckZWdny9vbWwEBAS61QUFBys7OtmouDKRKxkvGfqnG6XTqzJkzF/UyevRo5eXlWY/Dhw+XyRwBAAAAAADwE7d+fO9CCQkJ2rNnjzZt2uTuVuRwOORwONzdBgAAAAAAQKV1XVwpNXToUK1YsUKff/656tWrZx0PDg5WQUGBcnNzXepzcnIUHBxs1fz82/hKnv9ajZ+fn3x9fct6OgAAAAAAAPgVbg2ljDEaOnSoPvroI61bt04REREu423btlXVqlWVmppqHcvMzFRWVpaioqIkSVFRUdq9e7eOHTtm1aSkpMjPz09Nmza1ai48R0lNyTkAAAAAAABgL7d+fC8hIUGLFy/WP/7xD9WsWdO6B5S/v798fX3l7++v+Ph4JSYmqnbt2vLz89NTTz2lqKgodejQQZLUrVs3NW3aVP369dPkyZOVnZ2tMWPGKCEhwfoI3pAhQzR79myNGjVKjz32mNatW6elS5dq5cqVbps7AAAAAADAjcytV0rNmTNHeXl56tKli0JCQqzHBx98YNVMmzZNv/vd79S7d2917txZwcHB+vDDD61xLy8vrVixQl5eXoqKitIjjzyi/v37a/z48VZNRESEVq5cqZSUFN12222aMmWK3nnnHcXExNg6XwAAAAAAAPzErVdKGWN+tcbHx0dJSUlKSkq6bE14eLhWrVr1i+fp0qWLdu7cedU9AgAAAAAAoOxdFzc6BwAAAAAAwI2FUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAgApk4sSJuv3221WzZk0FBgbqgQceUGZmpkvN2bNnlZCQoDp16qhGjRrq3bu3cnJyXGqysrIUGxuratWqKTAwUCNHjtS5c+dcatavX682bdrI4XCoYcOGSk5OLu/pAQCAGwihFAAAQAWyYcMGJSQkaMuWLUpJSVFhYaG6deum06dPWzUjRozQJ598omXLlmnDhg06cuSIevXqZY0XFRUpNjZWBQUF2rx5sxYuXKjk5GSNHTvWqjl06JBiY2N1zz33KCMjQ8OHD9fjjz+uNWvW2DpfAABQeVVxdwMAAAC4cqtXr3Z5npycrMDAQKWnp6tz587Ky8vT/PnztXjxYt17772SpAULFigyMlJbtmxRhw4dtHbtWu3bt0+fffaZgoKC1KpVK02YMEHPPvusxo0bJ29vb82dO1cRERGaMmWKJCkyMlKbNm3StGnTFBMTY/u8AQBA5cOVUgAAABVYXl6eJKl27dqSpPT0dBUWFio6OtqqadKkierXr6+0tDRJ
Ulpamlq0aKGgoCCrJiYmRk6nU3v37rVqLjxHSU3JOS4lPz9fTqfT5QEAAHA5hFIAAAAVVHFxsYYPH66OHTuqefPmkqTs7Gx5e3srICDApTYoKEjZ2dlWzYWBVMl4ydgv1TidTp05c+aS/UycOFH+/v7WIyws7JrnCAAAKi9CKQAAgAoqISFBe/bs0ZIlS9zdiiRp9OjRysvLsx6HDx92d0sAAOA6xj2lAAAAKqChQ4dqxYoV2rhxo+rVq2cdDw4OVkFBgXJzc12ulsrJyVFwcLBVs23bNpfzlXw734U1P//GvpycHPn5+cnX1/eSPTkcDjkcjmueGwAAuDFwpRQAAEAFYozR0KFD9dFHH2ndunWKiIhwGW/btq2qVq2q1NRU61hmZqaysrIUFRUlSYqKitLu3bt17NgxqyYlJUV+fn5q2rSpVXPhOUpqSs4BAABwrbhSCgAAoAJJSEjQ4sWL9Y9//EM1a9a07gHl7+8vX19f+fv7Kz4+XomJiapdu7b8/Pz01FNPKSoqSh06dJAkdevWTU2bNlW/fv00efJkZWdna8yYMUpISLCudBoyZIhmz56tUaNG6bHHHtO6deu0dOlSrVy50m1zBwAAlQtXSgEAAFQgc+bMUV5enrp06aKQkBDr8cEHH1g106ZN0+9+9zv17t1bnTt3VnBwsD788ENr3MvLSytWrJCXl5eioqL0yCOPqH///ho/frxVExERoZUrVyolJUW33XabpkyZonfeeUcxMTG2zhcAAFReXCkFAABQgRhjfrXGx8dHSUlJSkpKumxNeHi4Vq1a9Yvn6dKli3bu3HnVPQIAAFwJrpQCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANiOUAoAAAAAAAC2I5QCAAAAAACA7QilAAAAAAAAYDtCKQAAAAAAANjOraHUxo0bdd999yk0NFQeHh76+OOPXcaNMRo7dqxCQkLk6+ur6OhoffPNNy41x48fV1xcnPz8/BQQEKD4+HidOnXKpearr77SXXfdJR8fH4WFhWny5MnlPTUAAAAAAAD8AreGUqdPn9Ztt92mpKSkS45PnjxZM2fO1Ny5c7V161ZVr15dMTExOnv2rFUTFxenvXv3KiUlRStWrNDGjRs1ePBga9zpdKpbt24KDw9Xenq6Xn/9dY0bN05vv/12uc8PAAAAAAAAl1bFnW/eo0cP9ejR45JjxhhNnz5dY8aM0f333y9Jeu+99xQUFKSPP/5Yffr00f79+7V69Wpt375d7dq1kyTNmjVLPXv21BtvvKHQ0FAtWrRIBQUFevfdd+Xt7a1mzZopIyNDU6dOdQmvLpSfn6/8/HzrudPpLOOZAwAAAAAA3Niu23tKHTp0SNnZ2YqOjraO+fv7q3379kpLS5MkpaWlKSAgwAqkJCk6Olqenp7aunWrVdO5c2d5e3tbNTExMcrMzNSJEycu+d4TJ06Uv7+/9QgLCyuPKQIAAAAAANywrttQKjs7W5IUFBTkcjwoKMgay87OVmBgoMt4lSpVVLt2bZeaS53jwvf4udGjRysvL896HD58+NonBAAAAAAAAItbP753vXI4HHI4HO5uAwAAAAAAoNK6bq+UCg4OliTl5OS4HM/JybHGgoODdezYMZfxc+fO6fjx4y41lzrHhe8BAAAAAAAAe123oVRERISCg4OVmppqHXM6ndq6dauioqIkSVFRUcrNzVV6erpVs27dOhUXF6t9+/ZWzcaNG1VYWGjVpKSkqHHjxqpVq5ZNswEAAAAAAMCF3BpKnTp1ShkZGcrIyJD0083NMzIylJWVJQ8PDw0fPlwvv/yyli9frt27d6t///4KDQ3VAw88IEmKjIxU9+7dNWjQIG3btk1ffPGFhg4dqj59+ig0NFSS1LdvX3l7eys+
Pl579+7VBx98oBkzZigxMdFNswYAAAAAAIBb7ym1Y8cO3XPPPdbzkqBowIABSk5O1qhRo3T69GkNHjxYubm56tSpk1avXi0fHx/rNYsWLdLQoUPVtWtXeXp6qnfv3po5c6Y17u/vr7Vr1yohIUFt27ZV3bp1NXbsWA0ePNi+iQIAAAAAAMCFW0OpLl26yBhz2XEPDw+NHz9e48ePv2xN7dq1tXjx4l98n5YtW+qf//xnqfsEAAAAAABA2bpu7ykFAAAAAACAyotQCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABgO0IpAAAAAAAA2I5QCgAAAAAAALYjlAIAAAAAAIDtCKUAAAAAAABguyrubgAVW3zy9mt6/fxHby+jTgAA7natvxMAAABwYyGUglu58z9gKnIgdi3rVpHnDQAAAACoPAilwP/ZLiXW7cZSkQNUQkxcDf7dhhsJ/34EAMC9CKUqAf4DonRu1HW7UecNAAAAALi+EEoBwHXOnUFiRb5v3I16BQTBMwAAACoKQikAwHWJcAUAAACo3AilAAD4GQIxAAAAoPwRSgEAyg3hDgAAAIDL8XR3AwAAAAAAALjx3FBXSiUlJen1119Xdna2brvtNs2aNUt33HGHu9uSxNUEAADg+nQ9758AAEDFdsOEUh988IESExM1d+5ctW/fXtOnT1dMTIwyMzMVGBjo7vYAAACuO+yfLq8ifzspAADXixvm43tTp07VoEGDNHDgQDVt2lRz585VtWrV9O6777q7NQAAgOsS+ycAAFCebogrpQoKCpSenq7Ro0dbxzw9PRUdHa20tLSL6vPz85Wfn289z8vLkyQ5nc7y6/HMqXI7NwAAlV15/o4uObcxptze43p0tfsnyf49VEXeP13LmiQsSr+m906Ka3tNrwcA4Ndc6f7phgil/vvf/6qoqEhBQUEux4OCgvT1119fVD9x4kS99NJLFx0PCwsrtx4BAEDpvf9k+b/HyZMn5e/vX/5vdJ242v2TxB7qatjxM3s9vjcA4Mbya/unGyKUulqjR49WYmKi9by4uFjHjx9XnTp15OHhUerzOp1OhYWF6fDhw/Lz8yuLVnGFWHv3YN3dg3V3H9bePcp73Y0xOnnypEJDQ8v83JVNee2hLoW/b/Zhre3FetuHtbYPa22v62G9r3T/dEOEUnXr1pWXl5dycnJcjufk5Cg4OPiieofDIYfD4XIsICCgzPrx8/PjL6KbsPbuwbq7B+vuPqy9e5Tnut9IV0iVuNr9k1T+e6hL4e+bfVhre7He9mGt7cNa28vd630l+6cb4kbn3t7eatu2rVJTU61j
xcXFSk1NVVRUlBs7AwAAuD6xfwIAAOXthrhSSpISExM1YMAAtWvXTnfccYemT5+u06dPa+DAge5uDQAA4LrE/gkAAJSnGyaUeuihh/Sf//xHY8eOVXZ2tlq1aqXVq1dfdPPO8uRwOPTiiy9edFk7yh9r7x6su3uw7u7D2rsH615+rof90+Xwz90+rLW9WG/7sNb2Ya3tVZHW28PcaN9vDAAAAAAAALe7Ie4pBQAAAAAAgOsLoRQAAAAAAABsRygFAAAAAAAA2xFKAQAAAAAAwHaEUmUsKSlJN998s3x8fNS+fXtt27btF+uXLVumJk2ayMfHRy1atNCqVats6rTyuZq1nzdvnu666y7VqlVLtWrVUnR09K/+s8KlXe3PfIklS5bIw8NDDzzwQPk2WEld7brn5uYqISFBISEhcjgcuvXWW/n3TSld7dpPnz5djRs3lq+vr8LCwjRixAidPXvWpm4rh40bN+q+++5TaGioPDw89PHHH//qa9avX682bdrI4XCoYcOGSk5OLvc+YZ/S/u7BeRMnTtTtt9+umjVrKjAwUA888IAyMzNdas6ePauEhATVqVNHNWrUUO/evZWTk+NSk5WVpdjYWFWrVk2BgYEaOXKkzp07Z+dUKpxJkybJw8NDw4cPt46x1mXrhx9+0COPPKI6derI19dXLVq00I4dO6xxY4zGjh2rkJAQ+fr6Kjo6Wt98843LOY4fP664uDj5+fkpICBA8fHxOnXqlN1Tua4VFRXphRdeUEREhHx9fdWgQQNNmDBBF36XGmtder+2/ymrtf3qq6901113ycfHR2FhYZo8eXJ5T82VQZlZsmSJ8fb2Nu+++67Zu3evGTRokAkICDA5OTmXrP/iiy+Ml5eXmTx5stm3b58ZM2aMqVq1qtm9e7fNnVd8V7v2ffv2NUlJSWbnzp1m//795tFHHzX+/v7m3//+t82dV2xXu+4lDh06ZH7zm9+Yu+66y9x///32NFuJXO265+fnm3bt2pmePXuaTZs2mUOHDpn169ebjIwMmzuv+K527RctWmQcDodZtGiROXTokFmzZo0JCQkxI0aMsLnzim3VqlXm+eefNx9++KGRZD766KNfrD948KCpVq2aSUxMNPv27TOzZs0yXl5eZvXq1fY0jHJV2t89cBUTE2MWLFhg9uzZYzIyMkzPnj1N/fr1zalTp6yaIUOGmLCwMJOammp27NhhOnToYO68805r/Ny5c6Z58+YmOjra7Ny506xatcrUrVvXjB492h1TqhC2bdtmbr75ZtOyZUszbNgw6zhrXXaOHz9uwsPDzaOPPmq2bt1qDh48aNasWWO+/fZbq2bSpEnG39/ffPzxx2bXrl3m97//vYmIiDBnzpyxarp3725uu+02s2XLFvPPf/7TNGzY0Dz88MPumNJ165VXXjF16tQxK1asMIcOHTLLli0zNWrUMDNmzLBqWOvS+7X9T1msbV5engkKCjJxcXFmz5495m9/+5vx9fU1b731ll3TNIRSZeiOO+4wCQkJ1vOioiITGhpqJk6ceMn6P/3pTyY2NtblWPv27c2f//zncu2zMrratf+5c+fOmZo1a5qFCxeWV4uVUmnW/dy5c+bOO+8077zzjhkwYAChVClc7brPmTPH3HLLLaagoMCuFiutq137hIQEc++997ocS0xMNB07dizXPiuzKwmlRo0aZZo1a+Zy7KGHHjIxMTHl2Bnscq2/83Fpx44dM5LMhg0bjDHG5ObmmqpVq5ply5ZZNfv37zeSTFpamjHmp/9g8vT0NNnZ2VbNnDlzjJ+fn8nPz7d3AhXAyZMnTaNGjUxKSoq5++67rVCKtS5bzz77rOnUqdNlx4uLi01wcLB5/fXXrWO5ubnG4XCYv/3tb8YYY/bt22ckme3bt1s1n376qfHw8DA//PBD+TVfwcTGxprHHnvM5VivXr1MXFycMYa1Lks/3/+U1dq++eabplatWi7/Hnn22WdN48aNy3lG5/HxvTJS
UFCg9PR0RUdHW8c8PT0VHR2ttLS0S74mLS3NpV6SYmJiLluPSyvN2v/cjz/+qMLCQtWuXbu82qx0Srvu48ePV2BgoOLj4+1os9IpzbovX75cUVFRSkhIUFBQkJo3b65XX31VRUVFdrVdKZRm7e+8806lp6dbHy06ePCgVq1apZ49e9rS842K36+VV1n8zsel5eXlSZK1F0pPT1dhYaHLWjdp0kT169e31jotLU0tWrRQUFCQVRMTEyOn06m9e/fa2H3FkJCQoNjY2Iv+/cRal63ly5erXbt2+uMf/6jAwEC1bt1a8+bNs8YPHTqk7Oxsl/X29/dX+/btXdY7ICBA7dq1s2qio6Pl6emprVu32jeZ69ydd96p1NRUHThwQJK0a9cubdq0ST169JDEWpenslrbtLQ0de7cWd7e3lZNTEyMMjMzdeLECVvmUsWWd7kB/Pe//1VRUZHLLwpJCgoK0tdff33J12RnZ1+yPjs7u9z6rIxKs/Y/9+yzzyo0NPSiTQIurzTrvmnTJs2fP18ZGRk2dFg5lWbdDx48qHXr1ikuLk6rVq3St99+qyeffFKFhYV68cUX7Wi7UijN2vft21f//e9/1alTJxljdO7cOQ0ZMkT/93//Z0fLN6zL/X51Op06c+aMfH193dQZrlVZ/M7HxYqLizV8+HB17NhRzZs3l/TT3yNvb28FBAS41F64V73c37WSMZy3ZMkSffnll9q+fftFY6x12Tp48KDmzJmjxMRE/d///Z+2b9+up59+Wt7e3howYIC1Xr/032HZ2dkKDAx0Ga9SpYpq167Nel/gueeek9PpVJMmTeTl5aWioiK98soriouLkyTWuhyV1dpmZ2crIiLionOUjNWqVatc+nfpqdzfAbjOTZo0SUuWLNH69evl4+Pj7nYqrZMnT6pfv36aN2+e6tat6+52bijFxcUKDAzU22+/LS8vL7Vt21Y//PCDXn/9dUKpcrZ+/Xq9+uqrevPNN9W+fXt9++23GjZsmCZMmKAXXnjB3e0BgKSfruDZs2ePNm3a5O5WKqXDhw9r2LBhSklJYa9pg+LiYrVr106vvvqqJKl169bas2eP5s6dqwEDBri5u8pl6dKlWrRokRYvXqxmzZopIyNDw4cPV2hoKGuNK0YoVUbq1q0rLy+vi74lIycnR8HBwZd8TXBw8FXV49JKs/Yl3njjDU2aNEmfffaZWrZsWZ5tVjpXu+7fffedvv/+e913333WseLiYkk/JfaZmZlq0KBB+TZdCZTm5z0kJERVq1aVl5eXdSwyMlLZ2dkqKChwuVwXl1eatX/hhRfUr18/Pf7445KkFi1a6PTp0xo8eLCef/55eXryKfrycLnfr35+flwlVcFdy+98XNrQoUO1YsUKbdy4UfXq1bOOBwcHq6CgQLm5uS5X8Fy41sHBwRd982HJPxv+eZyXnp6uY8eOqU2bNtaxoqIibdy4UbNnz9aaNWtY6zIUEhKipk2buhyLjIzU3//+d0nn1ysnJ0chISFWTU5Ojlq1amXVHDt2zOUc586d0/Hjx1nvC4wcOVLPPfec+vTpI+mnfc6//vUvTZw4UQMGDGCty1FZre3l9kwXvkd5YzdcRry9vdW2bVulpqZax4qLi5WamqqoqKhLviYqKsqlXpJSUlIuW49LK83aS9LkyZM1YcIErV692uVztrgyV7vuTZo00e7du5WRkWE9fv/73+uee+5RRkaGwsLC7Gy/wirNz3vHjh317bffWiGgJB04cEAhISEEUlehNGv/448/XhQ8lYSD5oKvS0bZ4vdr5VXa3/m4mDFGQ4cO1UcffaR169Zd9PGNtm3bqmrVqi5rnZmZqaysLGuto6KitHv3bpf/6ElJSZGfn99FocCNrGvXrhftgdq1a6e4uDjrz6x12enYsaMyMzNdjh04cEDh4eGSpIiICAUHB7ust9Pp1NatW13WOzc3V+np6VbNunXrVFxcrPbt29swi4rhcvuckj0na11+ympt
o6KitHHjRhUWFlo1KSkpaty4sS0f3ZMkvn2vDC1ZssQ4HA6TnJxs9u3bZwYPHmwCAgKsb8no16+fee6556z6L774wlSpUsW88cYbZv/+/ebFF180VatWNbt373bXFCqsq137SZMmGW9vb/P//t//M0ePHrUeJ0+edNcUKqSrXfef49v3Sudq1z0rK8vUrFnTDB061GRmZpoVK1aYwMBA8/LLL7trChXW1a79iy++aGrWrGn+9re/mYMHD5q1a9eaBg0amD/96U/umkKFdPLkSbNz506zc+dOI8lMnTrV7Ny50/zrX/8yxhjz3HPPmX79+ln1Bw8eNNWqVTMjR440+/fvN0lJScbLy8usXr3aXVNAGfq1v4e4Mk888YTx9/c369evd9kL/fjjj1bNkCFDTP369c26devMjh07TFRUlImKirLGz507Z5o3b266detmMjIyzOrVq81NN91kRo8e7Y4pVSgXfvueMax1Wdq2bZupUqWKeeWVV8w333xjFi1aZKpVq2bef/99q2bSpEkmICDA/OMf/zBfffWVuf/++01ERIQ5c+aMVdO9e3fTunVrs3XrVrNp0ybTqFEj8/DDD7tjStetAQMGmN/85jdmxYoV5tChQ+bDDz80devWNaNGjbJqWOvS+7X9T1msbW5urgkKCjL9+vUze/bsMUuWLDHVqlUzb731lm3zJJQqY7NmzTL169c33t7e5o477jBbtmyxxu6++24zYMAAl/qlS5eaW2+91Xh7e5tmzZqZlStX2txx5XE1ax8eHm4kXfR48cUX7W+8grvan/kLEUqV3tWu++bNm0379u2Nw+Ewt9xyi3nllVfMuXPnbO66criatS8sLDTjxo0zDRo0MD4+PiYsLMw8+eST5sSJE/Y3XoF9/vnnl/x3dslaDxgwwNx9990XvaZVq1bG29vb3HLLLWbBggW2943y80t/D3FlLvV3SpLL35UzZ86YJ5980tSqVctUq1bNPPjgg+bo0aMu5/n+++9Njx49jK+vr6lbt675y1/+YgoLC22eTcXz81CKtS5bn3zyiWnevLlxOBymSZMm5u2333YZLy4uNi+88IIJCgoyDofDdO3a1WRmZrrU/O9//zMPP/ywqVGjhvHz8zMDBw7kf2D/jNPpNMOGDTP169c3Pj4+5pZbbjHPP/+8yc/Pt2pY69L7tf1PWa3trl27TKdOnYzD4TC/+c1vzKRJk+yaojHGGA9j+PwAAAAAAAAA7MU9pQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAAGA7QikAAAAAAADYjlAKAAAAAAAAtiOUAgAAAAAAgO0IpQAAAAAAl/X999/Lw8NDGRkZ7m5FktSlSxcNHz7c3W0AKAOEUgAqtfLatHTp0kUeHh7y8PCQj4+PmjZtqjfffNMaT05OtsY9PT1Vr149DRw4UMeOHSvzXgAAAOz20UcfqUOHDvL391fNmjXVrFmzq95zeXh46OOPP3Y5VlRUpEmTJqlJkyby9fVV7dq11b59e73zzjtWzYcffqgJEyaUwSwAuFsVdzcAABXVoEGDNH78eP3444967733lJCQoFq1aunhhx+WJPn5+SkzM1PFxcXatWuXBg4cqCNHjmjNmjVu7hwAANyICgsLVbVq1Ws+T2pqqh566CG98sor+v3vfy8PDw/t27dPKSkp13zul156SW+99ZZmz56tdu3ayel0aseOHTpx4oRVU7t27Wt+HwDXB66UAlBpPfroo9qwYYNmzJhhXbX0/fffa8OGDbrjjjvkcDgUEhKi5557TufOnbNe16VLFw0dOlRDhw6Vv7+/6tatqxdeeEHGGJfzV6tWTcHBwbrllls0btw4NWrUSMuXL7fGPTw8FBwcrNDQUPXo0UNPP/20PvvsM505c8a2NQAAAJVbcXGxJk+erIYNG8rhcKh+/fp65ZVXrI/cffDBB7r77rvl4+OjRYsWSZLeeecdRUZGysfH
R02aNHG52luStm3bptatW8vHx0ft2rXTzp07XcY/+eQTdezYUSNHjlTjxo1166236oEHHlBSUpJL3T/+8Q+1adNGPj4+uuWWW/TSSy9Ze66bb75ZkvTggw/Kw8PDer58+XI9+eST+uMf/6iIiAjddtttio+P1zPPPGOd98Ir4devX2/t8y58PProo1fUBwD34kopAJXWjBkzdODAATVv3lzjx4+X9NMl4T179tSjjz6q9957T19//bUGDRokHx8fjRs3znrtwoULFR8fr23btmnHjh0aPHiw6tevr0GDBl32/Xx9fVVQUPCL48XFxWyCAABAmRk9erTmzZunadOmqVOnTjp69Ki+/vpra/y5557TlClTrJBp0aJFGjt2rGbPnq3WrVtr586dGjRokKpXr64BAwbo1KlT+t3vfqff/va3ev/993Xo0CENGzbM5T2Dg4O1ePFi7dmzR82bN79kX//85z/Vv39/zZw5U3fddZe+++47DR48WJL04osvavv27QoMDNSCBQvUvXt3eXl5Wedet26dnnzySd10002/Ov8777xTR48etZ7v379fPXv2VOfOna+oDwBuZgCgErv77rvNsGHDrOf/93//Zxo3bmyKi4utY0lJSaZGjRqmqKjIek1kZKRLzbPPPmsiIyMved5z586Zv/71r0aSmT17tjHGmAULFhh/f3+r/sCBA+bWW2817dq1K4dZAgCAG5HT6TQOh8PMmzfvorFDhw4ZSWb69Okuxxs0aGAWL17scmzChAkmKirKGGPMW2+9ZerUqWPOnDljjc+ZM8dIMjt37jTGGHPq1CnTs2dPI8mEh4ebhx56yMyfP9+cPXvWek3Xrl3Nq6++6vI+f/3rX01ISIj1XJL56KOPXGr27t1rIiMjjaenp2nRooX585//bFatWuVS8/P9XYn//ve/5pZbbjFPPvnkVfUBwH34+B6AG8r+/fsVFRUlDw8P61jHjh116tQp/fvf/7aOdejQwaUmKipK33zzjYqKiqxjb775pmrUqCFfX18NGjRII0aM0BNPPGGN5+XlqUaNGqpWrZoaN26soKAg67J5AACAa7V//37l5+era9eul61p166d9efTp0/ru+++U3x8vGrUqGE9Xn75ZX333XfWOVu2bCkfHx/rdVFRUS7nrF69ulauXKlvv/1WY8aMUY0aNfSXv/xFd9xxh3788UdJ0q5duzR+/HiX9xk0aJCOHj1q1VxK06ZNtWfPHm3ZskWPPfaYjh07pvvuu0+PP/74L65FYWGhevfurfDwcM2YMcM6Xto+ANiDj+8BQCnFxcXp+eefl6+vr0JCQuTp6Zrz16xZU19++aU8PT0VEhIiX19fN3UKAAAqoyvZW1SvXt3686lTpyRJ8+bNU/v27V3qSj4+dzUaNGigBg0a6PHHH9fzzz+vW2+9VR988IEGDhyoU6dO6aWXXlKvXr0uet2FgdeleHp66vbbb9ftt9+u4cOH6/3331e/fv30/PPPKyIi4pKveeKJJ3T48GFt27ZNVaqc/8/ca+kDQPkjlAJQqXl7e7tc3RQZGam///3vMsZYV0J98cUXqlmzpurVq2fVbd261eU8W7ZsUaNGjVw2bP7+/mrYsOFl39vT0/MXxwEAAK5Fo0aN5Ovrq9TU1F+9kkiSgoKCFBoaqoMHDyouLu6SNZGRkfrrX/+qs2fPWqHNli1bfvXcN998s6pVq6bTp09Lktq0aaPMzMxf3AtVrVrVZZ92OU2bNpUk69w/N3XqVC1dulSbN29WnTp1XMaupA8A7kMoBaBSu/nmm7V161Z9//33qlGjhp588klNnz5dTz31lIYOHarMzEy9+OKLSkxMdLnSKSsrS4mJifrzn/+sL7/8UrNmzdKUKVPcOBMAAABXPj4+evbZZzVq1Ch5e3urY8eO+s9//qO9e/de9iN9L730kp5++mn5+/ure/fuys/P144dO3TixAklJiaqb9++ev755zVo0CCNHj1a33//vd54
4w2Xc4wbN04//vijevbsqfDwcOXm5mrmzJkqLCzUb3/7W0nS2LFj9bvf/U7169fXH/7wB3l6emrXrl3as2ePXn75ZUk/7dNSU1PVsWNHORwO1apVS3/4wx/UsWNH3XnnnQoODtahQ4c0evRo3XrrrWrSpMlF8/nss880atQoJSUlqW7dusrOzpb001Vk/v7+V9QHAPfhnlIAKrVnnnlGXl5eatq0qW666SYVFhZq1apV2rZtm2677TYNGTJE8fHxGjNmjMvr+vfvrzNnzuiOO+5QQkKChg0bZn1TCwAAwPXihRde0F/+8heNHTtWkZGReuihh3Ts2LHL1j/++ON65513tGDBArVo0UJ33323kpOTrY/F1ahRQ5988ol2796t1q1b6/nnn9drr73mco67775bBw8eVP/+/dWkSRP16NFD2dnZWrt2rRo3bixJiomJ0YoVK7R27Vrdfvvt6tChg6ZNm6bw8HDrPFOmTFFKSorCwsLUunVr63WffPKJ7rvvPt16660aMGCAmjRporVr17p8LK/Epk2bVFRUpCFDhigkJMR6lHxj4JX0AcB9PIwxxt1NAMD1pEuXLmrVqpWmT5/u7lYAAAAAoNLiSikAAAAAAADYjlAKAAAAAAAAtuPjewAAAAAAALAdV0oBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxHKAUAAAAAAADbEUoBAAAAAADAdoRSAAAAAAAAsB2hFAAAAAAAAGxHKAUAAAAAAADb/X+pWVkjnevuxAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pdf = first_credset.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", + "plt.figure(figsize=(12, 12))\n", + "\n", + "# Histogram for purityMinR2\n", + "plt.subplot(2, 2, 1)\n", + "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMinR2\")\n", + "plt.xlabel(\"purityMinR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for purityMeanR2\n", + "plt.subplot(2, 2, 2)\n", + "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMeanR2\")\n", + "plt.xlabel(\"purityMeanR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for topPP\n", + "plt.subplot(2, 2, 3)\n", + "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of topPP\")\n", + "plt.xlabel(\"topPP\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for credSetSize\n", + "plt.subplot(2, 2, 4)\n", + "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of credSetSize\")\n", + "plt.xlabel(\"credSetSize\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Filtering credible sets with qc function" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/08/07 10:05:27 WARN package: Truncated the string representation of a plan since it was too large. 
This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of high quality credible sets: 33544\n", + "Number of unique studyIds in high quality credible sets: 2188\n" + ] + } + ], + "source": [ + "qc_credsets = SUSIE_inf.credible_set_qc(\n", + " susie_fm, study_index, ld_index, 1e-5, 0.25, 0.8\n", + ").persist()\n", + "\n", + "qc_credsets = (\n", + " qc_credsets.df.withColumn(\"credSetSize\", f.size(\"locus\"))\n", + " .withColumn(\n", + " \"locus\",\n", + " f.slice(order_array_of_structs_by_field(\"locus\", \"posteriorProbability\"), 1, 1)[\n", + " 0\n", + " ],\n", + " )\n", + " .withColumn(\"topPP\", f.col(\"locus\").getField(\"posteriorProbability\"))\n", + " .filter(~f.isnan(\"topPP\"))\n", + ")\n", + "\n", + "print(\"Number of high quality credible sets: \", qc_credsets.count())\n", + "print(\n", + " \"Number of unique studyIds in high quality credible sets: \",\n", + " qc_credsets.select(\"studyId\").distinct().count(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0---------------------------\n", + " meanTopPP | 0.9033882550691011 \n", + " minTopPP | 0.013988537311438214 \n", + " q1TopPP | 0.9937867852571437 \n", + " medianTopPP | 0.9999999999596412 \n", + " q3TopPP | 1.0 \n", + " maxTopPP | 1.0 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0-------------------------------\n", + " meanCredSetSize | 3.2039709038874316 \n", + " minCredSetSize | 1 \n", + " q1CredSetSize | 1 \n", + " medianCredSetSize | 1 \n", + " q3CredSetSize | 1 \n", + " 
maxCredSetSize | 1022 \n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0--------------------------------\n", + " meanPurityMeanR2 | 0.9789336464434549 \n", + " minPurityMeanR2 | 0.5417537849178955 \n", + " q1PurityMeanR2 | 1.0 \n", + " medianPurityMeanR2 | 1.0 \n", + " q3PurityMeanR2 | 1.0 \n", + " maxPurityMeanR2 | 1.0 \n", + "\n", + "-RECORD 0--------------------------------\n", + " meanPurityMinR2 | 0.9491503761431416 \n", + " minPurityMinR2 | 0.25018750677013313 \n", + " q1PurityMinR2 | 1.0 \n", + " medianPurityMinR2 | 1.0 \n", + " q3PurityMinR2 | 1.0 \n", + " maxPurityMinR2 | 1.0 \n", + "\n" + ] + } + ], + "source": [ + "(\n", + " qc_credsets.select(\n", + " f.mean(\"topPP\").alias(\"meanTopPP\"),\n", + " f.min(\"topPP\").alias(\"minTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.25).alias(\"q1TopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.5).alias(\"medianTopPP\"),\n", + " f.percentile_approx(\"topPP\", 0.75).alias(\"q3TopPP\"),\n", + " f.max(\"topPP\").alias(\"maxTopPP\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " qc_credsets.select(\n", + " f.mean(\"credSetSize\").alias(\"meanCredSetSize\"),\n", + " f.min(\"credSetSize\").alias(\"minCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.25).alias(\"q1CredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.5).alias(\"medianCredSetSize\"),\n", + " f.percentile_approx(\"credSetSize\", 0.75).alias(\"q3CredSetSize\"),\n", + " f.max(\"credSetSize\").alias(\"maxCredSetSize\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " qc_credsets.select(\n", + " f.mean(\"purityMeanR2\").alias(\"meanPurityMeanR2\"),\n", + " f.min(\"purityMeanR2\").alias(\"minPurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.25).alias(\"q1PurityMeanR2\"),\n", + " f.percentile_approx(\"purityMeanR2\", 0.5).alias(\"medianPurityMeanR2\"),\n", + " 
f.percentile_approx(\"purityMeanR2\", 0.75).alias(\"q3PurityMeanR2\"),\n", + " f.max(\"purityMeanR2\").alias(\"maxPurityMeanR2\"),\n", + " ).show(vertical=True)\n", + ")\n", + "(\n", + " qc_credsets.select(\n", + " f.mean(\"purityMinR2\").alias(\"meanPurityMinR2\"),\n", + " f.min(\"purityMinR2\").alias(\"minPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.25).alias(\"q1PurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.5).alias(\"medianPurityMinR2\"),\n", + " f.percentile_approx(\"purityMinR2\", 0.75).alias(\"q3PurityMinR2\"),\n", + " f.max(\"purityMinR2\").alias(\"maxPurityMinR2\"),\n", + " ).show(vertical=True)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pdf = qc_credsets.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", + "plt.figure(figsize=(12, 12))\n", + "\n", + "# Histogram for purityMinR2\n", + "plt.subplot(2, 2, 1)\n", + "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMinR2\")\n", + "plt.xlabel(\"purityMinR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for purityMeanR2\n", + "plt.subplot(2, 2, 2)\n", + "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of purityMeanR2\")\n", + "plt.xlabel(\"purityMeanR2\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for topPP\n", + "plt.subplot(2, 2, 3)\n", + "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of topPP\")\n", + "plt.xlabel(\"topPP\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Histogram for credSetSize\n", + "plt.subplot(2, 2, 4)\n", + "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "plt.title(\"Histogram of credSetSize\")\n", + "plt.xlabel(\"credSetSize\")\n", + "plt.ylabel(\"Frequency\")\n", + "\n", + "# Adjust layout to prevent overlap\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# WIP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clumped loci filtered for usage with PICS\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Number of unique studyIds\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "15" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pics_loci.df.select(\"studyId\").distinct().count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Number of loci to fine map with PICS\n" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pics_loci.df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "df = pics_loci.df.withColumns(\n", + " {\n", + " \"locusSize\": f.size(\"locus\"),\n", + " \"locusLength\": f.col(\"locusEnd\") - f.col(\"locusStart\"),\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many loci with less than 100 variants from summary statistics?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.filter(f.col(\"locusSize\") < 100).count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many loci with more than 15,000 variants from summary statistics?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.filter(f.col(\"locusSize\") > 15_000).count()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------\n", + " meanLocusLength | 273867.6666666667 \n", + " q1LocusLength | 200000 \n", + " medianLocusLength | 200000 \n", + " q3LocusLength | 302688 \n", + "\n", + "-RECORD 0-----------------------------\n", + " meanLocusSize | 30.055555555555557 \n", + " minLocusSize | 2 \n", + " q1LocusSize | 12 \n", + " medianLocusSize | 19 \n", + " q3LocusSize | 57 \n", + " 
maxLocusSize | 79 \n", + "\n" + ] + } + ], + "source": [ + "length = df.select(\n", + " f.mean(\"locusLength\").alias(\"meanLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.25).alias(\"q1LocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.5).alias(\"medianLocusLength\"),\n", + " f.percentile_approx(\"locusLength\", 0.75).alias(\"q3LocusLength\"),\n", + ")\n", + "size = df.select(\n", + " f.mean(\"locusSize\").alias(\"meanLocusSize\"),\n", + " f.min(\"locusSize\").alias(\"minLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.25).alias(\"q1LocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.5).alias(\"medianLocusSize\"),\n", + " f.percentile_approx(\"locusSize\", 0.75).alias(\"q3LocusSize\"),\n", + " f.max(\"locusSize\").alias(\"maxLocusSize\"),\n", + ")\n", + "length.show(vertical=True)\n", + "size.show(vertical=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", + "\n", + "plt.figure(figsize=(12, 6))\n", + "\n", + "# Histogram for locusLength\n", + "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Length\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Length\")\n", + "\n", + "# Histogram for locusSize\n", + "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.title(\"Histogram of Locus Size\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "plt.xlabel(\"Locus Size\")\n", + "plt.ylabel(\"Locus Length\")\n", + "plt.grid(True)\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "gentropy-iQynFIia-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 5b5b408e0c5ac30e361f344de52d317ef02234d2 Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 8 Aug 2024 15:37:52 +0100 Subject: [PATCH 007/188] fix: updating config paths and fine-mapping methods (#725) * fix: updating config paths and fine-mapping methods * Update ot_locus_to_gene_train.yaml --- config/step/ot_locus_to_gene_predict.yaml | 2 +- config/step/ot_locus_to_gene_train.yaml | 2 +- src/gentropy/colocalisation.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/config/step/ot_locus_to_gene_predict.yaml b/config/step/ot_locus_to_gene_predict.yaml index a98e3cf2a..c3cb88b59 100644 --- a/config/step/ot_locus_to_gene_predict.yaml +++ b/config/step/ot_locus_to_gene_predict.yaml @@ -6,6 +6,6 @@ model_path: null predictions_path: ${datasets.l2g_predictions} feature_matrix_path: ${datasets.l2g_feature_matrix} credible_set_path: ${datasets.credible_set} -variant_gene_path: ${datasets.v2g} +variant_gene_path: ${datasets.variant_to_gene} colocalisation_path: ${datasets.colocalisation} study_index_path: ${datasets.study_index} diff --git a/config/step/ot_locus_to_gene_train.yaml 
b/config/step/ot_locus_to_gene_train.yaml index 25f3710c5..b59a24dae 100644 --- a/config/step/ot_locus_to_gene_train.yaml +++ b/config/step/ot_locus_to_gene_train.yaml @@ -7,7 +7,7 @@ hf_hub_repo_id: opentargets/locus_to_gene model_path: ${datasets.l2g_model} predictions_path: ${datasets.l2g_predictions} credible_set_path: ${datasets.credible_set} -variant_gene_path: ${datasets.v2g} +variant_gene_path: ${datasets.variant_to_gene} colocalisation_path: ${datasets.colocalisation} study_index_path: ${datasets.study_index} gold_standard_curation_path: ${datasets.l2g_gold_standard_curation} diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 1d71ed447..6b370d426 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -1,4 +1,5 @@ """Step to generate colocalisation results.""" + from __future__ import annotations import inspect @@ -40,7 +41,7 @@ def __init__( credible_set = ( StudyLocus.from_parquet( session, credible_set_path, recursiveFileLookup=True - ).filter(col("finemappingMethod") == "SuSie") + ).filter(col("finemappingMethod").isin("SuSie", "SuSiE-inf")) if colocalisation_class is Coloc else StudyLocus.from_parquet( session, credible_set_path, recursiveFileLookup=True From e45f2950c0339dec0eaae31ffac920f0c20980cd Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:51:33 +0100 Subject: [PATCH 008/188] docs: macos fix for some functions (#729) * docs: macos fix for some functions * docs: formatting --- docs/development/troubleshooting.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/development/troubleshooting.md b/docs/development/troubleshooting.md index b9f7385da..a30f72be0 100644 --- a/docs/development/troubleshooting.md +++ b/docs/development/troubleshooting.md @@ -39,3 +39,13 @@ Another solution which helps is to remove Node, NodeJS, and npm from your system On Ubuntu, this can be done using `sudo apt 
remove node nodejs npm`, followed by `sudo apt autoremove`. But in some cases, depending on your existing installation, you may need to also manually remove some files. See [this StackOverflow answer](https://stackoverflow.com/a/41057802) for guidance. After running these commands, you are advised to open a fresh shell, and then also reinstall Pyenv and Poetry to make sure they pick up the changes (see relevant section above). + +## MacOS + +Some functions on MacOS may throw a java error: + +`python3.10/site-packages/py4j/protocol.py:326: Py4JJavaError` + +This can be resolved by adding the follow line to your `~/.zshrc`: + +`export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES` From f49a5c52c6773450b2aa736b8d57e287e171a620 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 09:35:11 +0100 Subject: [PATCH 009/188] build(deps-dev): bump ruff from 0.5.1 to 0.6.1 (#732) * build(deps-dev): bump ruff from 0.5.1 to 0.6.1 Bumps [ruff](https://github.com/astral-sh/ruff) from 0.5.1 to 0.6.1. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/ruff/compare/0.5.1...0.6.1) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * fix: linting issues associated with ruff 0.6 * chore: fixing imports for notebooks * chore: removing old notebook --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: David Ochoa Co-authored-by: Daniel Considine --- notebooks/FineMappingSimmuations.ipynb | 32 +- notebooks/FineMapping_AlzheimierDisease.ipynb | 51 +- notebooks/Finngen_PICS_run.ipynb | 2459 ++++++++--------- notebooks/Mapping_EFO_finngen.ipynb | 24 +- notebooks/Productionizing_LD_matrix.ipynb | 1504 ---------- notebooks/Release_QC_metrics.ipynb | 70 +- notebooks/gwas_cat_benchmark.ipynb | 273 +- notebooks/l2g_benchmark.ipynb | 603 ---- notebooks/pics_benchmark.ipynb | 1678 ++++++----- notebooks/susie_inf_benchmark.ipynb | 23 +- notebooks/ukb_ppp_benchmark.ipynb | 271 +- poetry.lock | 40 +- pyproject.toml | 2 +- 13 files changed, 2393 insertions(+), 4637 deletions(-) delete mode 100644 notebooks/Productionizing_LD_matrix.ipynb delete mode 100644 notebooks/l2g_benchmark.ipynb diff --git a/notebooks/FineMappingSimmuations.ipynb b/notebooks/FineMappingSimmuations.ipynb index d81b268e3..fcec9bbe9 100644 --- a/notebooks/FineMappingSimmuations.ipynb +++ b/notebooks/FineMappingSimmuations.ipynb @@ -82,7 +82,7 @@ "metadata": {}, "outputs": [], "source": [ - "ld_matrix = np.load('/Users/yt4/Projects/ot_data/tmp/ld_matrix.npy')\n", + "ld_matrix = np.load(\"/Users/yt4/Projects/ot_data/tmp/ld_matrix.npy\")\n", "ld_index=session.spark.read.parquet(\"/Users/yt4/Projects/ot_data/tmp/ld_index\")\n", "ld_matrix_for_sim=ld_matrix[0:500,:][:,0:500]\n", "ld_index_for_sim=ld_index.limit(500)" @@ -129,11 +129,8 @@ } ], "source": [ - "print(FineMappingSimulations.ProvideSummary(cred_sets=x1,n_causal=n_causal))\n", "x2=x1[(x1[\"pValueExponent\"]<=-6) | (x1[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x2,n_causal=n_causal))\n", - "x3=x2[(x2[\"purityMinR2\"]>=0.25) | 
(x2[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x3,n_causal=n_causal))" + "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]" ] }, { @@ -177,11 +174,8 @@ } ], "source": [ - "print(FineMappingSimulations.ProvideSummary(cred_sets=x1,n_causal=n_causal))\n", "x2=x1[(x1[\"pValueExponent\"]<=-6) | (x1[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x2,n_causal=n_causal))\n", - "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x3,n_causal=n_causal))" + "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]" ] }, { @@ -227,11 +221,8 @@ } ], "source": [ - "print(FineMappingSimulations.ProvideSummary(cred_sets=x1,n_causal=n_causal))\n", "x2=x1[(x1[\"pValueExponent\"]<=-6) | (x1[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x2,n_causal=n_causal))\n", - "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x3,n_causal=n_causal))" + "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]" ] }, { @@ -277,11 +268,8 @@ } ], "source": [ - "print(FineMappingSimulations.ProvideSummary(cred_sets=x1,n_causal=n_causal))\n", "x2=x1[(x1[\"pValueExponent\"]<=-6) | (x1[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x2,n_causal=n_causal))\n", - "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x3,n_causal=n_causal))" + "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]" ] }, { @@ -335,11 +323,8 @@ } ], "source": [ - "print(FineMappingSimulations.ProvideSummary(cred_sets=x1,n_causal=n_causal))\n", "x2=x1[(x1[\"pValueExponent\"]<=-6) | (x1[\"credibleSetIndex\"]==1)]\n", - 
"print(FineMappingSimulations.ProvideSummary(cred_sets=x2,n_causal=n_causal))\n", - "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x3,n_causal=n_causal))" + "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]" ] }, { @@ -386,11 +371,8 @@ } ], "source": [ - "print(FineMappingSimulations.ProvideSummary(cred_sets=x1,n_causal=n_causal))\n", "x2=x1[(x1[\"pValueExponent\"]<=-6) | (x1[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x2,n_causal=n_causal))\n", - "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]\n", - "print(FineMappingSimulations.ProvideSummary(cred_sets=x3,n_causal=n_causal))" + "x3=x2[(x2[\"purityMinR2\"]>=0.25) | (x2[\"credibleSetIndex\"]==1)]" ] }, { diff --git a/notebooks/FineMapping_AlzheimierDisease.ipynb b/notebooks/FineMapping_AlzheimierDisease.ipynb index 2934080f0..bdb37e354 100644 --- a/notebooks/FineMapping_AlzheimierDisease.ipynb +++ b/notebooks/FineMapping_AlzheimierDisease.ipynb @@ -121,19 +121,20 @@ ], "source": [ "import os\n", + "\n", "import hail as hl\n", - "import pyspark.sql.functions as f\n", "import pandas as pd\n", - "pd.set_option('display.max_colwidth', None)\n", - "pd.set_option('display.expand_frame_repr', False)\n", + "import pyspark.sql.functions as f\n", "\n", "from gentropy.common.session import Session\n", "from gentropy.dataset.study_index import StudyIndex\n", "from gentropy.dataset.summary_statistics import SummaryStatistics\n", - "from gentropy.dataset.study_index import StudyIndex\n", "from gentropy.method.window_based_clumping import WindowBasedClumping\n", "from gentropy.susie_finemapper import SusieFineMapperStep\n", "\n", + "pd.set_option(\"display.max_colwidth\", None)\n", + "pd.set_option(\"display.expand_frame_repr\", False)\n", + "\n", "hail_dir = os.path.dirname(hl.__file__)\n", "session = Session(hail_home=hail_dir, start_hail=True, 
extended_spark_conf={\"spark.driver.memory\": \"12g\",\n", " \"spark.kryoserializer.buffer.max\": \"500m\",\"spark.driver.maxResultSize\":\"3g\"})" @@ -195,10 +196,7 @@ "study_index = StudyIndex.from_parquet(session, path_si)\n", "\n", "slt=WindowBasedClumping.clump(gwas1,gwas_significance=5e-8,distance=1e6)\n", - "slt_df=slt._df\n", - "\n", - "print(\"Number of SNPs in GWAS: \",gwas1._df.count())\n", - "print(\"Number of clumps: \",slt_df.count())" + "slt_df=slt._df\n" ] }, { @@ -254,9 +252,7 @@ ] } ], - "source": [ - "print(slt_df.show())" - ] + "source": [] }, { "cell_type": "markdown", @@ -1071,7 +1067,7 @@ "source": [ "df = slt_df.withColumn(\"row_index\", f.monotonically_increasing_id())\n", "\n", - "columns = ['N_gwas', 'N_ld', 'N_overlap', 'N_outliers', 'N_imputed', 'N_final_to_fm', 'eleapsed_time']\n", + "columns = [\"N_gwas\", \"N_ld\", \"N_overlap\", \"N_outliers\", \"N_imputed\", \"N_final_to_fm\", \"eleapsed_time\"]\n", "logs = pd.DataFrame(columns=columns)\n", "\n", "for i in range(0,df.count()):\n", @@ -1095,7 +1091,6 @@ "\n", " sl=res[\"study_locus\"]\n", " #print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - " print(\"Region: \",sl._df.collect()[0]['region'], \"; number of CSs: \",sl._df.count(), \"; log:\")\n", " #print(res[\"log\"])\n", " logs=pd.concat([logs,res[\"log\"]])" ] @@ -1146,8 +1141,7 @@ } ], "source": [ - "pd.set_option('display.max_rows', None)\n", - "print(logs)" + "pd.set_option(\"display.max_rows\", None)" ] }, { @@ -1164,8 +1158,7 @@ } ], "source": [ - "summary = logs['N_overlap'].mean()\n", - "print(summary)" + "summary = logs[\"N_overlap\"].mean()" ] }, { @@ -1318,9 +1311,7 @@ " imputed_r2_threshold=0.8,\n", " ld_score_threshold=4\n", ")\n", - "sl=res[\"study_locus\"]\n", - "print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - "print(res[\"log\"])" + "sl=res[\"study_locus\"]" ] }, { @@ -1382,9 +1373,7 @@ " imputed_r2_threshold=0.8,\n", " ld_score_threshold=4\n", ")\n", - 
"sl=res[\"study_locus\"]\n", - "print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - "print(res[\"log\"])" + "sl=res[\"study_locus\"]" ] }, { @@ -1482,9 +1471,7 @@ " imputed_r2_threshold=0.8,\n", " ld_score_threshold=4\n", ")\n", - "sl=res[\"study_locus\"]\n", - "print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - "print(res[\"log\"])" + "sl=res[\"study_locus\"]" ] }, { @@ -1546,9 +1533,7 @@ " imputed_r2_threshold=0.8,\n", " ld_score_threshold=4\n", ")\n", - "sl=res[\"study_locus\"]\n", - "print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - "print(res[\"log\"])" + "sl=res[\"study_locus\"]" ] }, { @@ -1610,9 +1595,7 @@ " imputed_r2_threshold=0.8,\n", " ld_score_threshold=4\n", ")\n", - "sl=res[\"study_locus\"]\n", - "print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - "print(res[\"log\"])" + "sl=res[\"study_locus\"]" ] }, { @@ -1703,9 +1686,7 @@ " imputed_r2_threshold=0.8,\n", " ld_score_threshold=4\n", ")\n", - "sl=res[\"study_locus\"]\n", - "print(sl._df.withColumn(\"size\", f.size(sl._df[\"locus\"])).show())\n", - "print(res[\"log\"])" + "sl=res[\"study_locus\"]" ] } ], diff --git a/notebooks/Finngen_PICS_run.ipynb b/notebooks/Finngen_PICS_run.ipynb index 21f29db2f..614f07ba5 100644 --- a/notebooks/Finngen_PICS_run.ipynb +++ b/notebooks/Finngen_PICS_run.ipynb @@ -1,1293 +1,1284 @@ { - "cells": [ + "cells": [ + { + "cell_type": "markdown", + "id": "2dd3bf11", + "metadata": {}, + "source": [ + "# Running PICS finemapping on Finngen summary statistics\n", + "\n", + "1. Read summary stats.\n", + "2. Apply window based clumping.\n", + "3. LD expansion.\n", + "4. lD clumping.\n", + "5. PICS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0deeb686", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-13T09:53:43.429135Z", + "start_time": "2023-10-13T09:53:23.566141Z" + } + }, + "outputs": [ { - "cell_type": "markdown", - "id": "2dd3bf11", - "metadata": {}, - "source": [ - "# Running PICS finemapping on Finngen summary statistics\n", - "\n", - "1. Read summary stats.\n", - "2. Apply window based clumping.\n", - "3. LD expansion.\n", - "4. lD clumping.\n", - "5. PICS." + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
\n" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": 1, - "id": "0deeb686", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-13T09:53:43.429135Z", - "start_time": "2023-10-13T09:53:23.566141Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " Loading BokehJS ...\n", - "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n // Clean up Bokeh references\n if (id != null && id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim();\n if (id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content 
only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is 
output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(\"cb01f13c-6396-4b85-99db-68f8056c07dd\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = 
document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.2.2.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\nif (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"cb01f13c-6396-4b85-99db-68f8056c07dd\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", - "application/vnd.bokehjs_load.v0+json": "" - }, - "metadata": {}, - "output_type": 
"display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Setting default log level to \"WARN\".\n", - "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", - "23/10/13 09:53:40 INFO SparkEnv: Registering MapOutputTracker\n", - "23/10/13 09:53:40 INFO SparkEnv: Registering BlockManagerMaster\n", - "23/10/13 09:53:40 INFO SparkEnv: Registering BlockManagerMasterHeartbeat\n", - "23/10/13 09:53:40 INFO SparkEnv: Registering OutputCommitCoordinator\n", - "23/10/13 09:53:42 WARN GhfsStorageStatistics: Detected potential high latency for operation op_get_file_status. latencyMs=198; previousMaxLatencyMs=0; operationCount=1; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history\n", - "23/10/13 09:53:42 WARN GhfsStorageStatistics: Detected potential high latency for operation op_mkdirs. latencyMs=166; previousMaxLatencyMs=0; operationCount=1; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history\n" - ] - } - ], - "source": [ - "# Import:\n", - "from pyspark.sql import functions as f, types as t\n", - "\n", - "from gentropy.common.session import Session\n", - "\n", - "from gentropy.dataset.summary_statistics import SummaryStatistics\n", - "from gentropy.dataset.study_locus import StudyLocus\n", - "from gentropy.dataset.study_index import StudyIndex\n", - "from gentropy.dataset.ld_index import LDIndex\n", - "from gentropy.method.ld import LDAnnotator\n", - "\n", - "from gentropy.method.pics import PICS\n", - "\n", - "# Initialize session:\n", - "session = Session()\n", - "\n", - "# Input:\n", - "sumstats = 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/*'\n", - "ld_index_path = 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/'\n", - "\n", - "# Parameters:\n", - "clump_window_length = 500_000 # 
Distance between semi-indices.\n", - "locus_window_length = 250_000 # Distance around semi-indices from where the tags are collected.\n", - "\n", - "# Output:\n", - "window_based_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus'\n", - "ld_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus'\n", - "picsed_output = 'gs://ot-team/dsuveges/finngen/2023.10.06_PICSed'\n" - ] + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n // Clean up Bokeh references\n if (id != null && id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim();\n if (id in Bokeh.index) {\n Bokeh.index[id].model.document.clear();\n delete Bokeh.index[id];\n }\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import 
bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n 
events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(\"cb01f13c-6396-4b85-99db-68f8056c07dd\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = 
document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.2.2.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\nif (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"cb01f13c-6396-4b85-99db-68f8056c07dd\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": 
"display_data" }, { - "cell_type": "markdown", - "id": "3bdbcfbf", - "metadata": {}, - "source": [ - "## 1. Read summary statistics" - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", + "23/10/13 09:53:40 INFO SparkEnv: Registering MapOutputTracker\n", + "23/10/13 09:53:40 INFO SparkEnv: Registering BlockManagerMaster\n", + "23/10/13 09:53:40 INFO SparkEnv: Registering BlockManagerMasterHeartbeat\n", + "23/10/13 09:53:40 INFO SparkEnv: Registering OutputCommitCoordinator\n", + "23/10/13 09:53:42 WARN GhfsStorageStatistics: Detected potential high latency for operation op_get_file_status. latencyMs=198; previousMaxLatencyMs=0; operationCount=1; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history\n", + "23/10/13 09:53:42 WARN GhfsStorageStatistics: Detected potential high latency for operation op_mkdirs. 
latencyMs=166; previousMaxLatencyMs=0; operationCount=1; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history\n" + ] + } + ], + "source": [ + "# Import:\n", + "from pyspark.sql import functions as f\n", + "\n", + "from gentropy.common.session import Session\n", + "from gentropy.dataset.ld_index import LDIndex\n", + "from gentropy.dataset.study_index import StudyIndex\n", + "from gentropy.dataset.study_locus import StudyLocus\n", + "from gentropy.dataset.summary_statistics import SummaryStatistics\n", + "from gentropy.method.ld import LDAnnotator\n", + "from gentropy.method.pics import PICS\n", + "\n", + "# Initialize session:\n", + "session = Session()\n", + "\n", + "# Input:\n", + "sumstats = \"gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/*\"\n", + "ld_index_path = \"gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/\"\n", + "\n", + "# Parameters:\n", + "clump_window_length = 500_000 # Distance between semi-indices.\n", + "locus_window_length = 250_000 # Distance around semi-indices from where the tags are collected.\n", + "\n", + "# Output:\n", + "window_based_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus\"\n", + "ld_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus\"\n", + "picsed_output = \"gs://ot-team/dsuveges/finngen/2023.10.06_PICSed\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "3bdbcfbf", + "metadata": {}, + "source": [ + "## 1. 
Read summary statistics" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c893433c", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T08:46:39.186806Z", + "start_time": "2023-10-06T08:46:04.601886Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 16, - "id": "c893433c", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T08:46:39.186806Z", - "start_time": "2023-10-06T08:46:04.601886Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/06 08:46:37 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=236; previousMaxLatencyMs=147; operationCount=276; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_AB1_ACTINOMYCOSIS/chromosome=1/part-00011-33e31f88-435f-4200-9adc-3e909d706910.c000.snappy.parquet\n", - "23/10/06 08:46:37 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=269; previousMaxLatencyMs=236; operationCount=282; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_HEIGHT_IRN/chromosome=2/part-00020-774f0990-ad3d-46ae-9648-f39c18fae314.c000.snappy.parquet\n", - "23/10/06 08:46:38 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. 
latencyMs=417; previousMaxLatencyMs=269; operationCount=288; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_HEIGHT_IRN/chromosome=2/part-00020-774f0990-ad3d-46ae-9648-f39c18fae314.c000.snappy.parquet\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+-----------+--------+--------------+--------------+-----------+-------------+-------------------------------+---------------------------+---------------------------+----------+\n", - "| studyId| variantId|position|pValueMantissa|pValueExponent| beta|standardError|effectAlleleFrequencyFromSource|betaConfidenceIntervalLower|betaConfidenceIntervalUpper|chromosome|\n", - "+--------------------+-----------+--------+--------------+--------------+-----------+-------------+-------------------------------+---------------------------+---------------------------+----------+\n", - "|FINNGEN_R9_HEIGHT...|2_10603_A_C| 10603| 8.515| -1| 0.0117813| 0.0629271| 2.30629E-4| -0.0511458| 0.0747084| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_10610_G_A| 10610| 8.814| -3| 0.0187529| 0.00715985| 0.0194828| 0.01159305| 0.02591275| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_10659_G_A| 10659| 9.292| -1|-0.00289323| 0.0325443| 8.5694E-4| -0.03543752999999...| 0.029651069999999998| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_10756_T_C| 10756| 2.368| -1| -0.180083| 0.15223| 5.62468E-5| -0.33231299999999997| -0.02785299999999999| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_10797_C_T| 10797| 8.506| -1|-0.00190634| 0.01012| 0.00847354| -0.01202634| 0.008213660000000001| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_10823_G_C| 10823| 6.834| -2| -0.259102| 0.142146| 1.48813E-4| -0.401248| -0.116956| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_10847_C_T| 10847| 5.97| -1| 0.0487634| 0.0922252| 1.03478E-4| -0.04346179999999...| 0.1409886| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11305_G_C| 11305| 6.992| -1|-0.00333638| 0.00863326| 0.013513| -0.01196964| 0.00529688| 2|\n", - 
"|FINNGEN_R9_HEIGHT...|2_11311_C_G| 11311| 1.676| -1| 0.112938| 0.0818501| 1.30145E-4| 0.0310879| 0.1947881| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11320_G_A| 11320| 2.484| -7| -0.0106449| 0.0020634| 0.302909| -0.0127083| -0.0085815| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11486_A_G| 11486| 6.829| -3| 0.0135765| 0.00501887| 0.035261| 0.00855763| 0.01859537| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11573_A_C| 11573| 7.428| -1| 0.00483333| 0.0147283| 0.00424829| -0.00989497| 0.01956163| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11594_G_T| 11594| 5.291| -1|-0.00908649| 0.014438| 0.0045864| -0.02352449| 0.00535151| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11607_T_C| 11607| 5.666| -3| 0.0139369| 0.00503765| 0.0350636| 0.008899250000000001| 0.01897455| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11609_T_G| 11609| 8.231| -2| -0.065766| 0.0378528| 7.11595E-4| -0.10361880000000001| -0.02791320000000...| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11620_G_C| 11620| 5.866| -1| -0.0235368| 0.0432809| 5.48483E-4| -0.0668177| 0.019744099999999997| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11677_C_G| 11677| 3.944| -1| -0.0888112| 0.104287| 1.01977E-4| -0.1930982| 0.015475799999999998| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11694_C_T| 11694| 8.855| -2| -0.0468924| 0.0275339| 0.00127759| -0.0744263| -0.0193585| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11696_C_T| 11696| 2.446| -1| -0.0384871| 0.0330789| 8.17298E-4| -0.071566| -0.00540820000000...| 2|\n", - "|FINNGEN_R9_HEIGHT...|2_11834_A_G| 11834| 5.664| -3| 0.0139372| 0.00503766| 0.0350634| 0.008899540000000001| 0.01897486| 2|\n", - "+--------------------+-----------+--------+--------------+--------------+-----------+-------------+-------------------------------+---------------------------+---------------------------+----------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "finngen_sumstats = SummaryStatistics(\n", - " _df=(\n", - " session.spark.read.parquet(sumstats, 
recursiveFileLookup=True)\n", - " # We need to add chromosome column as this is a partition column:\n", - " .withColumn('chromosome',f.split(f.col('variantId'), '_')[0])\n", - " ),\n", - " _schema=SummaryStatistics.get_schema()\n", - ")\n", - "finngen_sumstats.df.show()\n", - "print(finngen_sumstats.df.count())" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/06 08:46:37 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=236; previousMaxLatencyMs=147; operationCount=276; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_AB1_ACTINOMYCOSIS/chromosome=1/part-00011-33e31f88-435f-4200-9adc-3e909d706910.c000.snappy.parquet\n", + "23/10/06 08:46:37 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=269; previousMaxLatencyMs=236; operationCount=282; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_HEIGHT_IRN/chromosome=2/part-00020-774f0990-ad3d-46ae-9648-f39c18fae314.c000.snappy.parquet\n", + "23/10/06 08:46:38 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=417; previousMaxLatencyMs=269; operationCount=288; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_HEIGHT_IRN/chromosome=2/part-00020-774f0990-ad3d-46ae-9648-f39c18fae314.c000.snappy.parquet\n" + ] }, { - "cell_type": "markdown", - "id": "a76b27ce", - "metadata": {}, - "source": [ - "## 2. 
Apply window based clumping.\n", - "\n", - "- Clumping distance: +/-500kbp\n", - "- Locus collected: +/-250kbp" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+-----------+--------+--------------+--------------+-----------+-------------+-------------------------------+---------------------------+---------------------------+----------+\n", + "| studyId| variantId|position|pValueMantissa|pValueExponent| beta|standardError|effectAlleleFrequencyFromSource|betaConfidenceIntervalLower|betaConfidenceIntervalUpper|chromosome|\n", + "+--------------------+-----------+--------+--------------+--------------+-----------+-------------+-------------------------------+---------------------------+---------------------------+----------+\n", + "|FINNGEN_R9_HEIGHT...|2_10603_A_C| 10603| 8.515| -1| 0.0117813| 0.0629271| 2.30629E-4| -0.0511458| 0.0747084| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_10610_G_A| 10610| 8.814| -3| 0.0187529| 0.00715985| 0.0194828| 0.01159305| 0.02591275| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_10659_G_A| 10659| 9.292| -1|-0.00289323| 0.0325443| 8.5694E-4| -0.03543752999999...| 0.029651069999999998| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_10756_T_C| 10756| 2.368| -1| -0.180083| 0.15223| 5.62468E-5| -0.33231299999999997| -0.02785299999999999| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_10797_C_T| 10797| 8.506| -1|-0.00190634| 0.01012| 0.00847354| -0.01202634| 0.008213660000000001| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_10823_G_C| 10823| 6.834| -2| -0.259102| 0.142146| 1.48813E-4| -0.401248| -0.116956| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_10847_C_T| 10847| 5.97| -1| 0.0487634| 0.0922252| 1.03478E-4| -0.04346179999999...| 0.1409886| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11305_G_C| 11305| 6.992| -1|-0.00333638| 0.00863326| 0.013513| -0.01196964| 0.00529688| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11311_C_G| 11311| 1.676| -1| 0.112938| 0.0818501| 1.30145E-4| 0.0310879| 0.1947881| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11320_G_A| 11320| 2.484| -7| -0.0106449| 0.0020634| 
0.302909| -0.0127083| -0.0085815| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11486_A_G| 11486| 6.829| -3| 0.0135765| 0.00501887| 0.035261| 0.00855763| 0.01859537| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11573_A_C| 11573| 7.428| -1| 0.00483333| 0.0147283| 0.00424829| -0.00989497| 0.01956163| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11594_G_T| 11594| 5.291| -1|-0.00908649| 0.014438| 0.0045864| -0.02352449| 0.00535151| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11607_T_C| 11607| 5.666| -3| 0.0139369| 0.00503765| 0.0350636| 0.008899250000000001| 0.01897455| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11609_T_G| 11609| 8.231| -2| -0.065766| 0.0378528| 7.11595E-4| -0.10361880000000001| -0.02791320000000...| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11620_G_C| 11620| 5.866| -1| -0.0235368| 0.0432809| 5.48483E-4| -0.0668177| 0.019744099999999997| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11677_C_G| 11677| 3.944| -1| -0.0888112| 0.104287| 1.01977E-4| -0.1930982| 0.015475799999999998| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11694_C_T| 11694| 8.855| -2| -0.0468924| 0.0275339| 0.00127759| -0.0744263| -0.0193585| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11696_C_T| 11696| 2.446| -1| -0.0384871| 0.0330789| 8.17298E-4| -0.071566| -0.00540820000000...| 2|\n", + "|FINNGEN_R9_HEIGHT...|2_11834_A_G| 11834| 5.664| -3| 0.0139372| 0.00503766| 0.0350634| 0.008899540000000001| 0.01897486| 2|\n", + "+--------------------+-----------+--------+--------------+--------------+-----------+-------------+-------------------------------+---------------------------+---------------------------+----------+\n", + "only showing top 20 rows\n", + "\n" + ] }, { - "cell_type": "code", - "execution_count": 34, - "id": "fc0dd4f0", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:22:38.264681Z", - "start_time": "2023-10-06T10:10:29.768171Z" - }, - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/06 10:22:32 WARN GhfsStorageStatistics: Detected potential high latency for operation 
stream_write_close_operations. latencyMs=1812; previousMaxLatencyMs=253; operationCount=415; context=gs://ot-team/dsuveges/finngen/2023.10.06_window_clumped/_temporary/0/_temporary/attempt_202310061022254599137247441201519_0051_m_000011_106753/part-00011-53e48f6d-d432-40c5-9201-d4c8f03e6dee-c000.snappy.parquet\n", - " \r" - ] - } - ], - "source": [ - "# This process takes ~1h on a 32 cores:\n", - "(\n", - " SummaryStatistics(\n", - " _df=(\n", - " session.spark.read.parquet(sumstats, recursiveFileLookup=True)\n", - " .withColumn(\n", - " 'chromosome',\n", - " f.split(f.col('variantId'), '_')[0]\n", - " )\n", - " ),\n", - " _schema=SummaryStatistics.get_schema()\n", - " )\n", - " .window_based_clumping(\n", - " distance=clump_window_length,\n", - " locus_collect_distance=locus_window_length,\n", - " with_locus=True\n", - " )\n", - " .df.write.mode('overwrite')\n", - " .parquet(window_based_clumped_output)\n", - ")" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "finngen_sumstats = SummaryStatistics(\n", + " _df=(\n", + " session.spark.read.parquet(sumstats, recursiveFileLookup=True)\n", + " # We need to add chromosome column as this is a partition column:\n", + " .withColumn(\"chromosome\",f.split(f.col(\"variantId\"), \"_\")[0])\n", + " ),\n", + " _schema=SummaryStatistics.get_schema()\n", + ")\n", + "finngen_sumstats.df.show()" + ] + }, + { + "cell_type": "markdown", + "id": "a76b27ce", + "metadata": {}, + "source": [ + "## 2. Apply window based clumping.\n", + "\n", + "- Clumping distance: +/-500kbp\n", + "- Locus collected: +/-250kbp" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "fc0dd4f0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T10:22:38.264681Z", + "start_time": "2023-10-06T10:10:29.768171Z" }, + "scrolled": false + }, + "outputs": [ { - "cell_type": "markdown", - "id": "a1cd0172", - "metadata": {}, - "source": [ - "## 3. 
LD expansion\n", - "\n", - "- For FINNGEN, study table needs to be mocked." - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/06 10:22:32 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=1812; previousMaxLatencyMs=253; operationCount=415; context=gs://ot-team/dsuveges/finngen/2023.10.06_window_clumped/_temporary/0/_temporary/attempt_202310061022254599137247441201519_0051_m_000011_106753/part-00011-53e48f6d-d432-40c5-9201-d4c8f03e6dee-c000.snappy.parquet\n", + " \r" + ] + } + ], + "source": [ + "# This process takes ~1h on a 32 cores:\n", + "(\n", + " SummaryStatistics(\n", + " _df=(\n", + " session.spark.read.parquet(sumstats, recursiveFileLookup=True)\n", + " .withColumn(\n", + " \"chromosome\",\n", + " f.split(f.col(\"variantId\"), \"_\")[0]\n", + " )\n", + " ),\n", + " _schema=SummaryStatistics.get_schema()\n", + " )\n", + " .window_based_clumping(\n", + " distance=clump_window_length,\n", + " locus_collect_distance=locus_window_length,\n", + " with_locus=True\n", + " )\n", + " .df.write.mode(\"overwrite\")\n", + " .parquet(window_based_clumped_output)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a1cd0172", + "metadata": {}, + "source": [ + "## 3. LD expansion\n", + "\n", + "- For FINNGEN, study table needs to be mocked." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1305f6c3", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T08:13:10.050554Z", + "start_time": "2023-10-06T08:13:05.901762Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 8, - "id": "1305f6c3", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T08:13:10.050554Z", - "start_time": "2023-10-06T08:13:05.901762Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 5:===============================================> (15 + 3) / 18]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+---------------------+---------+---------+---------------+\n", - "| studyId|ldPopulationStructure|projectId|studyType|traitFromSource|\n", - "+--------------------+---------------------+---------+---------+---------------+\n", - "|FINNGEN_R9_K11_EN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_P16_IN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "| FINNGEN_R9_C_STROKE| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_G6_HER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_RHEUMA...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_N14_EN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "| FINNGEN_R9_PAIN| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_D3_COA...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_F5_UNSORG| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_HEIGHT...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_SY...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_APPEND...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_ALLERG...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_FI...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_E4_DM2...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_AUTOIM...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", 
- "|FINNGEN_R9_CD2_BE...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_F5_DIS...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_E4_DMN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_E4_FH_IHD| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "+--------------------+---------------------+---------+---------+---------------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "# Generating a \"fake\" study index, just for providing ld_population structure for each finngen study:\n", - "studies_df = (\n", - " session.spark.read.parquet('gs://ot-team/dsuveges/finngen_semi_indices_250kbp')\n", - " # Generating a list of study identifiers:\n", - " .select('studyId')\n", - " .distinct()\n", - " # Adding fabricated values required to parse as gwas catalog study:\n", - " .select(\n", - " 'studyId',\n", - " StudyIndex.aggregate_and_map_ancestries(\n", - " f.array(\n", - " f.struct(\n", - " f.lit('Finnish').alias('ancestry'),\n", - " f.lit(100).cast('long').alias('sampleSize')\n", - " )\n", - " )\n", - " ).alias('ldPopulationStructure'),\n", - " f.lit('FINNGEN').alias('projectId'),\n", - " f.lit('gwas').alias('studyType'),\n", - " f.lit('cicaful').alias('traitFromSource')\n", - " )\n", - ")\n", - "\n", - "study_index = (\n", - " StudyIndex(\n", - " _df=studies_df,\n", - " _schema=StudyIndex.get_schema()\n", - " )\n", - ")\n", - "\n", - "study_index.df.show()" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 5:===============================================> (15 + 3) / 18]\r" + ] }, { - "cell_type": "code", - "execution_count": 19, - "id": "cb2d5030", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T09:09:22.063715Z", - "start_time": "2023-10-06T09:09:21.359206Z" - } - }, - "outputs": [], - "source": [ - "# Loading ld index:\n", - "ld_index = LDIndex.from_parquet(session, ld_index_path)\n", - "\n" - ] + "name": 
"stdout", + "output_type": "stream", + "text": [ + "+--------------------+---------------------+---------+---------+---------------+\n", + "| studyId|ldPopulationStructure|projectId|studyType|traitFromSource|\n", + "+--------------------+---------------------+---------+---------+---------------+\n", + "|FINNGEN_R9_K11_EN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_P16_IN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "| FINNGEN_R9_C_STROKE| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_G6_HER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_RHEUMA...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_N14_EN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "| FINNGEN_R9_PAIN| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_D3_COA...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_F5_UNSORG| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_HEIGHT...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_SY...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_APPEND...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_ALLERG...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_FI...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_E4_DM2...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_AUTOIM...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_CD2_BE...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_F5_DIS...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_E4_DMN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_E4_FH_IHD| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "+--------------------+---------------------+---------+---------+---------------+\n", + "only showing top 20 rows\n", + "\n" + ] }, { - "cell_type": "markdown", - "id": "3d13ae41", - "metadata": {}, - "source": [ - "## 4. LD based clumping.\n", - "\n", - "- Persist resulting dataset.\n", - "- Save dataset." 
- ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "# Generating a \"fake\" study index, just for providing ld_population structure for each finngen study:\n", + "studies_df = (\n", + " session.spark.read.parquet(\"gs://ot-team/dsuveges/finngen_semi_indices_250kbp\")\n", + " # Generating a list of study identifiers:\n", + " .select(\"studyId\")\n", + " .distinct()\n", + " # Adding fabricated values required to parse as gwas catalog study:\n", + " .select(\n", + " \"studyId\",\n", + " StudyIndex.aggregate_and_map_ancestries(\n", + " f.array(\n", + " f.struct(\n", + " f.lit(\"Finnish\").alias(\"ancestry\"),\n", + " f.lit(100).cast(\"long\").alias(\"sampleSize\")\n", + " )\n", + " )\n", + " ).alias(\"ldPopulationStructure\"),\n", + " f.lit(\"FINNGEN\").alias(\"projectId\"),\n", + " f.lit(\"gwas\").alias(\"studyType\"),\n", + " f.lit(\"cicaful\").alias(\"traitFromSource\")\n", + " )\n", + ")\n", + "\n", + "study_index = (\n", + " StudyIndex(\n", + " _df=studies_df,\n", + " _schema=StudyIndex.get_schema()\n", + " )\n", + ")\n", + "\n", + "study_index.df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "cb2d5030", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T09:09:22.063715Z", + "start_time": "2023-10-06T09:09:21.359206Z" + } + }, + "outputs": [], + "source": [ + "# Loading ld index:\n", + "ld_index = LDIndex.from_parquet(session, ld_index_path)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "3d13ae41", + "metadata": {}, + "source": [ + "## 4. LD based clumping.\n", + "\n", + "- Persist resulting dataset.\n", + "- Save dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "20449e76", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T09:42:11.040193Z", + "start_time": "2023-10-06T09:09:52.968139Z" }, + "scrolled": false + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 20, - "id": "20449e76", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T09:42:11.040193Z", - "start_time": "2023-10-06T09:09:52.968139Z" - }, - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/06 09:11:42 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1312; previousMaxLatencyMs=1306; operationCount=59583672; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_RX_N05C/chromosome=1/part-00012-a13aba14-79f4-45c0-9af0-937d599fa3f0.c000.snappy.parquet\n", - "23/10/06 09:11:43 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1316; previousMaxLatencyMs=1312; operationCount=59631953; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_RX_CODEINE_TRAMADOL/chromosome=1/part-00012-36e099c5-3f60-4091-bf73-6fa15d48af6c.c000.snappy.parquet\n", - "23/10/06 09:16:19 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1489; previousMaxLatencyMs=1316; operationCount=76331604; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/chromosome=7/part-00010-ff42773a-494c-46d2-bc22-322062b5e715.c000.snappy.parquet\n", - "23/10/06 09:16:21 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. 
latencyMs=1735; previousMaxLatencyMs=1489; operationCount=76482674; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/chromosome=7/part-00010-ff42773a-494c-46d2-bc22-322062b5e715.c000.snappy.parquet\n", - "23/10/06 09:16:23 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_operations. latencyMs=460; previousMaxLatencyMs=337; operationCount=17471; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history/local-1696579691428.inprogress\n", - "23/10/06 09:27:35 WARN GhfsStorageStatistics: Detected potential high latency for operation op_open. latencyMs=1041; previousMaxLatencyMs=1034; operationCount=215062; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_M13_DORSOPATHYNAS/chromosome=19/part-00009-09e75b10-c64d-4b79-9c84-6b0ca039c439.c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=126; previousMaxLatencyMs=93; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552056325521918358154_0027_m_000008_57347/part-00008-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=134; previousMaxLatencyMs=126; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555844252084106444775_0027_m_000028_57367/part-00028-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=134; previousMaxLatencyMs=126; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556216099889261123757_0027_m_000015_57354/part-00015-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=137; previousMaxLatencyMs=134; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553770650668910223592_0027_m_000020_57359/part-00020-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=134; previousMaxLatencyMs=126; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552371970894888794726_0027_m_000025_57364/part-00025-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=136; previousMaxLatencyMs=134; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955320032532820679068_0027_m_000009_57348/part-00009-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=139; previousMaxLatencyMs=136; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554704118087127617221_0027_m_000031_57370/part-00031-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=139; previousMaxLatencyMs=136; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955146935648450954616_0027_m_000021_57360/part-00021-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=140; previousMaxLatencyMs=139; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557457999139377546046_0027_m_000026_57365/part-00026-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=141; previousMaxLatencyMs=140; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557648490159907156305_0027_m_000017_57356/part-00017-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=144; previousMaxLatencyMs=141; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556136722862237844168_0027_m_000013_57352/part-00013-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=143; previousMaxLatencyMs=141; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955993438161489409499_0027_m_000016_57355/part-00016-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=144; previousMaxLatencyMs=141; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552708616931813844767_0027_m_000006_57345/part-00006-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=145; previousMaxLatencyMs=144; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555919710644676214353_0027_m_000004_57343/part-00004-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=145; previousMaxLatencyMs=144; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551292246187607294419_0027_m_000002_57341/part-00002-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=146; previousMaxLatencyMs=145; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551853556311935071964_0027_m_000030_57369/part-00030-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=147; previousMaxLatencyMs=146; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555640079838863826112_0027_m_000022_57361/part-00022-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=147; previousMaxLatencyMs=146; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955962380753726438814_0027_m_000000_57339/part-00000-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=149; previousMaxLatencyMs=147; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551603246466691698658_0027_m_000001_57340/part-00001-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=150; previousMaxLatencyMs=149; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554079176040000118228_0027_m_000027_57366/part-00027-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=154; previousMaxLatencyMs=150; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553545408253172771512_0027_m_000005_57344/part-00005-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=156; previousMaxLatencyMs=154; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556087040172921209608_0027_m_000018_57357/part-00018-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=160; previousMaxLatencyMs=156; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955995307818437095879_0027_m_000024_57363/part-00024-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=161; previousMaxLatencyMs=160; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552743075067405201713_0027_m_000014_57353/part-00014-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=106; previousMaxLatencyMs=95; operationCount=30; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551603246466691698658_0027_m_000001_57340/part-00001-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=138; previousMaxLatencyMs=106; operationCount=31; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552371970894888794726_0027_m_000025_57364/part-00025-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=165; previousMaxLatencyMs=0; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955320032532820679068_0027_m_000009_57348/part-00009-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00009-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=183; previousMaxLatencyMs=165; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955962380753726438814_0027_m_000000_57339/part-00000-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00000-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=186; previousMaxLatencyMs=183; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552708616931813844767_0027_m_000006_57345/part-00006-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00006-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=191; previousMaxLatencyMs=186; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555919710644676214353_0027_m_000004_57343/part-00004-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00004-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=227; previousMaxLatencyMs=186; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554079176040000118228_0027_m_000027_57366/part-00027-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00027-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=233; previousMaxLatencyMs=227; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556216099889261123757_0027_m_000015_57354/part-00015-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00015-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=239; previousMaxLatencyMs=233; operationCount=31; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554704118087127617221_0027_m_000031_57370/part-00031-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00031-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=284; previousMaxLatencyMs=239; operationCount=31; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551853556311935071964_0027_m_000030_57369/part-00030-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00030-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=128; previousMaxLatencyMs=0; operationCount=28; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555919710644676214353_0027_m_000004_57343\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=318; previousMaxLatencyMs=284; operationCount=31; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955146935648450954616_0027_m_000021_57360/part-00021-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00021-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. 
latencyMs=150; previousMaxLatencyMs=128; operationCount=29; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555920613268130744433_0027_m_000010_57349\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=190; previousMaxLatencyMs=150; operationCount=29; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955320032532820679068_0027_m_000009_57348\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=226; previousMaxLatencyMs=190; operationCount=30; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554079176040000118228_0027_m_000027_57366\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=172; previousMaxLatencyMs=161; operationCount=53; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557479860471944928338_0027_m_000035_57374/part-00035-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=146; previousMaxLatencyMs=138; operationCount=53; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557479860471944928338_0027_m_000035_57374/part-00035-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=150; previousMaxLatencyMs=146; operationCount=60; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551578112291921242039_0027_m_000054_57393/part-00054-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=1259; previousMaxLatencyMs=226; operationCount=53; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551853556311935071964_0027_m_000030_57369\n", - "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=1497; previousMaxLatencyMs=1259; operationCount=56; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557355876953011052025_0027_m_000019_57358\n", - "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=711; previousMaxLatencyMs=318; operationCount=75; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557693125728422191174_0027_m_000050_57389/part-00050-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00050-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:59 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=157; previousMaxLatencyMs=150; operationCount=93; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551476648569527086024_0027_m_000080_57419/part-00080-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:41:59 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=752; previousMaxLatencyMs=711; operationCount=91; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554631887941475021153_0027_m_000057_57396/part-00057-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00057-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:41:59 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=182; previousMaxLatencyMs=157; operationCount=96; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556932256815778386100_0027_m_000093_57432/part-00093-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:42:00 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=201; previousMaxLatencyMs=182; operationCount=128; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554714655772639032866_0027_m_000122_57461/part-00122-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:42:01 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=253; previousMaxLatencyMs=201; operationCount=187; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555299227751328654817_0027_m_000175_57513/part-00175-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:42:02 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=1159; previousMaxLatencyMs=752; operationCount=198; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556191692784728530413_0027_m_000172_57510/part-00172-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00172-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", - "23/10/06 09:42:02 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=1051; previousMaxLatencyMs=172; operationCount=200; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955452778920582844510_0027_m_000198_57536/part-00198-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", - "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=11057; previousMaxLatencyMs=1497; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557457999139377546046_0027_m_000026_57365\n", - "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=11233; previousMaxLatencyMs=11057; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551292246187607294419_0027_m_000002_57341\n", - "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=11402; previousMaxLatencyMs=11233; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552371970894888794726_0027_m_000025_57364\n", - "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. 
latencyMs=11583; previousMaxLatencyMs=11402; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556136722862237844168_0027_m_000013_57352\n", - "23/10/06 09:42:09 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=12651; previousMaxLatencyMs=11583; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553722108484298157195_0027_m_000023_57362\n", - "23/10/06 09:42:10 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=13120; previousMaxLatencyMs=12651; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554704118087127617221_0027_m_000031_57370\n", - "23/10/06 09:42:10 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=13440; previousMaxLatencyMs=13120; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553545408253172771512_0027_m_000005_57344\n", - " \r" - ] - } - ], - "source": [ - "(\n", - " # To annotate study/locus, study level info and ld panel is needed:\n", - " LDAnnotator.ld_annotate(\n", - " StudyLocus.from_parquet(session, window_based_clumped_output),\n", - " study_index, \n", - " ld_index\n", - " )\n", - " # Clumping linked study-loci together:\n", - " .clump()\n", - " .df.write.mode('overwrite').parquet(ld_clumped_output)\n", - ")\n", - "\n" - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/06 09:11:42 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. 
latencyMs=1312; previousMaxLatencyMs=1306; operationCount=59583672; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_RX_N05C/chromosome=1/part-00012-a13aba14-79f4-45c0-9af0-937d599fa3f0.c000.snappy.parquet\n", + "23/10/06 09:11:43 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1316; previousMaxLatencyMs=1312; operationCount=59631953; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_RX_CODEINE_TRAMADOL/chromosome=1/part-00012-36e099c5-3f60-4091-bf73-6fa15d48af6c.c000.snappy.parquet\n", + "23/10/06 09:16:19 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1489; previousMaxLatencyMs=1316; operationCount=76331604; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/chromosome=7/part-00010-ff42773a-494c-46d2-bc22-322062b5e715.c000.snappy.parquet\n", + "23/10/06 09:16:21 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1735; previousMaxLatencyMs=1489; operationCount=76482674; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/chromosome=7/part-00010-ff42773a-494c-46d2-bc22-322062b5e715.c000.snappy.parquet\n", + "23/10/06 09:16:23 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_operations. latencyMs=460; previousMaxLatencyMs=337; operationCount=17471; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history/local-1696579691428.inprogress\n", + "23/10/06 09:27:35 WARN GhfsStorageStatistics: Detected potential high latency for operation op_open. 
latencyMs=1041; previousMaxLatencyMs=1034; operationCount=215062; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_M13_DORSOPATHYNAS/chromosome=19/part-00009-09e75b10-c64d-4b79-9c84-6b0ca039c439.c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=126; previousMaxLatencyMs=93; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552056325521918358154_0027_m_000008_57347/part-00008-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=134; previousMaxLatencyMs=126; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555844252084106444775_0027_m_000028_57367/part-00028-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=134; previousMaxLatencyMs=126; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556216099889261123757_0027_m_000015_57354/part-00015-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=137; previousMaxLatencyMs=134; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553770650668910223592_0027_m_000020_57359/part-00020-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=134; previousMaxLatencyMs=126; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552371970894888794726_0027_m_000025_57364/part-00025-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=136; previousMaxLatencyMs=134; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955320032532820679068_0027_m_000009_57348/part-00009-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=139; previousMaxLatencyMs=136; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554704118087127617221_0027_m_000031_57370/part-00031-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=139; previousMaxLatencyMs=136; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955146935648450954616_0027_m_000021_57360/part-00021-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=140; previousMaxLatencyMs=139; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557457999139377546046_0027_m_000026_57365/part-00026-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=141; previousMaxLatencyMs=140; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557648490159907156305_0027_m_000017_57356/part-00017-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=144; previousMaxLatencyMs=141; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556136722862237844168_0027_m_000013_57352/part-00013-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=143; previousMaxLatencyMs=141; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955993438161489409499_0027_m_000016_57355/part-00016-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=144; previousMaxLatencyMs=141; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552708616931813844767_0027_m_000006_57345/part-00006-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=145; previousMaxLatencyMs=144; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555919710644676214353_0027_m_000004_57343/part-00004-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=145; previousMaxLatencyMs=144; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551292246187607294419_0027_m_000002_57341/part-00002-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=146; previousMaxLatencyMs=145; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551853556311935071964_0027_m_000030_57369/part-00030-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=147; previousMaxLatencyMs=146; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555640079838863826112_0027_m_000022_57361/part-00022-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=147; previousMaxLatencyMs=146; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955962380753726438814_0027_m_000000_57339/part-00000-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=149; previousMaxLatencyMs=147; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551603246466691698658_0027_m_000001_57340/part-00001-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=150; previousMaxLatencyMs=149; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554079176040000118228_0027_m_000027_57366/part-00027-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=154; previousMaxLatencyMs=150; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553545408253172771512_0027_m_000005_57344/part-00005-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=156; previousMaxLatencyMs=154; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556087040172921209608_0027_m_000018_57357/part-00018-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=160; previousMaxLatencyMs=156; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955995307818437095879_0027_m_000024_57363/part-00024-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=161; previousMaxLatencyMs=160; operationCount=32; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552743075067405201713_0027_m_000014_57353/part-00014-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:56 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=106; previousMaxLatencyMs=95; operationCount=30; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551603246466691698658_0027_m_000001_57340/part-00001-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=138; previousMaxLatencyMs=106; operationCount=31; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552371970894888794726_0027_m_000025_57364/part-00025-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=165; previousMaxLatencyMs=0; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955320032532820679068_0027_m_000009_57348/part-00009-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00009-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=183; previousMaxLatencyMs=165; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955962380753726438814_0027_m_000000_57339/part-00000-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00000-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=186; previousMaxLatencyMs=183; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552708616931813844767_0027_m_000006_57345/part-00006-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00006-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=191; previousMaxLatencyMs=186; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555919710644676214353_0027_m_000004_57343/part-00004-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00004-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=227; previousMaxLatencyMs=186; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554079176040000118228_0027_m_000027_57366/part-00027-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00027-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=233; previousMaxLatencyMs=227; operationCount=30; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556216099889261123757_0027_m_000015_57354/part-00015-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00015-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=239; previousMaxLatencyMs=233; operationCount=31; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554704118087127617221_0027_m_000031_57370/part-00031-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00031-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=284; previousMaxLatencyMs=239; operationCount=31; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551853556311935071964_0027_m_000030_57369/part-00030-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00030-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=128; previousMaxLatencyMs=0; operationCount=28; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555919710644676214353_0027_m_000004_57343\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. 
latencyMs=318; previousMaxLatencyMs=284; operationCount=31; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955146935648450954616_0027_m_000021_57360/part-00021-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00021-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=150; previousMaxLatencyMs=128; operationCount=29; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555920613268130744433_0027_m_000010_57349\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=190; previousMaxLatencyMs=150; operationCount=29; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955320032532820679068_0027_m_000009_57348\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=226; previousMaxLatencyMs=190; operationCount=30; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554079176040000118228_0027_m_000027_57366\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=172; previousMaxLatencyMs=161; operationCount=53; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557479860471944928338_0027_m_000035_57374/part-00035-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:57 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=146; previousMaxLatencyMs=138; operationCount=53; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557479860471944928338_0027_m_000035_57374/part-00035-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=150; previousMaxLatencyMs=146; operationCount=60; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551578112291921242039_0027_m_000054_57393/part-00054-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=1259; previousMaxLatencyMs=226; operationCount=53; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551853556311935071964_0027_m_000030_57369\n", + "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=1497; previousMaxLatencyMs=1259; operationCount=56; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557355876953011052025_0027_m_000019_57358\n", + "23/10/06 09:41:58 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=711; previousMaxLatencyMs=318; operationCount=75; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557693125728422191174_0027_m_000050_57389/part-00050-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00050-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:59 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=157; previousMaxLatencyMs=150; operationCount=93; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551476648569527086024_0027_m_000080_57419/part-00080-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:41:59 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=752; previousMaxLatencyMs=711; operationCount=91; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554631887941475021153_0027_m_000057_57396/part-00057-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00057-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:41:59 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=182; previousMaxLatencyMs=157; operationCount=96; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556932256815778386100_0027_m_000093_57432/part-00093-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:42:00 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=201; previousMaxLatencyMs=182; operationCount=128; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554714655772639032866_0027_m_000122_57461/part-00122-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:42:01 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=253; previousMaxLatencyMs=201; operationCount=187; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909555299227751328654817_0027_m_000175_57513/part-00175-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:42:02 WARN GhfsStorageStatistics: Detected potential high latency for operation op_rename. latencyMs=1159; previousMaxLatencyMs=752; operationCount=198; context=rename(gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556191692784728530413_0027_m_000172_57510/part-00172-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet -> gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/part-00172-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet)\n", + "23/10/06 09:42:02 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=1051; previousMaxLatencyMs=172; operationCount=200; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_20231006090955452778920582844510_0027_m_000198_57536/part-00198-a0adc7ff-71a9-4671-ad1e-0093f9d7d260-c000.snappy.parquet\n", + "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=11057; previousMaxLatencyMs=1497; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909557457999139377546046_0027_m_000026_57365\n", + "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=11233; previousMaxLatencyMs=11057; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909551292246187607294419_0027_m_000002_57341\n", + "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. 
latencyMs=11402; previousMaxLatencyMs=11233; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909552371970894888794726_0027_m_000025_57364\n", + "23/10/06 09:42:08 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=11583; previousMaxLatencyMs=11402; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909556136722862237844168_0027_m_000013_57352\n", + "23/10/06 09:42:09 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=12651; previousMaxLatencyMs=11583; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553722108484298157195_0027_m_000023_57362\n", + "23/10/06 09:42:10 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. latencyMs=13120; previousMaxLatencyMs=12651; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909554704118087127617221_0027_m_000031_57370\n", + "23/10/06 09:42:10 WARN GhfsStorageStatistics: Detected potential high latency for operation op_delete. 
latencyMs=13440; previousMaxLatencyMs=13120; operationCount=199; context=gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped/_temporary/0/_temporary/attempt_202310060909553545408253172771512_0027_m_000005_57344\n", + " \r" + ] + } + ], + "source": [ + "(\n", + " # To annotate study/locus, study level info and ld panel is needed:\n", + " LDAnnotator.ld_annotate(\n", + " StudyLocus.from_parquet(session, window_based_clumped_output),\n", + " study_index,\n", + " ld_index\n", + " )\n", + " # Clumping linked study-loci together:\n", + " .clump()\n", + " .df.write.mode(\"overwrite\").parquet(ld_clumped_output)\n", + ")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "3ec08141", + "metadata": {}, + "source": [ + "## 5. PICS - finemapping." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "aab9d72e", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T09:50:46.179833Z", + "start_time": "2023-10-06T09:49:25.408253Z" + } + }, + "outputs": [ { - "cell_type": "markdown", - "id": "3ec08141", - "metadata": {}, - "source": [ - "## 5. PICS - finemapping." - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/06 09:50:16 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=1081; previousMaxLatencyMs=1051; operationCount=396; context=gs://ot-team/dsuveges/finngen/2023.10.06_PICSed/_temporary/0/_temporary/attempt_202310060949259143728196112407547_0033_m_000193_57732/part-00193-9d4ae5b4-a18c-4406-bd26-9387e62477de-c000.snappy.parquet\n", + " \r" + ] + } + ], + "source": [ + "(\n", + " # The previously generated LD clumped dataset is read as StudyLocus:\n", + " PICS.finemap(StudyLocus.from_parquet(session, ld_clumped_output))\n", + " .annotate_credible_sets()\n", + " .df.write.mode(\"overwrite\").parquet(picsed_output)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0bc2cb6e", + "metadata": {}, + "source": [ + "## QC results - Sumstats" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "7cbeab8b", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T10:40:53.295506Z", + "start_time": "2023-10-06T10:26:04.284592Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 21, - "id": "aab9d72e", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T09:50:46.179833Z", - "start_time": "2023-10-06T09:49:25.408253Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/06 09:50:16 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=1081; previousMaxLatencyMs=1051; operationCount=396; context=gs://ot-team/dsuveges/finngen/2023.10.06_PICSed/_temporary/0/_temporary/attempt_202310060949259143728196112407547_0033_m_000193_57732/part-00193-9d4ae5b4-a18c-4406-bd26-9387e62477de-c000.snappy.parquet\n", - " \r" - ] - } - ], - "source": [ - "(\n", - " # The previously generated LD clumped dataset is read as StudyLocus:\n", - " PICS.finemap(StudyLocus.from_parquet(session, ld_clumped_output))\n", - " .annotate_credible_sets()\n", - " .df.write.mode('overwrite').parquet(picsed_output)\n", - ")" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/06 10:26:41 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_operations. latencyMs=572; previousMaxLatencyMs=460; operationCount=925353; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history/local-1696579691428.inprogress\n", + "[Stage 57:===========================================> (3 + 1) / 4]\r" + ] }, { - "cell_type": "markdown", - "id": "0bc2cb6e", - "metadata": {}, - "source": [ - "## QC results - Sumstats" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "In the Finngen dataset, there are 45,820,490,260 associations from 2,272 studies.\n" + ] }, { - "cell_type": "code", - "execution_count": 35, - "id": "7cbeab8b", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:40:53.295506Z", - "start_time": "2023-10-06T10:26:04.284592Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/06 10:26:41 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_operations. 
latencyMs=572; previousMaxLatencyMs=460; operationCount=925353; context=gs://dataproc-temp-europe-west1-426265110888-ymkbpaze/64dcfdf8-46d3-4b5c-aad4-0a12ee0ba91a/spark-job-history/local-1696579691428.inprogress\n", - "[Stage 57:===========================================> (3 + 1) / 4]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In the Finngen dataset, there are 45,820,490,260 associations from 2,272 studies.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "assoc_count = finngen_sumstats.df.count()\n", - "study_count = finngen_sumstats.df.select('studyId').distinct().count()\n", - "\n", - "print(f'In the Finngen dataset, there are {assoc_count:,} associations from {study_count:,} studies.')" - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "assoc_count = finngen_sumstats.df.count()\n", + "study_count = finngen_sumstats.df.select(\"studyId\").distinct().count()\n" + ] + }, + { + "cell_type": "markdown", + "id": "19f94399", + "metadata": {}, + "source": [ + "## QC results - Window based clumping" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "5ff794d4", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T10:41:39.724996Z", + "start_time": "2023-10-06T10:41:38.322389Z" + } + }, + "outputs": [ { - "cell_type": "markdown", - "id": "19f94399", - "metadata": {}, - "source": [ - "## QC results - Window based clumping" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "In the window based cluped dataset, there are 19,005 semi-indices, from 1,387 studies.\n" + ] + } + ], + "source": [ + "windowed_count = session.spark.read.parquet(window_based_clumped_output).count()\n", + "windowed_studies = session.spark.read.parquet(window_based_clumped_output).select(\"studyId\").distinct().count()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "d88b0d87", + "metadata": { + 
"ExecuteTime": { + "end_time": "2023-10-06T10:08:22.129308Z", + "start_time": "2023-10-06T10:08:18.802493Z" + } + }, + "source": [ + "## QC results - LD clumping" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "3cd88471", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T10:43:29.370733Z", + "start_time": "2023-10-06T10:43:26.646342Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 37, - "id": "5ff794d4", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:41:39.724996Z", - "start_time": "2023-10-06T10:41:38.322389Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In the window based cluped dataset, there are 19,005 semi-indices, from 1,387 studies.\n" - ] - } - ], - "source": [ - "windowed_count = session.spark.read.parquet(window_based_clumped_output).count()\n", - "windowed_studies = session.spark.read.parquet(window_based_clumped_output).select('studyId').distinct().count()\n", - "\n", - "print(f'In the window based cluped dataset, there are {windowed_count:,} semi-indices, from {windowed_studies:,} studies.')\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 87:======================================================> (28 + 1) / 29]\r" + ] }, { - "cell_type": "markdown", - "id": "d88b0d87", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:08:22.129308Z", - "start_time": "2023-10-06T10:08:18.802493Z" - } - }, - "source": [ - "## QC results - LD clumping" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "In the LD based cluped dataset, there are 19,005 semi-indices, from 1,387 studies.\n" + ] }, { - "cell_type": "code", - "execution_count": 38, - "id": "3cd88471", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:43:29.370733Z", - "start_time": "2023-10-06T10:43:26.646342Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 
87:======================================================> (28 + 1) / 29]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In the LD based cluped dataset, there are 19,005 semi-indices, from 1,387 studies.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "ld_clumped_df = session.spark.read.parquet(ld_clumped_output)\n", - "\n", - "windowed_count = ld_clumped_df.count()\n", - "windowed_studies = ld_clumped_df.select('studyId').distinct().count()\n", - "\n", - "print(f'In the LD based cluped dataset, there are {windowed_count:,} semi-indices, from {windowed_studies:,} studies.')\n" - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "ld_clumped_df = session.spark.read.parquet(ld_clumped_output)\n", + "\n", + "windowed_count = ld_clumped_df.count()\n", + "windowed_studies = ld_clumped_df.select(\"studyId\").distinct().count()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "d9d002d0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T10:44:36.008520Z", + "start_time": "2023-10-06T10:44:34.483964Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 40, - "id": "d9d002d0", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:44:36.008520Z", - "start_time": "2023-10-06T10:44:34.483964Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 93:====================================================> (27 + 2) / 29]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------------------------------------------------+-----+\n", - "|qualityControls |count|\n", - "+--------------------------------------------------------------+-----+\n", - "|[Variant not found in LD reference] |4706 |\n", - "|[] |13714|\n", - "|[Explained by a more significant variant in high LD (clumped)]|585 |\n", - 
"+--------------------------------------------------------------+-----+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "ld_clumped_df.groupBy('qualityControls').count().show(truncate=False)" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 93:====================================================> (27 + 2) / 29]\r" + ] }, { - "cell_type": "markdown", - "id": "7a3be915", - "metadata": {}, - "source": [ - "## QC results - PICS" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------------------------------------------------+-----+\n", + "|qualityControls |count|\n", + "+--------------------------------------------------------------+-----+\n", + "|[Variant not found in LD reference] |4706 |\n", + "|[] |13714|\n", + "|[Explained by a more significant variant in high LD (clumped)]|585 |\n", + "+--------------------------------------------------------------+-----+\n", + "\n" + ] }, { - "cell_type": "code", - "execution_count": 43, - "id": "b1ddd53c", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T10:59:16.339543Z", - "start_time": "2023-10-06T10:59:15.930105Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-RECORD 0-----------------------------------------------\n", - " variantId | 20_35437976_G_A \n", - " chromosome | 20 \n", - " studyId | FINNGEN_R9_HEIGHT... \n", - " position | 35437976 \n", - " pValueMantissa | 3.811 \n", - " pValueExponent | -193 \n", - " beta | -0.0551669 \n", - " standardError | 0.00186085 \n", - " effectAlleleFrequencyFromSource | 0.5599 \n", - " betaConfidenceIntervalLower | -0.05702774999999... \n", - " betaConfidenceIntervalUpper | -0.05330605 \n", - " studyLocusId | 5242723067793949472 \n", - " qualityControls | [Variant not foun... 
\n", - " ldSet | null \n", - " locus | null \n", - "only showing top 1 row\n", - "\n", - "root\n", - " |-- variantId: string (nullable = true)\n", - " |-- chromosome: string (nullable = true)\n", - " |-- studyId: string (nullable = true)\n", - " |-- position: integer (nullable = true)\n", - " |-- pValueMantissa: float (nullable = true)\n", - " |-- pValueExponent: integer (nullable = true)\n", - " |-- beta: double (nullable = true)\n", - " |-- standardError: double (nullable = true)\n", - " |-- effectAlleleFrequencyFromSource: float (nullable = true)\n", - " |-- betaConfidenceIntervalLower: double (nullable = true)\n", - " |-- betaConfidenceIntervalUpper: double (nullable = true)\n", - " |-- studyLocusId: long (nullable = true)\n", - " |-- qualityControls: array (nullable = true)\n", - " | |-- element: string (containsNull = true)\n", - " |-- ldSet: array (nullable = true)\n", - " | |-- element: struct (containsNull = true)\n", - " | | |-- tagVariantId: string (nullable = true)\n", - " | | |-- r2Overall: double (nullable = true)\n", - " |-- locus: array (nullable = true)\n", - " | |-- element: struct (containsNull = true)\n", - " | | |-- variantId: string (nullable = true)\n", - " | | |-- r2Overall: double (nullable = true)\n", - " | | |-- posteriorProbability: double (nullable = true)\n", - " | | |-- standardError: double (nullable = true)\n", - " | | |-- is95CredibleSet: boolean (nullable = true)\n", - " | | |-- is99CredibleSet: boolean (nullable = true)\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/06 10:59:16 WARN CacheManager: Asked to cache already cached data.\n" - ] - }, - { - "data": { - "text/plain": [ - "19005" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "picsed_df = session.spark.read.parquet(picsed_output).persist()\n", - "picsed_df.show(1, vertical=True)\n", - "picsed_df.printSchema()\n", - "picsed_df.count()\n" - ] - }, + "name": "stderr", 
+ "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "ld_clumped_df.groupBy(\"qualityControls\").count().show(truncate=False)" + ] + }, + { + "cell_type": "markdown", + "id": "7a3be915", + "metadata": {}, + "source": [ + "## QC results - PICS" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "b1ddd53c", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T10:59:16.339543Z", + "start_time": "2023-10-06T10:59:15.930105Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 59, - "id": "937e1eb0", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-06T11:11:55.487431Z", - "start_time": "2023-10-06T11:11:54.898030Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "(\n", - " picsed_df\n", - " .filter(f.col('locus').isNotNull())\n", - " .select(\n", - " 'studyId',\n", - " 'variantId',\n", - " f.aggregate(\n", - " f.transform(\n", - " f.col('locus'),\n", - " lambda locus: f.when(locus.is99CredibleSet, f.lit(1.0)).otherwise( f.lit(0.0))\n", - " ),\n", - " f.lit(0.0),\n", - " lambda summa, value: summa + value\n", - " ).alias('99CredCount'),\n", - " f.aggregate(\n", - " f.transform(\n", - " f.col('locus'),\n", - " lambda locus: f.when(locus.is95CredibleSet, f.lit(1.0)).otherwise( f.lit(0.0))\n", - " ),\n", - " f.lit(0.0),\n", - " lambda summa, value: summa + value\n", - " ).alias('95CredCount'),\n", - " )\n", - "# .orderBy(f.col('99CredCount').desc())\n", - "# .show()\n", - " .filter(f.col('99CredCount') < 100)\n", - "# .count()\n", - " .toPandas()\n", - " [['99CredCount', '95CredCount']]\n", - " .plot.hist(bins=25, alpha=0.5, label='Credible set size')\n", - ")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0-----------------------------------------------\n", + " variantId | 20_35437976_G_A \n", + " chromosome | 20 \n", + " studyId | FINNGEN_R9_HEIGHT... \n", + " position | 35437976 \n", + " pValueMantissa | 3.811 \n", + " pValueExponent | -193 \n", + " beta | -0.0551669 \n", + " standardError | 0.00186085 \n", + " effectAlleleFrequencyFromSource | 0.5599 \n", + " betaConfidenceIntervalLower | -0.05702774999999... \n", + " betaConfidenceIntervalUpper | -0.05330605 \n", + " studyLocusId | 5242723067793949472 \n", + " qualityControls | [Variant not foun... 
\n", + " ldSet | null \n", + " locus | null \n", + "only showing top 1 row\n", + "\n", + "root\n", + " |-- variantId: string (nullable = true)\n", + " |-- chromosome: string (nullable = true)\n", + " |-- studyId: string (nullable = true)\n", + " |-- position: integer (nullable = true)\n", + " |-- pValueMantissa: float (nullable = true)\n", + " |-- pValueExponent: integer (nullable = true)\n", + " |-- beta: double (nullable = true)\n", + " |-- standardError: double (nullable = true)\n", + " |-- effectAlleleFrequencyFromSource: float (nullable = true)\n", + " |-- betaConfidenceIntervalLower: double (nullable = true)\n", + " |-- betaConfidenceIntervalUpper: double (nullable = true)\n", + " |-- studyLocusId: long (nullable = true)\n", + " |-- qualityControls: array (nullable = true)\n", + " | |-- element: string (containsNull = true)\n", + " |-- ldSet: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- tagVariantId: string (nullable = true)\n", + " | | |-- r2Overall: double (nullable = true)\n", + " |-- locus: array (nullable = true)\n", + " | |-- element: struct (containsNull = true)\n", + " | | |-- variantId: string (nullable = true)\n", + " | | |-- r2Overall: double (nullable = true)\n", + " | | |-- posteriorProbability: double (nullable = true)\n", + " | | |-- standardError: double (nullable = true)\n", + " | | |-- is95CredibleSet: boolean (nullable = true)\n", + " | | |-- is99CredibleSet: boolean (nullable = true)\n", + "\n" + ] }, { - "cell_type": "markdown", - "id": "a8b99c39", - "metadata": {}, - "source": [ - "## Finngen clumping with locus\n", - "\n", - "- Window width: 500kbp\n", - "- Locus width: 250kbp\n", - "- LD threshold: 0.5\n", - "\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/06 10:59:16 WARN CacheManager: Asked to cache already cached data.\n" + ] }, { - "cell_type": "code", - "execution_count": 2, - "id": "09f14d49", - "metadata": { - "ExecuteTime": { - "end_time": 
"2023-10-13T09:54:35.337711Z", - "start_time": "2023-10-13T09:54:34.446422Z" - } - }, - "outputs": [], - "source": [ - "# Import:\n", - "from pyspark.sql import functions as f, types as t\n", - "\n", - "from gentropy.common.session import Session\n", - "\n", - "from gentropy.dataset.summary_statistics import SummaryStatistics\n", - "from gentropy.dataset.study_locus import StudyLocus\n", - "from gentropy.dataset.study_index import StudyIndex\n", - "from gentropy.dataset.ld_index import LDIndex\n", - "from gentropy.method.ld import LDAnnotator\n", - "\n", - "from gentropy.method.pics import PICS\n", - "\n", - "# Initialize session:\n", - "session = Session()\n", - "\n", - "# Input:\n", - "sumstats = 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/*'\n", - "ld_index_path = 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/'\n", - "\n", - "# Output:\n", - "ld_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped'\n", - "picsed_output = 'gs://ot-team/dsuveges/finngen/2023.10.06_PICSed'\n", - "window_based_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.06_window_clumped'" + "data": { + "text/plain": [ + "19005" ] - }, + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "picsed_df = session.spark.read.parquet(picsed_output).persist()\n", + "picsed_df.show(1, vertical=True)\n", + "picsed_df.printSchema()\n", + "picsed_df.count()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "937e1eb0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-06T11:11:55.487431Z", + "start_time": "2023-10-06T11:11:54.898030Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 16, - "id": "52861491", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-13T11:39:14.790777Z", - "start_time": "2023-10-13T10:47:20.349271Z" - }, - "code_folding": [], - "scrolled": false - }, - 
"outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/13 10:47:53 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", - "23/10/13 10:47:53 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n", - "23/10/13 10:48:19 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1248; previousMaxLatencyMs=1230; operationCount=59116581; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_OTHER_SYSTCON_FG/chromosome=2/part-00019-1d683f00-9247-401b-846c-8b2498bc68bf.c000.snappy.parquet\n", - "23/10/13 10:49:02 WARN GhfsStorageStatistics: Detected potential high latency for operation op_get_file_status. latencyMs=1039; previousMaxLatencyMs=1037; operationCount=126124; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_K11_SCISS_BITE_INCLAVO/chromosome=2/part-00018-e37f4a01-bdb4-442f-8dcd-681da303e876.c000.snappy.parquet\n", - "23/10/13 10:52:37 WARN GhfsStorageStatistics: Detected potential high latency for operation op_get_file_status. latencyMs=1917; previousMaxLatencyMs=1039; operationCount=144264; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_AUTOIMMUNE_NONTHYROID_STRICT/chromosome=5/part-00006-02537c5f-2dea-47c5-8655-b0d84827115a.c000.snappy.parquet\n", - "23/10/13 10:52:38 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. 
latencyMs=1942; previousMaxLatencyMs=1248; operationCount=94444057; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_K11_PULPITIS_1_ONLYAVO/chromosome=5/part-00006-de277609-305a-4cec-8bb1-5b37a70be48b.c000.snappy.parquet\n", - "23/10/13 11:07:41 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", - "23/10/13 11:12:32 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", - "23/10/13 11:12:48 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", - "23/10/13 11:21:40 WARN GoogleCloudStorageReadChannel: Failed read retry #1/10 for 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_H7_CONJUHAEMOR/chromosome=12/part-00016-03e2c019-d707-414b-b4e1-8b6474ccfb26.c000.snappy.parquet'. 
Sleeping...\n", - "java.net.SocketException: Connection reset\n", - "\tat java.net.SocketInputStream.read(SocketInputStream.java:186) ~[?:?]\n", - "\tat java.net.SocketInputStream.read(SocketInputStream.java:140) ~[?:?]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readFromSocket(ConscryptEngineSocket.java:920) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.processDataFromSocket(ConscryptEngineSocket.java:884) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readUntilDataAvailable(ConscryptEngineSocket.java:799) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.read(ConscryptEngineSocket.java:772) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", - "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", - "\tat sun.net.www.MeteredStream.read(MeteredStream.java:134) ~[?:?]\n", - "\tat java.io.FilterInputStream.read(FilterInputStream.java:133) ~[?:?]\n", - "\tat sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:3529) ~[?:?]\n", - "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.javanet.NetHttpResponse$SizeValidatingInputStream.read(NetHttpResponse.java:164) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", - "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", - "\tat java.nio.channels.Channels$ReadableByteChannelImpl.read(Channels.java:388) ~[?:?]\n", - "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadChannel.read(GoogleCloudStorageReadChannel.java:315) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat 
com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.lambda$read$1(GoogleHadoopFSInputStream.java:170) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat com.google.cloud.hadoop.fs.gcs.GhfsStorageStatistics.trackDuration(GhfsStorageStatistics.java:77) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.read(GoogleHadoopFSInputStream.java:159) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat java.io.DataInputStream.read(DataInputStream.java:149) ~[?:?]\n", - "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:102) ~[parquet-common-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFullyHeapBuffer(DelegatingSeekableInputStream.java:127) ~[parquet-common-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:91) ~[parquet-common-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.hadoop.ParquetFileReader$ConsecutivePartList.readAll(ParquetFileReader.java:1704) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextRowGroup(ParquetFileReader.java:925) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextFilteredRowGroup(ParquetFileReader.java:972) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", - "\tat org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase$ParquetRowGroupReaderImpl.readNextRowGroup(SpecificParquetRecordReaderBase.java:320) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:403) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:324) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat 
org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:227) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:274) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:565) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source) ~[?:?]\n", - "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?]\n", - "\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.14.jar:?]\n", - "\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:142) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.scheduler.Task.run(Task.scala:136) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]\n", - "\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]\n", - "\tat java.lang.Thread.run(Thread.java:829) ~[?:?]\n", - "23/10/13 11:21:41 WARN GoogleCloudStorageReadChannel: Failed read retry #1/10 for 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_AMN2/chromosome=12/part-00016-6c463da6-b063-495d-8b62-8ad766491cfb.c000.snappy.parquet'. 
Sleeping...\n", - "java.net.SocketException: Connection reset\n", - "\tat java.net.SocketInputStream.read(SocketInputStream.java:186) ~[?:?]\n", - "\tat java.net.SocketInputStream.read(SocketInputStream.java:140) ~[?:?]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readFromSocket(ConscryptEngineSocket.java:920) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.processDataFromSocket(ConscryptEngineSocket.java:884) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readUntilDataAvailable(ConscryptEngineSocket.java:799) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.read(ConscryptEngineSocket.java:772) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", - "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", - "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", - "\tat sun.net.www.MeteredStream.read(MeteredStream.java:134) ~[?:?]\n", - "\tat java.io.FilterInputStream.read(FilterInputStream.java:133) ~[?:?]\n", - "\tat sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:3529) ~[?:?]\n", - "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.javanet.NetHttpResponse$SizeValidatingInputStream.read(NetHttpResponse.java:164) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", - "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", - "\tat java.nio.channels.Channels$ReadableByteChannelImpl.read(Channels.java:388) ~[?:?]\n", - "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadChannel.read(GoogleCloudStorageReadChannel.java:315) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat 
com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.lambda$read$1(GoogleHadoopFSInputStream.java:170) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat com.google.cloud.hadoop.fs.gcs.GhfsStorageStatistics.trackDuration(GhfsStorageStatistics.java:77) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.read(GoogleHadoopFSInputStream.java:159) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", - "\tat java.io.DataInputStream.read(DataInputStream.java:149) ~[?:?]\n", - "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:102) ~[parquet-common-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFullyHeapBuffer(DelegatingSeekableInputStream.java:127) ~[parquet-common-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:91) ~[parquet-common-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.hadoop.ParquetFileReader$ConsecutivePartList.readAll(ParquetFileReader.java:1704) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextRowGroup(ParquetFileReader.java:925) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", - "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextFilteredRowGroup(ParquetFileReader.java:972) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", - "\tat org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase$ParquetRowGroupReaderImpl.readNextRowGroup(SpecificParquetRecordReaderBase.java:320) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:403) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:324) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat 
org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:227) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:274) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:565) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source) ~[?:?]\n", - "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?]\n", - "\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", - "\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.14.jar:?]\n", - "\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:170) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.scheduler.Task.run(Task.scala:136) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", - "\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]\n", - "\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]\n", - "\tat java.lang.Thread.run(Thread.java:829) ~[?:?]\n", - "23/10/13 11:36:03 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", - "23/10/13 11:39:00 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=202; previousMaxLatencyMs=190; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_20231013113900962145652452525416_0067_m_000009_79286/part-00009-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:00 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=216; previousMaxLatencyMs=202; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139003257422348970599913_0067_m_000028_79305/part-00028-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:00 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=218; previousMaxLatencyMs=216; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139006484121277706965747_0067_m_000003_79280/part-00003-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:01 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=243; previousMaxLatencyMs=218; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139004953018478188900860_0067_m_000023_79300/part-00023-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=165; previousMaxLatencyMs=153; operationCount=92; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139003598197994288369319_0067_m_000026_79303/part-00026-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=193; previousMaxLatencyMs=165; operationCount=99; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139004279983947938826985_0067_m_000025_79302/part-00025-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=243; previousMaxLatencyMs=193; operationCount=103; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139004602145763603687891_0067_m_000030_79307/part-00030-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=329; previousMaxLatencyMs=243; operationCount=104; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_20231013113900353649088592248874_0067_m_000005_79282/part-00005-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", - " \r" - ] - } - ], - "source": [ - "window_based_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus'\n", - "\n", - "clump_window_length = 500_000\n", - "locus_window_length = 250_000\n", - "\n", - "(\n", - " SummaryStatistics(\n", - " _df=(\n", - " session.spark.read.parquet(sumstats, recursiveFileLookup=True)\n", - " .withColumn(\n", - " 'chromosome',\n", - " f.split(f.col('variantId'), '_')[0]\n", - " )\n", - " ),\n", - " _schema=SummaryStatistics.get_schema()\n", - " )\n", - " .window_based_clumping(\n", - " distance=clump_window_length,\n", - " locus_collect_distance=locus_window_length,\n", - " with_locus=True\n", - " )\n", - " .df.write.mode('overwrite')\n", - " .parquet(window_based_clumped_output)\n", - ")\n" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "execution_count": 18, - "id": "6b234362", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-13T12:39:15.023402Z", - "start_time": "2023-10-13T11:50:42.603425Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+---------------------+---------+---------+---------------+\n", - "| 
studyId|ldPopulationStructure|projectId|studyType|traitFromSource|\n", - "+--------------------+---------------------+---------+---------+---------------+\n", - "|FINNGEN_R9_K11_EN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_H7_KER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_H8_EXT...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_H7_RET...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_RHEUMA...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_H7_KER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_HEIGHT...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_SY...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_DO...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_PY...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "| FINNGEN_R9_GOUT_NOS| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_FI...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_ALLERG...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_E4_DM2...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_G6_CER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_L12_UR...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_AUTOIM...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_I9_HYP...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_M13_AR...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "|FINNGEN_R9_K11_AP...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", - "+--------------------+---------------------+---------+---------+---------------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "23/10/13 11:55:34 WARN GhfsStorageStatistics: Detected potential high latency for operation op_open. 
latencyMs=676; previousMaxLatencyMs=470; operationCount=333148; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/chromosome=5/part-00076-ff42773a-494c-46d2-bc22-322062b5e715.c000.snappy.parquet\n", - "23/10/13 12:39:11 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=279; previousMaxLatencyMs=243; operationCount=140; context=gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus/_temporary/0/_temporary/attempt_202310131239103300432709600941830_0086_m_000000_80535/part-00000-17d449c4-f0c3-4617-b378-b74b864ab64a-c000.snappy.parquet\n", - "23/10/13 12:39:11 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=335; previousMaxLatencyMs=279; operationCount=140; context=gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus/_temporary/0/_temporary/attempt_202310131239106417331548307580589_0086_m_000027_80562/part-00027-17d449c4-f0c3-4617-b378-b74b864ab64a-c000.snappy.parquet\n", - " \r" - ] - } - ], - "source": [ - "ld_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus'\n", - "\n", - "studies_df = (\n", - " session.spark.read.parquet(window_based_clumped_output)\n", - " # Generating a list of study identifiers:\n", - " .select('studyId')\n", - " .distinct()\n", - " # Adding fabricated values required to parse as gwas catalog study:\n", - " .select(\n", - " 'studyId',\n", - " StudyIndex.aggregate_and_map_ancestries(\n", - " f.array(\n", - " f.struct(\n", - " f.lit('Finnish').alias('ancestry'),\n", - " f.lit(100).cast('long').alias('sampleSize')\n", - " )\n", - " )\n", - " ).alias('ldPopulationStructure'),\n", - " f.lit('FINNGEN').alias('projectId'),\n", - " f.lit('gwas').alias('studyType'),\n", - " f.lit('cicaful').alias('traitFromSource')\n", - " )\n", - ")\n", - "\n", - "study_index = (\n", - " StudyIndex(\n", - " _df=studies_df,\n", - " _schema=StudyIndex.get_schema()\n", - " )\n", - ")\n", - "\n", - 
"study_index.df.show()\n", - "\n", - "# Loading ld index:\n", - "ld_index = LDIndex.from_parquet(session, ld_index_path)\n", - "\n", - "(\n", - " # To annotate study/locus, study level info and ld panel is needed:\n", - " LDAnnotator.ld_annotate(\n", - " StudyLocus.from_parquet(session, window_based_clumped_output),\n", - " study_index, \n", - " ld_index\n", - " )\n", - " # Clumping linked study-loci together:\n", - " .clump()\n", - " .df.write.mode('overwrite').parquet(ld_clumped_output)\n", - ")\n", - "\n", - "\n" + "data": { + "image/png": "", + "text/plain": [ + "
" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "(\n", + " picsed_df\n", + " .filter(f.col(\"locus\").isNotNull())\n", + " .select(\n", + " \"studyId\",\n", + " \"variantId\",\n", + " f.aggregate(\n", + " f.transform(\n", + " f.col(\"locus\"),\n", + " lambda locus: f.when(locus.is99CredibleSet, f.lit(1.0)).otherwise( f.lit(0.0))\n", + " ),\n", + " f.lit(0.0),\n", + " lambda summa, value: summa + value\n", + " ).alias(\"99CredCount\"),\n", + " f.aggregate(\n", + " f.transform(\n", + " f.col(\"locus\"),\n", + " lambda locus: f.when(locus.is95CredibleSet, f.lit(1.0)).otherwise( f.lit(0.0))\n", + " ),\n", + " f.lit(0.0),\n", + " lambda summa, value: summa + value\n", + " ).alias(\"95CredCount\"),\n", + " )\n", + "# .orderBy(f.col('99CredCount').desc())\n", + "# .show()\n", + " .filter(f.col(\"99CredCount\") < 100)\n", + "# .count()\n", + " .toPandas()\n", + " [[\"99CredCount\", \"95CredCount\"]]\n", + " .plot.hist(bins=25, alpha=0.5, label=\"Credible set size\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a8b99c39", + "metadata": {}, + "source": [ + "## Finngen clumping with locus\n", + "\n", + "- Window width: 500kbp\n", + "- Locus width: 250kbp\n", + "- LD threshold: 0.5\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "09f14d49", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-13T09:54:35.337711Z", + "start_time": "2023-10-13T09:54:34.446422Z" + } + }, + "outputs": [], + "source": [ + "# Import:\n", + "from pyspark.sql import functions as f\n", + "\n", + "from gentropy.common.session import Session\n", + "from gentropy.dataset.ld_index import LDIndex\n", + "from gentropy.dataset.study_index import StudyIndex\n", + "from gentropy.dataset.study_locus import StudyLocus\n", + "from gentropy.dataset.summary_statistics import SummaryStatistics\n", + "from gentropy.method.ld import LDAnnotator\n", + "from gentropy.method.pics import PICS\n", + "\n", + "# Initialize session:\n", + 
"session = Session()\n", + "\n", + "# Input:\n", + "sumstats = \"gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/*\"\n", + "ld_index_path = \"gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/\"\n", + "\n", + "# Output:\n", + "ld_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.06_LD_clumped\"\n", + "picsed_output = \"gs://ot-team/dsuveges/finngen/2023.10.06_PICSed\"\n", + "window_based_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.06_window_clumped\"" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "52861491", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-13T11:39:14.790777Z", + "start_time": "2023-10-13T10:47:20.349271Z" }, + "code_folding": [], + "scrolled": false + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 9, - "id": "f637e3c0", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-13T10:43:26.766382Z", - "start_time": "2023-10-13T10:43:25.980232Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------------------------------------------------+-----+\n", - "|qualityControls |count|\n", - "+--------------------------------------------------------------+-----+\n", - "|[Variant not found in LD reference] |4607 |\n", - "|[] |13813|\n", - "|[Explained by a more significant variant in high LD (clumped)]|585 |\n", - "+--------------------------------------------------------------+-----+\n", - "\n" - ] - } - ], - "source": [ - "(\n", - " session.spark.read.parquet(ld_clumped_output)\n", - " .groupBy('qualityControls')\n", - " .count()\n", - "# .show(1, False, True)\n", - " .show(truncate=False)\n", - ")" - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/13 10:47:53 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", + "23/10/13 10:47:53 WARN package: Truncated the 
string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n", + "23/10/13 10:48:19 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. latencyMs=1248; previousMaxLatencyMs=1230; operationCount=59116581; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_OTHER_SYSTCON_FG/chromosome=2/part-00019-1d683f00-9247-401b-846c-8b2498bc68bf.c000.snappy.parquet\n", + "23/10/13 10:49:02 WARN GhfsStorageStatistics: Detected potential high latency for operation op_get_file_status. latencyMs=1039; previousMaxLatencyMs=1037; operationCount=126124; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_K11_SCISS_BITE_INCLAVO/chromosome=2/part-00018-e37f4a01-bdb4-442f-8dcd-681da303e876.c000.snappy.parquet\n", + "23/10/13 10:52:37 WARN GhfsStorageStatistics: Detected potential high latency for operation op_get_file_status. latencyMs=1917; previousMaxLatencyMs=1039; operationCount=144264; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_AUTOIMMUNE_NONTHYROID_STRICT/chromosome=5/part-00006-02537c5f-2dea-47c5-8655-b0d84827115a.c000.snappy.parquet\n", + "23/10/13 10:52:38 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_read_operations. 
latencyMs=1942; previousMaxLatencyMs=1248; operationCount=94444057; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_K11_PULPITIS_1_ONLYAVO/chromosome=5/part-00006-de277609-305a-4cec-8bb1-5b37a70be48b.c000.snappy.parquet\n", + "23/10/13 11:07:41 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", + "23/10/13 11:12:32 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", + "23/10/13 11:12:48 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", + "23/10/13 11:21:40 WARN GoogleCloudStorageReadChannel: Failed read retry #1/10 for 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_H7_CONJUHAEMOR/chromosome=12/part-00016-03e2c019-d707-414b-b4e1-8b6474ccfb26.c000.snappy.parquet'. 
Sleeping...\n", + "java.net.SocketException: Connection reset\n", + "\tat java.net.SocketInputStream.read(SocketInputStream.java:186) ~[?:?]\n", + "\tat java.net.SocketInputStream.read(SocketInputStream.java:140) ~[?:?]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readFromSocket(ConscryptEngineSocket.java:920) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.processDataFromSocket(ConscryptEngineSocket.java:884) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readUntilDataAvailable(ConscryptEngineSocket.java:799) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.read(ConscryptEngineSocket.java:772) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", + "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", + "\tat sun.net.www.MeteredStream.read(MeteredStream.java:134) ~[?:?]\n", + "\tat java.io.FilterInputStream.read(FilterInputStream.java:133) ~[?:?]\n", + "\tat sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:3529) ~[?:?]\n", + "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.javanet.NetHttpResponse$SizeValidatingInputStream.read(NetHttpResponse.java:164) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", + "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", + "\tat java.nio.channels.Channels$ReadableByteChannelImpl.read(Channels.java:388) ~[?:?]\n", + "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadChannel.read(GoogleCloudStorageReadChannel.java:315) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat 
com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.lambda$read$1(GoogleHadoopFSInputStream.java:170) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat com.google.cloud.hadoop.fs.gcs.GhfsStorageStatistics.trackDuration(GhfsStorageStatistics.java:77) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.read(GoogleHadoopFSInputStream.java:159) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat java.io.DataInputStream.read(DataInputStream.java:149) ~[?:?]\n", + "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:102) ~[parquet-common-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFullyHeapBuffer(DelegatingSeekableInputStream.java:127) ~[parquet-common-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:91) ~[parquet-common-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.hadoop.ParquetFileReader$ConsecutivePartList.readAll(ParquetFileReader.java:1704) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextRowGroup(ParquetFileReader.java:925) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextFilteredRowGroup(ParquetFileReader.java:972) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", + "\tat org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase$ParquetRowGroupReaderImpl.readNextRowGroup(SpecificParquetRecordReaderBase.java:320) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:403) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:324) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat 
org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:227) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:274) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:565) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source) ~[?:?]\n", + "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?]\n", + "\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.14.jar:?]\n", + "\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:142) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.scheduler.Task.run(Task.scala:136) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]\n", + "\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]\n", + "\tat java.lang.Thread.run(Thread.java:829) ~[?:?]\n", + "23/10/13 11:21:41 WARN GoogleCloudStorageReadChannel: Failed read retry #1/10 for 'gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/preprocess/finngen/summary_stats/FINNGEN_R9_AMN2/chromosome=12/part-00016-6c463da6-b063-495d-8b62-8ad766491cfb.c000.snappy.parquet'. 
Sleeping...\n", + "java.net.SocketException: Connection reset\n", + "\tat java.net.SocketInputStream.read(SocketInputStream.java:186) ~[?:?]\n", + "\tat java.net.SocketInputStream.read(SocketInputStream.java:140) ~[?:?]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readFromSocket(ConscryptEngineSocket.java:920) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.processDataFromSocket(ConscryptEngineSocket.java:884) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.readUntilDataAvailable(ConscryptEngineSocket.java:799) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat org.conscrypt.ConscryptEngineSocket$SSLInputStream.read(ConscryptEngineSocket.java:772) ~[conscrypt-openjdk-2.5.2-linux-x86_64.jar:2.5.2]\n", + "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", + "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", + "\tat sun.net.www.MeteredStream.read(MeteredStream.java:134) ~[?:?]\n", + "\tat java.io.FilterInputStream.read(FilterInputStream.java:133) ~[?:?]\n", + "\tat sun.net.www.protocol.http.HttpURLConnection$HttpInputStream.read(HttpURLConnection.java:3529) ~[?:?]\n", + "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.javanet.NetHttpResponse$SizeValidatingInputStream.read(NetHttpResponse.java:164) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat java.io.BufferedInputStream.read1(BufferedInputStream.java:290) ~[?:?]\n", + "\tat java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]\n", + "\tat java.nio.channels.Channels$ReadableByteChannelImpl.read(Channels.java:388) ~[?:?]\n", + "\tat com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadChannel.read(GoogleCloudStorageReadChannel.java:315) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat 
com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.lambda$read$1(GoogleHadoopFSInputStream.java:170) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat com.google.cloud.hadoop.fs.gcs.GhfsStorageStatistics.trackDuration(GhfsStorageStatistics.java:77) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.read(GoogleHadoopFSInputStream.java:159) ~[gcs-connector-hadoop3-2.2.16.jar:?]\n", + "\tat java.io.DataInputStream.read(DataInputStream.java:149) ~[?:?]\n", + "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:102) ~[parquet-common-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFullyHeapBuffer(DelegatingSeekableInputStream.java:127) ~[parquet-common-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.io.DelegatingSeekableInputStream.readFully(DelegatingSeekableInputStream.java:91) ~[parquet-common-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.hadoop.ParquetFileReader$ConsecutivePartList.readAll(ParquetFileReader.java:1704) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextRowGroup(ParquetFileReader.java:925) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", + "\tat org.apache.parquet.hadoop.ParquetFileReader.readNextFilteredRowGroup(ParquetFileReader.java:972) ~[parquet-hadoop-1.12.2.jar:1.12.2]\n", + "\tat org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase$ParquetRowGroupReaderImpl.readNextRowGroup(SpecificParquetRecordReaderBase.java:320) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:403) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:324) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat 
org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:227) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:274) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:565) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source) ~[?:?]\n", + "\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?]\n", + "\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) ~[spark-sql_2.12-3.3.0.jar:3.3.0]\n", + "\tat scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.14.jar:?]\n", + "\tat org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:170) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.scheduler.Task.run(Task.scala:136) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) ~[spark-core_2.12-3.3.0.jar:3.3.0]\n", + "\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]\n", + "\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]\n", + "\tat java.lang.Thread.run(Thread.java:829) ~[?:?]\n", + "23/10/13 11:36:03 WARN HintErrorLogger: Hint (strategy=broadcast) is not supported in the query: build right for right outer join.\n", + "23/10/13 11:39:00 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=202; previousMaxLatencyMs=190; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_20231013113900962145652452525416_0067_m_000009_79286/part-00009-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:00 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=216; previousMaxLatencyMs=202; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139003257422348970599913_0067_m_000028_79305/part-00028-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:00 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=218; previousMaxLatencyMs=216; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139006484121277706965747_0067_m_000003_79280/part-00003-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:01 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=243; previousMaxLatencyMs=218; operationCount=105; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139004953018478188900860_0067_m_000023_79300/part-00023-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=165; previousMaxLatencyMs=153; operationCount=92; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139003598197994288369319_0067_m_000026_79303/part-00026-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=193; previousMaxLatencyMs=165; operationCount=99; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139004279983947938826985_0067_m_000025_79302/part-00025-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. 
latencyMs=243; previousMaxLatencyMs=193; operationCount=103; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_202310131139004602145763603687891_0067_m_000030_79307/part-00030-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + "23/10/13 11:39:02 WARN GhfsStorageStatistics: Detected potential high latency for operation stream_write_close_operations. latencyMs=329; previousMaxLatencyMs=243; operationCount=104; context=gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus/_temporary/0/_temporary/attempt_20231013113900353649088592248874_0067_m_000005_79282/part-00005-65e9ae61-ec7d-4672-a525-8de91e583465-c000.snappy.parquet\n", + " \r" + ] + } + ], + "source": [ + "window_based_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus\"\n", + "\n", + "clump_window_length = 500_000\n", + "locus_window_length = 250_000\n", + "\n", + "(\n", + " SummaryStatistics(\n", + " _df=(\n", + " session.spark.read.parquet(sumstats, recursiveFileLookup=True)\n", + " .withColumn(\n", + " \"chromosome\",\n", + " f.split(f.col(\"variantId\"), \"_\")[0]\n", + " )\n", + " ),\n", + " _schema=SummaryStatistics.get_schema()\n", + " )\n", + " .window_based_clumping(\n", + " distance=clump_window_length,\n", + " locus_collect_distance=locus_window_length,\n", + " with_locus=True\n", + " )\n", + " .df.write.mode(\"overwrite\")\n", + " .parquet(window_based_clumped_output)\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "6b234362", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-13T12:39:15.023402Z", + "start_time": "2023-10-13T11:50:42.603425Z" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 29, - "id": "5bf42196", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-13T12:55:39.212186Z", - "start_time": "2023-10-13T12:55:38.131092Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" 
- ] - }, - { - "data": { - "text/plain": [ - "[Row(locus=[Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99764322_T_C', pValueMantissa=2.3949999809265137, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0201963, standardError=0.00665169, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99764860_G_A', pValueMantissa=1.0499999523162842, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813303, standardError=0.00248194, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99765280_C_T', pValueMantissa=7.499000072479248, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147615, standardError=0.00437943, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99765335_A_G', pValueMantissa=1.2640000581741333, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107816, standardError=0.00189456, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99766311_C_T', pValueMantissa=1.0579999685287476, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010837, standardError=0.00189422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99766702_A_G', pValueMantissa=1.2059999704360962, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0107966, standardError=0.00189453, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99766923_T_C', pValueMantissa=1.3300000429153442, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010765, standardError=0.00189451, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99767090_TTTG_T', pValueMantissa=1.2740000486373901, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107778, standardError=0.00189432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99768341_T_C', pValueMantissa=1.2289999723434448, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107887, standardError=0.00189422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99768718_A_G', pValueMantissa=1.2649999856948853, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107795, standardError=0.0018942, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99768993_G_A', pValueMantissa=1.1629999876022339, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108066, standardError=0.00189423, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99769297_T_C', pValueMantissa=1.2929999828338623, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113037, standardError=0.00186275, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769386_C_T', pValueMantissa=1.7280000448226929, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00670209, standardError=0.00281514, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769548_A_G', pValueMantissa=1.7280000448226929, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0067021, standardError=0.00281514, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769607_C_T', pValueMantissa=1.2710000276565552, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107775, standardError=0.00189416, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99770233_A_AT', pValueMantissa=1.1360000371932983, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108149, standardError=0.00189434, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99771038_C_A', pValueMantissa=1.1039999723434448, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108218, standardError=0.00189392, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99771332_T_C', pValueMantissa=1.284999966621399, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00972726, standardError=0.00390986, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99771548_C_T', pValueMantissa=1.1369999647140503, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108168, standardError=0.00189473, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99774249_G_A', pValueMantissa=1.0410000085830688, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813435, standardError=0.00248058, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99774476_AT_A', pValueMantissa=1.1540000438690186, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108061, standardError=0.0018937, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99777761_G_A', pValueMantissa=2.9170000553131104, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0857961, standardError=0.0393337, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99777984_T_G', pValueMantissa=9.097999572753906, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108749, 
standardError=0.00189236, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99778063_G_A', pValueMantissa=1.2730000019073486, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00887632, standardError=0.00203368, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99778821_G_A', pValueMantissa=1.6679999828338623, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00682868, standardError=0.00285271, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99779613_C_G', pValueMantissa=7.630000114440918, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109299, standardError=0.00189215, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99781296_A_G', pValueMantissa=8.560999870300293, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010892, standardError=0.00189195, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99781822_G_A', pValueMantissa=1.4390000104904175, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00689344, standardError=0.00281655, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99781826_A_G', pValueMantissa=7.65500020980835, 
pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109284, standardError=0.00189207, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99782821_C_T', pValueMantissa=1.3680000305175781, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00573303, standardError=0.00232523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99783206_A_G', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99783290_A_C', pValueMantissa=8.821999549865723, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108841, standardError=0.00189224, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99784269_C_T', pValueMantissa=9.149999618530273, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108721, standardError=0.00189218, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99784331_G_A', pValueMantissa=4.435999870300293, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0327234, standardError=0.0162747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99784751_C_A', pValueMantissa=3.0409998893737793, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.149822, standardError=0.0692119, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99785581_A_G', pValueMantissa=7.164000034332275, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0113888, standardError=0.00423514, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99786875_C_T', pValueMantissa=5.257999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110515, standardError=0.00189278, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787050_C_G', pValueMantissa=6.671999931335449, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109731, standardError=0.00189226, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787264_C_A', pValueMantissa=7.834000110626221, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109198, standardError=0.00189186, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787285_C_T', pValueMantissa=9.189000129699707, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108723, standardError=0.00189246, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787359_T_G', pValueMantissa=7.504000186920166, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109356, standardError=0.00189222, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787460_G_T', pValueMantissa=6.499000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010984, standardError=0.00189269, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787461_G_T', pValueMantissa=6.499000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010984, standardError=0.00189269, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787483_G_A', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787568_C_T', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787571_C_T', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, 
standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787694_A_AT', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787855_C_T', pValueMantissa=8.937999725341797, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108798, standardError=0.00189223, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787860_C_T', pValueMantissa=7.64300012588501, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0231471, standardError=0.0086776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787890_G_A', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787906_T_A', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787915_T_G', pValueMantissa=7.631999969482422, pValueExponent=-9, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788026_C_T', pValueMantissa=7.7870001792907715, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109255, standardError=0.00189252, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788420_A_C', pValueMantissa=7.001999855041504, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109573, standardError=0.00189216, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788711_G_A', pValueMantissa=1.0420000553131104, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108302, standardError=0.00189215, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788859_C_T', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788952_C_T', pValueMantissa=9.12399959564209, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108727, standardError=0.00189214, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99789038_A_G', pValueMantissa=7.164000034332275, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109498, standardError=0.00189212, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99789217_G_C', pValueMantissa=7.72599983215332, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109277, standardError=0.00189247, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99789506_G_C', pValueMantissa=7.671000003814697, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109278, standardError=0.00189209, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99789829_T_G', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99790008_G_A', pValueMantissa=7.603000164031982, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109305, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99790154_G_A', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99790319_T_G', pValueMantissa=7.857999801635742, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109213, standardError=0.00189229, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99791174_G_A', pValueMantissa=7.623000144958496, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0276216, standardError=0.00820577, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99791224_A_G', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99793129_A_T', pValueMantissa=9.17300033569336, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108711, standardError=0.00189215, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99793502_C_T', pValueMantissa=1.0099999904632568, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108395, standardError=0.00189201, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99793829_C_CGTAT', pValueMantissa=3.756999969482422, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.201946, 
standardError=0.0971122, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99794292_T_A', pValueMantissa=1.8420000076293945, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0427514, standardError=0.0181376, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99794803_C_G', pValueMantissa=3.76200008392334, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0754336, standardError=0.0362844, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99797363_G_A', pValueMantissa=2.696000099182129, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0451691, standardError=0.0204184, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99797572_A_C', pValueMantissa=7.729000091552734, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109255, standardError=0.0018921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99798877_A_T', pValueMantissa=5.47599983215332, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111187, standardError=0.0019065, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99798989_G_T', pValueMantissa=4.708000183105469, pValueExponent=-4, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00645552, standardError=0.00184611, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99799669_G_A', pValueMantissa=1.3769999742507935, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0283588, standardError=0.0115134, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99801055_G_A', pValueMantissa=5.751999855041504, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0863438, standardError=0.0312661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99801648_T_C', pValueMantissa=5.201000213623047, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.35431, standardError=0.126797, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99802216_C_T', pValueMantissa=8.795999526977539, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0312825, standardError=0.00584722, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99803737_A_G', pValueMantissa=4.105000019073486, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0838538, standardError=0.0410448, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99804058_G_A', pValueMantissa=9.253999710083008, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00819335, standardError=0.00247362, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99804255_C_G', pValueMantissa=8.798999786376953, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0312823, standardError=0.00584723, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99804875_C_T', pValueMantissa=2.2079999446868896, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0532293, standardError=0.023254, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99805400_A_G', pValueMantissa=5.626999855041504, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0215236, standardError=0.00777382, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99805994_G_A', pValueMantissa=1.6449999809265137, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125221, standardError=0.00522031, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99806253_C_T', pValueMantissa=1.2120000123977661, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00800751, standardError=0.00247451, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99806469_CAT_C', pValueMantissa=1.996999979019165, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01596, standardError=0.00685871, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99807109_T_A', pValueMantissa=3.994999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0097045, standardError=0.00274107, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99807669_A_G', pValueMantissa=4.577000141143799, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111561, standardError=0.00190316, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99807962_A_G', pValueMantissa=3.7709999084472656, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0677375, standardError=0.0325981, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99809156_C_A', pValueMantissa=1.4980000257492065, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00696552, standardError=0.0028632, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99809726_T_C', pValueMantissa=4.446000099182129, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111654, 
standardError=0.00190318, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99809873_G_A', pValueMantissa=4.642000198364258, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.122379, standardError=0.0614495, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99811236_C_A', pValueMantissa=7.059999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110215, standardError=0.00190371, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99811771_C_T', pValueMantissa=4.526000022888184, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111597, standardError=0.00190317, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99815174_G_C', pValueMantissa=6.111000061035156, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110649, standardError=0.00190324, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99815628_A_G', pValueMantissa=9.194999694824219, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0211397, standardError=0.00637879, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99815640_A_G', pValueMantissa=8.413999557495117, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0902758, standardError=0.0342605, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99816387_G_A', pValueMantissa=4.604000091552734, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0346061, standardError=0.0173466, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99816562_G_A', pValueMantissa=3.677000045776367, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00976812, standardError=0.00274214, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99816570_G_A', pValueMantissa=1.152999997138977, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00804666, standardError=0.00247569, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99817203_T_C', pValueMantissa=7.421999931335449, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110057, standardError=0.00190374, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99818525_T_C', pValueMantissa=1.4880000352859497, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0186574, standardError=0.0076615, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99819195_T_G', pValueMantissa=6.382999897003174, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0124767, standardError=0.0045745, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99820908_A_G', pValueMantissa=1.4980000257492065, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00696602, standardError=0.0028633, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99821086_GA_G', pValueMantissa=3.259999990463257, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0113261, standardError=0.00272622, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99821094_C_T', pValueMantissa=4.098999977111816, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112471, standardError=0.00191273, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99821955_T_G', pValueMantissa=4.670000076293945, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112087, standardError=0.00191323, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99822427_T_C', pValueMantissa=2.058000087738037, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115151, standardError=0.00192136, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99822696_T_C', pValueMantissa=7.763000011444092, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.162918, standardError=0.0611962, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99823047_A_G', pValueMantissa=7.198999881744385, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110742, standardError=0.0019139, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99823280_T_C', pValueMantissa=1.1619999408721924, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00804213, standardError=0.00247592, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99825419_A_G', pValueMantissa=1.2710000276565552, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0826158, standardError=0.0331567, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99825752_T_C', pValueMantissa=6.0370001792907715, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111297, standardError=0.00191372, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99825835_C_T', pValueMantissa=6.0320000648498535, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111299, 
standardError=0.00191372, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99827260_C_T', pValueMantissa=3.6500000953674316, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0489284, standardError=0.0233964, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99828083_C_T', pValueMantissa=3.938999891281128, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0959414, standardError=0.0465714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99828402_T_A', pValueMantissa=9.142000198364258, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0211518, standardError=0.00637931, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99828407_A_T', pValueMantissa=9.133999824523926, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0211533, standardError=0.0063793, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99829453_A_AT', pValueMantissa=6.083000183105469, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111253, standardError=0.00191338, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99829545_ATATTT_A', pValueMantissa=4.810999870300293, 
pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112061, standardError=0.00191441, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99830168_G_A', pValueMantissa=1.218999981880188, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00801018, standardError=0.00247653, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99830263_G_A', pValueMantissa=1.6970000267028809, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0179959, standardError=0.00753787, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832187_T_C', pValueMantissa=4.421000003814697, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112213, standardError=0.0019124, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832400_AAC_A', pValueMantissa=6.706999778747559, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110021, standardError=0.00189753, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832402_C_CTGTGTGT', pValueMantissa=6.710999965667725, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110019, standardError=0.00189753, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99832563_A_G', pValueMantissa=1.4479999542236328, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0090241, standardError=0.00187287, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832865_A_G', pValueMantissa=4.169000148773193, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0105412, standardError=0.00192234, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99834145_G_C', pValueMantissa=6.802999973297119, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.260334, standardError=0.0961939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99834214_T_C', pValueMantissa=1.784000039100647, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0474718, standardError=0.0200386, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99835361_G_A', pValueMantissa=2.0769999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0595141, standardError=0.0257408, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99841479_C_T', pValueMantissa=6.188000202178955, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104861, standardError=0.00193711, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99842816_A_G', pValueMantissa=8.645000457763672, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00723726, standardError=0.00184352, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99844024_C_G', pValueMantissa=2.3450000286102295, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107969, standardError=0.00193341, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99844450_C_T', pValueMantissa=2.874000072479248, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107269, standardError=0.00193316, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99845178_C_T', pValueMantissa=4.0320000648498535, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0656125, standardError=0.0319983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99845936_A_G', pValueMantissa=3.069999933242798, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00675859, standardError=0.00312767, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99848765_C_G', pValueMantissa=2.8910000324249268, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0114511, standardError=0.00192855, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99850966_A_G', pValueMantissa=8.373000144958496, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101864, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99852563_C_T', pValueMantissa=4.5329999923706055, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113089, standardError=0.0019287, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99853411_T_TG', pValueMantissa=9.414999961853027, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011087, standardError=0.00193121, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99856822_A_G', pValueMantissa=2.7119998931884766, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114876, standardError=0.00193129, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99856987_G_C', pValueMantissa=5.239999771118164, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010358, standardError=0.001903, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857429_C_T', pValueMantissa=1.718000054359436, 
pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116617, standardError=0.00193637, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857502_A_G', pValueMantissa=6.296000003814697, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010296, standardError=0.00190308, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857670_G_A', pValueMantissa=2.7939999103546143, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011478, standardError=0.00193126, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857979_C_T', pValueMantissa=2.5840001106262207, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00775769, standardError=0.00184386, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99858035_C_T', pValueMantissa=2.7939999103546143, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011478, standardError=0.00193127, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99858181_G_C', pValueMantissa=2.7920000553131104, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114783, standardError=0.00193127, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99858624_T_C', pValueMantissa=6.314000129699707, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0103011, standardError=0.0019042, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99859267_C_A', pValueMantissa=1.5019999742507935, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0239274, standardError=0.00753783, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99859558_G_A', pValueMantissa=6.2779998779296875, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0321283, standardError=0.00593792, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99859674_G_A', pValueMantissa=1.1050000190734863, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100907, standardError=0.00190082, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861014_C_G', pValueMantissa=1.350000023841858, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0080568, standardError=0.00185135, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861191_A_G', pValueMantissa=2.062000036239624, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00788332, standardError=0.00185141, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861743_A_C', pValueMantissa=1.621999979019165, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00893977, standardError=0.00186417, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861975_TTGG_T', pValueMantissa=1.5509999990463257, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00895366, standardError=0.00186357, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99862766_C_T', pValueMantissa=1.7289999723434448, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00993376, standardError=0.0019007, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99863009_T_C', pValueMantissa=1.3009999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0535743, standardError=0.0215732, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99863999_T_C', pValueMantissa=1.4730000495910645, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0137073, standardError=0.00361145, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99864093_A_C', pValueMantissa=5.741000175476074, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0322238, standardError=0.00593803, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99864115_G_T', pValueMantissa=7.295000076293945, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.150642, standardError=0.0445916, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865012_T_C', pValueMantissa=2.450000047683716, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00781456, standardError=0.00185205, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865013_G_A', pValueMantissa=1.7259999513626099, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0159887, standardError=0.00510203, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865261_T_C', pValueMantissa=1.4459999799728394, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00999453, standardError=0.00190036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865337_G_GAAGA', pValueMantissa=1.3170000314712524, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00806712, standardError=0.00185143, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99865513_C_G', pValueMantissa=1.3250000476837158, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0080656, standardError=0.00185165, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99866792_C_T', pValueMantissa=1.4500000476837158, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00803164, standardError=0.00185225, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867170_C_T', pValueMantissa=8.121000289916992, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111182, standardError=0.00192825, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867197_T_C', pValueMantissa=2.680999994277954, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0171717, standardError=0.00775509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867487_C_G', pValueMantissa=2.8310000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.359987, standardError=0.164156, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867528_G_T', pValueMantissa=2.8389999866485596, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0077156, standardError=0.0018432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868259_G_A', pValueMantissa=2.265000104904175, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.119984, standardError=0.0392983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868412_T_G', pValueMantissa=1.6779999732971191, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00994383, standardError=0.00190064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868443_A_G', pValueMantissa=1.4809999465942383, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00801978, standardError=0.0018515, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868585_TC_T', pValueMantissa=1.437999963760376, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0080333, standardError=0.00185188, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868691_T_G', pValueMantissa=1.4730000495910645, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00802444, standardError=0.00185208, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99869742_G_A', pValueMantissa=4.677000045776367, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101608, 
standardError=0.00185986, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99870037_G_A', pValueMantissa=3.996999979019165, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0939625, standardError=0.0326443, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99870234_G_A', pValueMantissa=2.5759999752044678, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0270256, standardError=0.00896575, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871164_A_T', pValueMantissa=9.444999694824219, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101485, standardError=0.0019015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871248_A_C', pValueMantissa=4.265999794006348, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011332, standardError=0.00192934, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871256_C_T', pValueMantissa=1.0980000495910645, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100932, standardError=0.00190087, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871596_T_C', pValueMantissa=1.097000002861023, pValueExponent=-7, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100928, standardError=0.00190078, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871838_G_C', pValueMantissa=4.270999908447266, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113316, standardError=0.00192933, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871912_T_C', pValueMantissa=1.0989999771118164, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100928, standardError=0.00190087, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99872008_T_C', pValueMantissa=1.0989999771118164, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100927, standardError=0.00190087, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99872112_C_T', pValueMantissa=1.093000054359436, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100944, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99872802_A_G', pValueMantissa=2.828000068664551, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0103237, standardError=0.00185953, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99873074_C_T', pValueMantissa=4.716000080108643, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112996, standardError=0.00192927, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873158_C_T', pValueMantissa=1.1759999990463257, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010069, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873263_C_T', pValueMantissa=1.1440000534057617, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010071, standardError=0.00189938, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873471_G_A', pValueMantissa=6.283999919891357, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0723658, standardError=0.0264824, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873553_C_T', pValueMantissa=1.8109999895095825, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122764, standardError=0.00286365, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873602_C_T', pValueMantissa=3.7880001068115234, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102286, standardError=0.00185958, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99874022_C_T', pValueMantissa=3.7920000553131104, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102286, standardError=0.00185966, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99874186_C_CT', pValueMantissa=1.2230000495910645, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100561, standardError=0.00190097, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875195_T_C', pValueMantissa=4.964000225067139, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112867, standardError=0.00192988, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875202_G_T', pValueMantissa=1.312999963760376, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100339, standardError=0.00190142, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875320_C_A', pValueMantissa=1.2369999885559082, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100535, standardError=0.00190119, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875381_T_C', pValueMantissa=1.2410000562667847, pValueExponent=-7, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0100525, standardError=0.00190121, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875452_C_T', pValueMantissa=2.5290000438690186, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0135056, standardError=0.00447237, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875704_T_A', pValueMantissa=2.9600000381469727, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00410906, standardError=0.00188882, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875760_A_AG', pValueMantissa=1.2430000305175781, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010052, standardError=0.00190125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99876031_G_GT', pValueMantissa=1.3940000534057617, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0240895, standardError=0.00753787, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99876891_T_C', pValueMantissa=3.986999988555908, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102146, standardError=0.0018601, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99878758_G_A', 
pValueMantissa=2.763000011444092, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.151851, standardError=0.0689453, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99878850_C_G', pValueMantissa=4.1519999504089355, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102024, standardError=0.0018603, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99880039_G_A', pValueMantissa=4.0960001945495605, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102077, standardError=0.00186045, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99880120_A_C', pValueMantissa=4.75, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113026, standardError=0.00193019, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99880736_G_C', pValueMantissa=2.4639999866485596, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.11912, standardError=0.039343, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99882938_G_A', pValueMantissa=4.218999862670898, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102002, standardError=0.00186086, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, 
logABF=None, posteriorProbability=None, variantId='10_99883489_C_T', pValueMantissa=7.486000061035156, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0140267, standardError=0.00524475, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99884754_C_T', pValueMantissa=2.5510001182556152, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011665, standardError=0.00195783, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99885306_C_T', pValueMantissa=7.618000030517578, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122854, standardError=0.00460381, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99885741_T_C', pValueMantissa=4.251999855041504, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101963, standardError=0.00186062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99887218_A_G', pValueMantissa=1.3589999675750732, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100249, standardError=0.00190197, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99887854_C_T', pValueMantissa=1.3580000400543213, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010025, standardError=0.00190197, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99888103_G_A', pValueMantissa=3.808000087738037, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0456176, standardError=0.0219955, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99889239_G_C', pValueMantissa=1.3580000400543213, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100256, standardError=0.00190207, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99890983_C_T', pValueMantissa=1.3830000162124634, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01002, standardError=0.00190223, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891336_A_G', pValueMantissa=3.815000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102374, standardError=0.00186162, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891408_C_T', pValueMantissa=1.6360000371932983, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00996155, standardError=0.00190233, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891590_G_A', pValueMantissa=6.8420000076293945, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0114132, standardError=0.00422021, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891697_T_C', pValueMantissa=4.46999979019165, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010205, standardError=0.00186523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99892983_T_C', pValueMantissa=3.4000000953674316, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.177641, standardError=0.083791, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99893179_G_C', pValueMantissa=5.1570000648498535, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112781, standardError=0.00193051, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99893428_C_T', pValueMantissa=4.14300012588501, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102059, standardError=0.00186081, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894094_G_C', pValueMantissa=4.321000099182129, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0249459, standardError=0.00708754, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894492_G_T', 
pValueMantissa=8.531999588012695, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101123, standardError=0.00188821, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894565_A_G', pValueMantissa=4.785999774932861, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00954874, standardError=0.00273411, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894939_T_C', pValueMantissa=8.916999816894531, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00988844, standardError=0.00184916, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99895167_T_C', pValueMantissa=4.169000148773193, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00948941, standardError=0.00187497, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99895555_C_T', pValueMantissa=4.659999847412109, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0340557, standardError=0.0120355, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99896225_G_A', pValueMantissa=1.0920000076293945, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110344, standardError=0.0019305, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99896692_A_G', pValueMantissa=3.4130001068115234, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110917, standardError=0.00187665, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99896979_T_G', pValueMantissa=3.2119998931884766, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0401381, standardError=0.0187308, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99897831_T_C', pValueMantissa=1.9040000438690186, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.14616, standardError=0.0623359, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99898123_G_A', pValueMantissa=1.1069999933242798, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110304, standardError=0.00193059, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99898484_T_A', pValueMantissa=1.1139999628067017, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110285, standardError=0.00193061, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99898828_T_C', pValueMantissa=3.2699999809265137, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111045, standardError=0.00187657, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99899677_C_CTTGT', pValueMantissa=3.51200008392334, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110858, standardError=0.00187714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99900290_T_G', pValueMantissa=7.426000118255615, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109405, standardError=0.00189249, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99900292_A_AT', pValueMantissa=1.1360000371932983, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00988613, standardError=0.00390553, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99900500_A_C', pValueMantissa=2.2009999752044678, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0412365, standardError=0.0180062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99901642_T_C', pValueMantissa=5.086999893188477, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.113465, standardError=0.0405014, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99901731_C_T', pValueMantissa=1.5379999876022339, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0140283, standardError=0.00442901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902496_G_A', pValueMantissa=1.2289999723434448, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0224604, standardError=0.00897057, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902603_C_T', pValueMantissa=4.140999794006348, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0854585, standardError=0.0419028, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902712_G_C', pValueMantissa=1.1929999589920044, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110082, standardError=0.00193099, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902720_C_G', pValueMantissa=3.492000102996826, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011087, standardError=0.00187705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902757_G_A', pValueMantissa=4.366000175476074, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110198, standardError=0.00187741, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99903008_A_C', pValueMantissa=3.4590001106262207, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01109, standardError=0.00187705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99903292_G_A', pValueMantissa=3.4660000801086426, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110895, standardError=0.00187709, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99903681_G_T', pValueMantissa=6.072000026702881, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110979, standardError=0.00190858, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99904413_A_G', pValueMantissa=3.4719998836517334, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110894, standardError=0.00187714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99904499_G_A', pValueMantissa=1.1369999647140503, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110237, standardError=0.00193097, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99904588_G_A', pValueMantissa=9.54800033569336, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142134, standardError=0.00430252, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905340_C_T', pValueMantissa=1.184000015258789, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110128, standardError=0.00193138, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905534_G_C', pValueMantissa=1.2209999561309814, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0379664, standardError=0.0151507, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905541_A_G', pValueMantissa=3.575000047683716, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110805, standardError=0.00187717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905693_A_G', pValueMantissa=3.490999937057495, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110881, standardError=0.00187721, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99906210_T_C', pValueMantissa=3.5, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110874, standardError=0.00187724, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99906644_G_A', pValueMantissa=1.0570000410079956, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110495, standardError=0.00193126, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99906718_T_C', pValueMantissa=3.4709999561309814, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110901, standardError=0.00187726, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908035_C_T', pValueMantissa=7.296000003814697, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0148557, standardError=0.00439751, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908057_A_G', pValueMantissa=7.160999774932861, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0149937, standardError=0.00443164, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908221_T_C', pValueMantissa=3.878000020980835, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110578, standardError=0.0018776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908813_T_C', pValueMantissa=5.124000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112768, standardError=0.00192994, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99909229_T_C', pValueMantissa=8.24899959564209, pValueExponent=-8, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00998922, standardError=0.0018631, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99909454_A_C', pValueMantissa=7.598999977111816, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109359, standardError=0.00189297, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99910263_C_T', pValueMantissa=5.330999851226807, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111417, standardError=0.00190898, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99910368_T_C', pValueMantissa=3.302999973297119, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011106, standardError=0.00187736, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99910580_T_G', pValueMantissa=1.1369999647140503, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110251, standardError=0.00193122, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99911338_T_C', pValueMantissa=3.4579999446868896, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011092, standardError=0.00187738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99911811_G_A', pValueMantissa=2.575000047683716, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.118741, standardError=0.0393917, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99912354_A_G', pValueMantissa=5.099999904632568, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112784, standardError=0.00192997, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99912584_A_G', pValueMantissa=3.447999954223633, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011093, standardError=0.00187739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99913097_C_G', pValueMantissa=3.181999921798706, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111182, standardError=0.00187747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99913702_T_C', pValueMantissa=1.0369999408721924, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108274, standardError=0.00422404, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99914373_T_G', pValueMantissa=3.0360000133514404, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.248118, standardError=0.114583, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915372_A_G', pValueMantissa=3.250999927520752, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111123, standardError=0.0018776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915373_G_A', pValueMantissa=3.3289999961853027, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111042, standardError=0.00187747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915884_T_C', pValueMantissa=5.684999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109318, standardError=0.00187645, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915918_T_G', pValueMantissa=1.281000018119812, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00983696, standardError=0.00186253, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99916127_T_C', pValueMantissa=5.081999778747559, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112798, standardError=0.00193, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99916503_A_T', pValueMantissa=6.974999904632568, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00734427, 
standardError=0.00184662, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99916552_T_C', pValueMantissa=2.9839999675750732, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122137, standardError=0.00562257, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99917731_C_T', pValueMantissa=5.0289998054504395, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112837, standardError=0.0019301, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99919106_C_T', pValueMantissa=2.8299999237060547, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0131403, standardError=0.00440112, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99920364_C_T', pValueMantissa=4.159999847412109, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0219416, standardError=0.0076565, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99921641_GC_G', pValueMantissa=5.14900016784668, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112777, standardError=0.00193036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99922010_GAAGA_G', pValueMantissa=3.134999990463257, 
pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0255599, standardError=0.00709237, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99922205_A_G', pValueMantissa=8.65999984741211, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00997687, standardError=0.00186386, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99922419_C_T', pValueMantissa=5.785999774932861, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112417, standardError=0.00193063, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99923392_A_AAAT', pValueMantissa=7.868000030517578, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010012, standardError=0.00186439, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99924208_C_T', pValueMantissa=6.610000133514404, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112121, standardError=0.00193293, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99924251_CACGGTGAA_C', pValueMantissa=5.901000022888184, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112486, standardError=0.0019329, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99924611_T_C', pValueMantissa=3.5799999237060547, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.028521, standardError=0.0135863, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99924854_A_G', pValueMantissa=4.103000164031982, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.135756, standardError=0.0664409, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99925030_G_A', pValueMantissa=7.132999897003174, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0123943, standardError=0.00460661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99925171_T_C', pValueMantissa=2.7660000324249268, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0131669, standardError=0.00439981, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99925527_C_T', pValueMantissa=3.3429999351501465, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011112, standardError=0.00187899, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99926307_T_C', pValueMantissa=1.2309999465942383, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00985993, standardError=0.0018643, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99926344_G_A', pValueMantissa=7.146999835968018, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0123917, standardError=0.00460668, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99928121_C_T', pValueMantissa=2.6080000400543213, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0237748, standardError=0.0106854, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99928961_T_A', pValueMantissa=7.409999847412109, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0092492, standardError=0.00274137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99929621_G_C', pValueMantissa=2.4159998893737793, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00965232, standardError=0.00186913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99930293_C_T', pValueMantissa=2.371000051498413, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00891412, standardError=0.00188903, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99930622_A_G', pValueMantissa=2.9200000762939453, pValueExponent=-10, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0122713, standardError=0.0019469, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99930880_C_G', pValueMantissa=6.189000129699707, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126631, standardError=0.00193652, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99931871_G_T', pValueMantissa=1.0870000123977661, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132779, standardError=0.00195421, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99932501_G_T', pValueMantissa=7.144999980926514, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130514, standardError=0.0020025, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99932525_C_G', pValueMantissa=3.86299991607666, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132541, standardError=0.00190924, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99932591_G_A', pValueMantissa=3.625999927520752, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0057824, standardError=0.00276148, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933095_G_A', 
pValueMantissa=6.820000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.041325, standardError=0.0152743, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933333_G_A', pValueMantissa=7.736000061035156, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130529, standardError=0.00190738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933398_T_G', pValueMantissa=1.1490000486373901, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132623, standardError=0.00195424, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933499_T_C', pValueMantissa=7.710999965667725, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130536, standardError=0.00190735, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933515_AG_A', pValueMantissa=6.0929999351501465, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130982, standardError=0.00200234, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933811_A_C', pValueMantissa=3.0450000762939453, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0271544, standardError=0.009164, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933841_A_G', pValueMantissa=7.820000171661377, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130497, standardError=0.00190735, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99935601_G_A', pValueMantissa=6.01200008392334, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131034, standardError=0.00200253, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99935839_G_A', pValueMantissa=1.9040000438690186, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0088457, standardError=0.00185702, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99936985_A_C', pValueMantissa=8.661999702453613, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130204, standardError=0.00190715, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99937543_C_A', pValueMantissa=6.139999866485596, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130966, standardError=0.00200245, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99938738_G_A', pValueMantissa=1.0329999923706055, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132869, standardError=0.00195344, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99939547_T_C', pValueMantissa=6.979000091552734, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130948, standardError=0.00190939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99939652_G_A', pValueMantissa=4.034999847412109, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132546, standardError=0.00191099, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99940864_C_T', pValueMantissa=5.973999977111816, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131208, standardError=0.0020049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99941429_ACCTCAGGGTTACC_A', pValueMantissa=4.370999813079834, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.187402, standardError=0.0929166, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99941625_T_C', pValueMantissa=7.7729997634887695, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130649, standardError=0.00190932, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942056_T_C', pValueMantissa=4.118000030517578, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0832392, standardError=0.0407701, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942622_C_G', pValueMantissa=4.414999961853027, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132329, standardError=0.00191137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942695_T_C', pValueMantissa=5.933000087738037, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131229, standardError=0.0020049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942879_G_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99943051_G_T', pValueMantissa=5.098999977111816, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0153013, standardError=0.00335503, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99943272_T_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99943700_G_A', pValueMantissa=1.340000033378601, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0629333, standardError=0.0254482, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99943766_A_G', pValueMantissa=3.8440001010894775, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0136334, standardError=0.00196368, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99944281_T_C', pValueMantissa=7.932000160217285, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130606, standardError=0.00190951, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99944562_A_T', pValueMantissa=4.39300012588501, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0135957, standardError=0.0019636, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99944721_TC_T', pValueMantissa=4.39300012588501, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0135957, standardError=0.0019636, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99945215_A_G', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946139_G_A', pValueMantissa=1.3140000104904175, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00899244, standardError=0.00185887, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946260_A_G', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946307_A_C', pValueMantissa=4.873000144958496, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.12509, standardError=0.063468, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946514_A_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946634_G_A', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99947232_A_G', pValueMantissa=5.947999954223633, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131222, 
standardError=0.00200491, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99947807_T_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99947876_G_A', pValueMantissa=4.059000015258789, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132552, standardError=0.00191131, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99948681_A_G', pValueMantissa=7.769999980926514, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130649, standardError=0.00190932, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99949062_G_A', pValueMantissa=4.873000144958496, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.12509, standardError=0.063468, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99950056_G_A', pValueMantissa=4.761000156402588, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.107573, standardError=0.0543081, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99952120_T_C', pValueMantissa=1.062000036239624, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0236237, standardError=0.00721645, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99953194_G_A', pValueMantissa=5.874000072479248, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131262, standardError=0.00200494, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99953842_C_T', pValueMantissa=4.118000030517578, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132617, standardError=0.00191282, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99953915_A_G', pValueMantissa=3.8420000076293945, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132682, standardError=0.00191106, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99954520_G_A', pValueMantissa=6.242000102996826, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131087, standardError=0.00200505, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99954533_T_G', pValueMantissa=4.175000190734863, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132459, standardError=0.00191108, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99956142_G_A', pValueMantissa=4.054999828338623, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132533, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99956727_G_A', pValueMantissa=2.9579999446868896, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0132311, standardError=0.00445185, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99958027_A_G', pValueMantissa=7.925000190734863, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130596, standardError=0.00190933, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99959825_G_A', pValueMantissa=4.172999858856201, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0163596, standardError=0.00803454, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99959911_G_A', pValueMantissa=2.3239998817443848, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134674, standardError=0.00201484, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99962154_T_G', pValueMantissa=6.442999839782715, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130988, standardError=0.002005, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99962896_A_G', pValueMantissa=4.985000133514404, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.018755, standardError=0.00956281, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963747_T_C', pValueMantissa=3.8350000381469727, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.10415, standardError=0.0360205, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963755_G_A', pValueMantissa=4.4070000648498535, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132354, standardError=0.00191167, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963792_C_T', pValueMantissa=1.0329999923706055, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133147, standardError=0.0019575, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963855_G_A', pValueMantissa=6.202000141143799, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131113, standardError=0.00200516, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963858_G_C', pValueMantissa=4.052000045776367, pValueExponent=-12, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0132537, standardError=0.00191104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99964474_C_A', pValueMantissa=1.5709999799728394, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013808, standardError=0.0020483, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99964716_C_T', pValueMantissa=3.6579999923706055, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.022367, standardError=0.00769613, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99965353_C_T', pValueMantissa=4.11899995803833, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0832401, standardError=0.0407717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99965381_C_T', pValueMantissa=3.3989999294281006, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0154555, standardError=0.00728972, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99965782_C_T', pValueMantissa=1.2259999513626099, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0312055, standardError=0.00547846, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99966311_G_A', 
pValueMantissa=4.085000038146973, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132514, standardError=0.00191102, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99966874_G_A', pValueMantissa=9.85099983215332, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133219, standardError=0.00195661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99967655_A_C', pValueMantissa=4.085999965667725, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132548, standardError=0.00191151, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99968174_T_C', pValueMantissa=6.03000020980835, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131184, standardError=0.00200495, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99970196_C_T', pValueMantissa=1.940999984741211, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134874, standardError=0.00191619, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99971874_A_G', pValueMantissa=4.980999946594238, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132011, standardError=0.0019115, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973080_C_CATTT', pValueMantissa=9.527999877929688, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00936982, standardError=0.00191176, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973080_CATTT_C', pValueMantissa=3.311000108718872, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013347, standardError=0.00191661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973360_A_G', pValueMantissa=1.6030000448226929, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0940323, standardError=0.0390452, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973664_T_C', pValueMantissa=4.126999855041504, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013249, standardError=0.00191107, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973694_T_C', pValueMantissa=3.937000036239624, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.13666, standardError=0.0663293, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973733_A_G', pValueMantissa=4.34499979019165, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132363, standardError=0.00191125, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99974260_C_T', pValueMantissa=2.316999912261963, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0316527, standardError=0.0139405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99975197_T_C', pValueMantissa=2.117000102996826, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134651, standardError=0.00191633, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99975489_C_G', pValueMantissa=5.743000030517578, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0354502, standardError=0.0128345, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99976504_G_A', pValueMantissa=8.508999824523926, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0819298, standardError=0.031138, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99977161_A_G', pValueMantissa=4.061999797821045, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132532, standardError=0.00191106, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99977721_G_A', pValueMantissa=6.2829999923706055, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0246959, standardError=0.00903733, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99979375_G_A', pValueMantissa=3.937000036239624, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.13666, standardError=0.0663293, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99979565_C_T', pValueMantissa=4.479000091552734, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0275515, standardError=0.0137302, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99979592_G_A', pValueMantissa=3.3989999294281006, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133022, standardError=0.00191119, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99980479_A_C', pValueMantissa=7.7230000495910645, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0930047, standardError=0.0349125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99980767_T_TG', pValueMantissa=3.1600000858306885, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133205, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99981303_T_C', pValueMantissa=1.0260000228881836, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129016, standardError=0.0019962, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99982408_C_T', pValueMantissa=4.807000160217285, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131845, standardError=0.00200471, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99982941_G_A', pValueMantissa=3.236999988555908, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133135, standardError=0.00191092, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99983547_G_T', pValueMantissa=1.378000020980835, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0138444, standardError=0.00204793, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99983965_C_T', pValueMantissa=1.3339999914169312, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.276188, standardError=0.0722979, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99984671_A_G', pValueMantissa=1.090999960899353, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012885, standardError=0.00199651, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99984912_C_T', pValueMantissa=3.328000068664551, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133085, standardError=0.00191128, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99986015_T_C', pValueMantissa=1.0609999895095825, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012891, standardError=0.00199615, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988026_G_T', pValueMantissa=7.709000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122774, standardError=0.0046077, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988174_G_A', pValueMantissa=3.121999979019165, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133243, standardError=0.0019111, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988593_A_C', pValueMantissa=4.09499979019165, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0833533, standardError=0.0407789, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988730_T_C', pValueMantissa=3.1760001182556152, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133191, 
standardError=0.00191101, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988731_G_A', pValueMantissa=5.669000148773193, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144037, standardError=0.00520681, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988980_A_G', pValueMantissa=4.874000072479248, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131807, standardError=0.00200475, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99989148_T_C', pValueMantissa=4.875, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131806, standardError=0.00200475, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99989685_T_C', pValueMantissa=4.556000232696533, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132006, standardError=0.00200472, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99989906_C_T', pValueMantissa=3.2639999389648438, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133133, standardError=0.00191121, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99990445_G_A', pValueMantissa=6.452000141143799, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.090219, standardError=0.0331213, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99990521_G_A', pValueMantissa=2.9019999504089355, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013345, standardError=0.00191124, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99990640_T_A', pValueMantissa=1.5579999685287476, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0139907, standardError=0.00442234, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99992006_G_GA', pValueMantissa=3.0260000228881836, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133329, standardError=0.00191111, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99992015_T_C', pValueMantissa=3.5850000381469727, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013291, standardError=0.00191165, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99992806_A_G', pValueMantissa=3.0290000438690186, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133327, standardError=0.00191112, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99993453_A_G', pValueMantissa=3.0329999923706055, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133323, standardError=0.00191113, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99993852_T_C', pValueMantissa=3.0360000133514404, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133322, standardError=0.00191113, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99994177_C_T', pValueMantissa=4.533999919891357, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132032, standardError=0.00200489, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99994349_A_T', pValueMantissa=1.0379999876022339, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0287633, standardError=0.00422921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995001_T_C', pValueMantissa=3.2860000133514404, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133152, standardError=0.00191176, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995032_G_C', pValueMantissa=4.89900016784668, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131854, standardError=0.00200569, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995714_T_C', pValueMantissa=3.374000072479248, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133048, standardError=0.00191128, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995753_C_CTT', pValueMantissa=3.372999906539917, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133049, standardError=0.00191128, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99996207_C_G', pValueMantissa=3.384000062942505, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133041, standardError=0.00191129, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99997268_A_G', pValueMantissa=3.378999948501587, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133047, standardError=0.00191132, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99997438_CCA_C', pValueMantissa=6.603000164031982, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.109829, standardError=0.0404341, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99997720_A_G', pValueMantissa=2.815999984741211, pValueExponent=-12, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0133548, standardError=0.00191149, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99998079_C_CA', pValueMantissa=4.61299991607666, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0922906, standardError=0.0462786, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99998104_G_A', pValueMantissa=1.2979999780654907, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00911911, standardError=0.0018841, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99999971_A_G', pValueMantissa=2.9570000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.030431, standardError=0.0102384, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100000235_C_T', pValueMantissa=5.6519999504089355, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.014492, standardError=0.00201036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100000486_G_A', pValueMantissa=4.172999858856201, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.305198, standardError=0.149885, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100002038_G_A', 
pValueMantissa=3.7739999294281006, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0211624, standardError=0.0073064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100002628_A_C', pValueMantissa=2.953000068664551, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0138093, standardError=0.00189258, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100004140_G_A', pValueMantissa=2.6459999084472656, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0994314, standardError=0.0448022, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100004827_A_C', pValueMantissa=6.945000171661377, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142296, standardError=0.0019001, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100005233_T_C', pValueMantissa=3.111999988555908, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0216712, standardError=0.00733013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100005358_G_C', pValueMantissa=4.872000217437744, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145472, standardError=0.00201239, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100005438_A_G', pValueMantissa=1.093000054359436, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0533162, standardError=0.02095, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100006780_C_T', pValueMantissa=5.067999839782715, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145408, standardError=0.002013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100007241_C_T', pValueMantissa=6.783999919891357, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142505, standardError=0.0019021, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100007404_G_GC', pValueMantissa=1.3420000076293945, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0198944, standardError=0.00377285, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100008640_A_G', pValueMantissa=5.019999980926514, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145435, standardError=0.002013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100009013_G_A', pValueMantissa=6.611999988555908, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142475, standardError=0.00190084, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100009635_T_G', pValueMantissa=1.0549999475479126, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134968, standardError=0.0018948, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100011477_C_G', pValueMantissa=2.444999933242798, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0148805, standardError=0.00203232, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100011685_CAAAT_C', pValueMantissa=4.139999866485596, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101715, standardError=0.00185449, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100011812_A_T', pValueMantissa=1.2029999494552612, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0141145, standardError=0.00190314, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100013275_A_T', pValueMantissa=2.174999952316284, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145924, standardError=0.00191005, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100013744_G_A', pValueMantissa=4.76800012588501, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.51719, standardError=0.261177, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100014566_A_G', pValueMantissa=8.550999641418457, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119513, standardError=0.00358432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100014970_T_C', pValueMantissa=3.993000030517578, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0146099, standardError=0.00201355, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100015400_G_A', pValueMantissa=5.646999835968018, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0160357, standardError=0.00222448, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100015645_T_C', pValueMantissa=2.236999988555908, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145559, standardError=0.00207384, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100015854_C_T', pValueMantissa=6.158999919891357, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0160044, standardError=0.0022238, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100016635_T_G', pValueMantissa=1.718999981880188, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147181, standardError=0.00218755, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100016645_C_T', pValueMantissa=4.622000217437744, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0256776, standardError=0.00906606, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100017027_A_T', pValueMantissa=6.242000102996826, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0160069, standardError=0.0022247, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100017624_A_G', pValueMantissa=1.7170000076293945, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147203, standardError=0.00218782, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100018116_C_T', pValueMantissa=2.197000026702881, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145668, standardError=0.00207465, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100018135_G_C', pValueMantissa=1.4559999704360962, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.275534, standardError=0.0725377, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100018265_A_T', pValueMantissa=2.7190001010894775, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00874366, standardError=0.00186392, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100020857_T_C', pValueMantissa=9.112000465393066, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0156784, standardError=0.00210358, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100021675_C_A', pValueMantissa=3.066999912261963, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0233385, standardError=0.0107983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100021907_C_T', pValueMantissa=7.229000091552734, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0159723, standardError=0.00222609, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100022313_C_CT', pValueMantissa=6.171999931335449, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0504229, standardError=0.0184125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100023134_T_G', pValueMantissa=1.3980000019073486, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0268617, standardError=0.0109298, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100024168_G_A', pValueMantissa=2.3359999656677246, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00880598, standardError=0.00186491, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100024869_C_T', pValueMantissa=3.556999921798706, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0162101, standardError=0.00222929, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100025927_T_TA', pValueMantissa=3.621999979019165, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.014426, standardError=0.00207534, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100026878_C_T', pValueMantissa=9.258999824523926, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0185461, standardError=0.00712673, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100027354_G_A', pValueMantissa=1.968000054359436, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.103617, standardError=0.0444235, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100027592_G_C', pValueMantissa=1.965999960899353, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.103629, standardError=0.0444216, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100027903_T_C', pValueMantissa=2.75600004196167, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0384662, standardError=0.0128487, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100028049_C_T', pValueMantissa=4.144000053405762, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.130023, standardError=0.0637634, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100029663_A_G', pValueMantissa=8.883999824523926, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147895, standardError=0.00206939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100030853_G_A', pValueMantissa=7.5980000495910645, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0158245, standardError=0.00220759, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100030993_G_A', pValueMantissa=3.437000036239624, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0224965, standardError=0.00768921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100031808_C_T', pValueMantissa=5.959000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0212214, standardError=0.00771671, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100032684_A_C', pValueMantissa=5.459000110626221, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.014926, standardError=0.00206922, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100033239_T_A', pValueMantissa=1.3669999837875366, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0463194, standardError=0.0187842, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100033337_C_T', pValueMantissa=9.138999938964844, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0165264, standardError=0.00221749, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100033635_G_GT', pValueMantissa=1.0110000371932983, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0164902, standardError=0.00221658, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100035604_T_A', pValueMantissa=3.4110000133514404, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01433, 
standardError=0.002059, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100037190_G_A', pValueMantissa=3.2070000171661377, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0120692, standardError=0.00203849, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100037544_C_T', pValueMantissa=1.6990000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00996527, standardError=0.00417483, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100037655_A_G', pValueMantissa=6.13100004196167, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0127317, standardError=0.00219016, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100038188_C_T', pValueMantissa=7.738999843597412, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126728, standardError=0.00219478, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100038589_T_C', pValueMantissa=3.734999895095825, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010628, standardError=0.00193133, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100038648_G_A', pValueMantissa=1.7230000495910645, 
pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00994355, standardError=0.00417478, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039544_T_C', pValueMantissa=5.021999835968018, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126227, standardError=0.00202988, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039545_G_T', pValueMantissa=1.5509999990463257, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130939, standardError=0.00204582, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039976_C_T', pValueMantissa=1.7209999561309814, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00994495, standardError=0.00417473, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039997_T_C', pValueMantissa=5.01800012588501, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126197, standardError=0.00202937, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100040770_T_G', pValueMantissa=3.059999942779541, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.120454, standardError=0.0557085, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100042338_T_C', pValueMantissa=2.61899995803833, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.189007, standardError=0.085007, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100042505_G_A', pValueMantissa=3.4860000610351562, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101879, standardError=0.00482832, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100042848_C_CT', pValueMantissa=4.413000106811523, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0093599, standardError=0.00185336, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100043308_C_T', pValueMantissa=5.076000213623047, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126162, standardError=0.00202939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044357_C_T', pValueMantissa=5.36299991607666, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0127797, standardError=0.00218999, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044501_A_G', pValueMantissa=1.7130000591278076, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00995148, standardError=0.00417441, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044818_C_T', pValueMantissa=4.927000045776367, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0204248, standardError=0.00726406, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044839_G_T', pValueMantissa=4.875, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126274, standardError=0.00202913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045047_A_C', pValueMantissa=4.922999858856201, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0204266, standardError=0.00726404, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045603_T_A', pValueMantissa=1.6740000247955322, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00998325, standardError=0.00417297, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045685_C_T', pValueMantissa=1.3309999704360962, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131383, standardError=0.00204533, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045864_G_A', pValueMantissa=1.6770000457763672, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.00998092, standardError=0.00417295, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100046495_A_G', pValueMantissa=4.683000087738037, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133938, standardError=0.00203532, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100046569_C_T', pValueMantissa=1.6740000247955322, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129058, standardError=0.00202014, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100047823_TA_T', pValueMantissa=1.5670000314712524, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129238, standardError=0.00201974, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100049106_G_A', pValueMantissa=1.690000057220459, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130999, standardError=0.00217423, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100049236_C_T', pValueMantissa=1.690000057220459, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00996986, standardError=0.00417357, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100049902_G_A', pValueMantissa=1.8940000534057617, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130591, standardError=0.00217409, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100050211_A_T', pValueMantissa=2.1419999599456787, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0633556, standardError=0.0275398, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100050359_GAACA_G', pValueMantissa=1.6019999980926514, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129195, standardError=0.00202015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100051409_C_G', pValueMantissa=1.7120000123977661, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130944, standardError=0.00217407, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100051504_G_A', pValueMantissa=1.7549999952316284, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00990516, standardError=0.00417051, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100052545_C_G', pValueMantissa=1.5299999713897705, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129286, standardError=0.00201936, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100052742_T_C', pValueMantissa=1.687000036239624, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00997036, standardError=0.00417237, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100054045_C_T', pValueMantissa=1.3009999990463257, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129769, standardError=0.0020191, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100054117_T_C', pValueMantissa=4.938000202178955, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0716183, standardError=0.0364414, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100054574_T_G', pValueMantissa=1.6629999876022339, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00999072, standardError=0.00417196, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055122_T_C', pValueMantissa=6.368000030517578, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0124774, standardError=0.00214876, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055153_G_A', pValueMantissa=1.6540000438690186, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.00999801, standardError=0.0041714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055199_C_G', pValueMantissa=1.8609999418258667, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130598, standardError=0.00217319, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055816_G_A', pValueMantissa=3.3589999675750732, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126847, standardError=0.00201947, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055862_T_G', pValueMantissa=4.1529998779296875, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0446939, standardError=0.0219282, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100056323_C_T', pValueMantissa=4.943999767303467, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0267439, standardError=0.0136118, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100056440_C_T', pValueMantissa=4.486999988555908, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00779499, standardError=0.00388601, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100057584_A_G', 
pValueMantissa=5.919000148773193, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011644, standardError=0.00200103, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100057785_T_C', pValueMantissa=4.816999912261963, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0128495, standardError=0.00219523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100057800_C_T', pValueMantissa=1.0049999952316284, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122603, standardError=0.00200707, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059207_A_C', pValueMantissa=3.622999906539917, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0143322, standardError=0.00401899, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059401_T_TAACAAC', pValueMantissa=3.5920000076293945, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00695612, standardError=0.00194938, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059401_T_TAACAACAAC', pValueMantissa=3.884999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133237, standardError=0.00644961, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059657_G_A', pValueMantissa=3.880000114440918, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133275, standardError=0.00645004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100060102_G_C', pValueMantissa=3.874000072479248, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133327, standardError=0.00645048, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100060272_A_G', pValueMantissa=4.5370001792907715, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0594699, standardError=0.0297173, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100060908_A_G', pValueMantissa=2.742000102996826, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113896, standardError=0.00204953, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061298_C_T', pValueMantissa=2.503000020980835, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0150616, standardError=0.00672123, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061574_G_C', pValueMantissa=2.7809998989105225, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113862, 
standardError=0.00204983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061627_C_T', pValueMantissa=1.6690000295639038, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0323984, standardError=0.00537545, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061926_T_C', pValueMantissa=1.1759999990463257, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122155, standardError=0.00200796, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062022_C_T', pValueMantissa=9.611000061035156, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0212829, standardError=0.00821882, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062212_C_T', pValueMantissa=3.7899999618530273, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133927, standardError=0.00645157, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062744_C_A', pValueMantissa=3.752000093460083, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134223, standardError=0.00645282, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062935_C_T', pValueMantissa=3.7669999599456787, 
pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134118, standardError=0.00645294, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063173_T_G', pValueMantissa=2.7899999618530273, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011387, standardError=0.00205019, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063265_C_T', pValueMantissa=2.61299991607666, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142012, standardError=0.00638472, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063639_C_T', pValueMantissa=2.7750000953674316, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0276973, standardError=0.0125851, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063786_T_C', pValueMantissa=2.9630000591278076, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011365, standardError=0.00205011, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064214_T_C', pValueMantissa=5.559999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0128066, standardError=0.00219686, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100064301_T_C', pValueMantissa=2.427999973297119, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0540203, standardError=0.0239811, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064486_T_C', pValueMantissa=2.484999895095825, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0271265, standardError=0.00428678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064533_A_G', pValueMantissa=8.324000358581543, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00771332, standardError=0.00196024, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064544_T_C', pValueMantissa=8.236000061035156, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00771787, standardError=0.00196013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100065403_C_G', pValueMantissa=1.7259999513626099, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00799338, standardError=0.00185995, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100066509_T_C', pValueMantissa=3.444999933242798, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0925747, standardError=0.0437751, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100066929_C_A', pValueMantissa=4.581999778747559, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0250808, standardError=0.012559, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100067656_G_A', pValueMantissa=2.6489999294281006, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143151, standardError=0.00645107, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100067862_G_A', pValueMantissa=7.921999931335449, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119879, standardError=0.00195036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100068504_C_T', pValueMantissa=6.7170000076293945, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00746427, standardError=0.00187258, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100068843_C_A', pValueMantissa=1.2430000305175781, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115969, standardError=0.00203679, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100068866_G_GT', pValueMantissa=1.312000036239624, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0115787, standardError=0.00203689, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100069305_A_T', pValueMantissa=2.6449999809265137, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143181, standardError=0.00645098, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100069570_T_C', pValueMantissa=2.6549999713897705, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0151566, standardError=0.00504359, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100069757_C_T', pValueMantissa=2.5869998931884766, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145205, standardError=0.00481941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100070175_A_G', pValueMantissa=2.000999927520752, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0130301, standardError=0.00421666, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100072279_G_C', pValueMantissa=1.2419999837875366, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115973, standardError=0.00203683, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100073428_G_T', pValueMantissa=2.609999895095825, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143509, standardError=0.00645062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100073963_C_T', pValueMantissa=1.0750000476837158, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116458, standardError=0.00203655, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074034_T_TC', pValueMantissa=2.005000114440918, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113741, standardError=0.0020269, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074335_C_T', pValueMantissa=1.0729999542236328, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116458, standardError=0.00203643, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074511_G_A', pValueMantissa=4.894000053405762, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0166749, standardError=0.00478271, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074712_C_T', pValueMantissa=1.2990000247955322, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0487079, standardError=0.0196078, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100075164_G_A', pValueMantissa=1.2369999885559082, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115989, standardError=0.00203683, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100075228_C_T', pValueMantissa=2.6029999256134033, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143577, standardError=0.00645054, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100075751_C_A', pValueMantissa=1.7940000295639038, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0172016, standardError=0.00726754, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100076243_C_A', pValueMantissa=2.6019999980926514, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143585, standardError=0.00645053, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100077499_A_G', pValueMantissa=4.675000190734863, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00753971, standardError=0.00185191, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100077617_C_A', pValueMantissa=8.185999870300293, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.013965, standardError=0.00417311, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078024_G_A', pValueMantissa=4.400000095367432, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0760544, standardError=0.0267049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078160_G_A', pValueMantissa=2.5989999771118164, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143609, standardError=0.00645053, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078351_A_T', pValueMantissa=5.255000114440918, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0358513, standardError=0.0103395, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078510_A_C', pValueMantissa=8.102999687194824, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119828, standardError=0.00195067, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078923_G_A', pValueMantissa=1.805999994277954, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0171834, standardError=0.00726738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100079042_G_A', 
pValueMantissa=2.5889999866485596, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143712, standardError=0.00645058, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100080295_T_C', pValueMantissa=5.934000015258789, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0121315, standardError=0.00195916, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100080362_T_A', pValueMantissa=9.142000198364258, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0117746, standardError=0.00204921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100081203_A_G', pValueMantissa=7.883999824523926, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119901, standardError=0.00195049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100082411_T_C', pValueMantissa=2.5829999446868896, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143766, standardError=0.00645057, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083012_C_A', pValueMantissa=3.0209999084472656, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125696, standardError=0.0019959, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083306_C_T', pValueMantissa=2.5810000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143789, standardError=0.00645062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083442_A_T', pValueMantissa=4.665999889373779, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00754005, standardError=0.0018518, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083679_C_T', pValueMantissa=2.5810000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143785, standardError=0.00645064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083851_G_A', pValueMantissa=7.6570000648498535, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119987, standardError=0.00195041, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083869_GTGGA_G', pValueMantissa=2.5769999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143832, standardError=0.00645065, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100084120_A_G', pValueMantissa=2.5739998817443848, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0143853, standardError=0.00645064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085028_A_G', pValueMantissa=1.2200000286102295, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116042, standardError=0.00203692, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085123_T_C', pValueMantissa=2.569999933242798, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143893, standardError=0.00645068, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085191_A_C', pValueMantissa=1.0609999895095825, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116492, standardError=0.00203632, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085317_T_C', pValueMantissa=2.6549999713897705, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0151555, standardError=0.00504327, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085356_CT_C', pValueMantissa=2.989000082015991, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012573, standardError=0.00199591, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085677_T_C', 
pValueMantissa=7.4720001220703125, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0120068, standardError=0.0019505, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085790_C_T', pValueMantissa=9.621999740600586, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119266, standardError=0.0019502, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085791_G_A', pValueMantissa=6.89300012588501, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0076585, standardError=0.00283442, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100086416_A_C', pValueMantissa=4.2779998779296875, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00757727, standardError=0.00185176, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100087170_C_T', pValueMantissa=4.797999858856201, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0249466, standardError=0.0126147, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088100_T_C', pValueMantissa=1.2690000534057617, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115908, standardError=0.00203699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088206_G_A', pValueMantissa=1.8480000495910645, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0171185, standardError=0.00726652, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088307_G_A', pValueMantissa=8.534000396728516, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0258604, standardError=0.00421546, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088565_A_AGAG', pValueMantissa=2.5799999237060547, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01438, standardError=0.00645071, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088687_C_T', pValueMantissa=4.041999816894531, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0124937, standardError=0.00199821, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088994_G_C', pValueMantissa=4.744999885559082, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.26271, standardError=0.13253, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100089654_C_A', pValueMantissa=1.2940000295639038, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115954, standardError=0.00203897, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100090029_G_T', pValueMantissa=3.2170000076293945, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0322465, standardError=0.00583203, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100093119_C_T', pValueMantissa=3.0880000591278076, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.379152, standardError=0.0909911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095173_CA_C', pValueMantissa=1.1920000314712524, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116127, standardError=0.00203702, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095421_C_T', pValueMantissa=4.376999855041504, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00756736, standardError=0.00185174, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095474_G_C', pValueMantissa=1.0429999828338623, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011657, standardError=0.00203668, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095491_G_A', pValueMantissa=2.575000047683716, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143845, standardError=0.00645073, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095666_A_G', pValueMantissa=2.997999906539917, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125726, standardError=0.001996, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095766_C_T', pValueMantissa=2.5769999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143829, standardError=0.00645074, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100096097_G_A', pValueMantissa=4.854000091552734, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.159194, standardError=0.0807015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100096882_A_T', pValueMantissa=1.875, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0170779, standardError=0.00726586, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100097204_C_G', pValueMantissa=4.326000213623047, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00757235, standardError=0.00185173, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100097630_G_A', pValueMantissa=6.629000186920166, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0183017, standardError=0.0067411, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100097685_G_A', pValueMantissa=2.1630001068115234, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00784104, standardError=0.00341393, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100098591_G_A', pValueMantissa=7.1529998779296875, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00762397, standardError=0.00283455, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100099448_G_A', pValueMantissa=2.5399999618530273, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110489, standardError=0.00198346, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100100113_A_G', pValueMantissa=2.996000051498413, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012573, standardError=0.00199604, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100100236_G_A', pValueMantissa=2.565000057220459, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143946, standardError=0.00645086, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101678_G_A', pValueMantissa=1.065000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116505, standardError=0.00203676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101681_G_A', pValueMantissa=2.572999954223633, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143872, standardError=0.00645085, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101734_C_T', pValueMantissa=2.805000066757202, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0388904, standardError=0.0177047, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101865_T_C', pValueMantissa=7.827000141143799, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00939164, standardError=0.0021011, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101978_T_A', pValueMantissa=2.565000057220459, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143954, standardError=0.00645086, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100102891_G_A', pValueMantissa=3.0280001163482666, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0125699, standardError=0.00199607, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100102937_T_C', pValueMantissa=1.0709999799728394, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116485, standardError=0.00203677, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103034_T_C', pValueMantissa=1.062000036239624, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116512, standardError=0.00203676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103075_C_T', pValueMantissa=2.5480000972747803, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144117, standardError=0.0064509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103232_C_T', pValueMantissa=1.059999942779541, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116519, standardError=0.00203676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103284_T_C', pValueMantissa=1.2389999628067017, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115998, standardError=0.00203711, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103427_G_A', 
pValueMantissa=2.5409998893737793, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144177, standardError=0.0064504, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103653_C_A', pValueMantissa=1.2389999628067017, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116, standardError=0.00203711, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104060_G_C', pValueMantissa=1.0570000410079956, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011653, standardError=0.00203678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104416_A_G', pValueMantissa=1.062999963760376, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116511, standardError=0.00203678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104538_T_A', pValueMantissa=3.0250000953674316, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125704, standardError=0.0019961, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104974_T_C', pValueMantissa=2.5460000038146973, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144144, standardError=0.00645109, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100105358_T_A', pValueMantissa=1.246000051498413, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115981, standardError=0.00203714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100105587_G_A', pValueMantissa=1.0700000524520874, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116489, standardError=0.0020368, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100105759_A_T', pValueMantissa=1.253000020980835, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115962, standardError=0.00203715, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106124_T_C', pValueMantissa=1.0019999742507935, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116787, standardError=0.00203801, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106127_T_C', pValueMantissa=1.0019999742507935, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116787, standardError=0.00203801, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106157_C_A', pValueMantissa=2.5399999618530273, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01442, standardError=0.00645108, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106705_G_GGCAGAGTAA', pValueMantissa=3.0460000038146973, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125685, standardError=0.00199613, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107060_CG_C', pValueMantissa=1.2519999742507935, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115965, standardError=0.00203718, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107246_C_T', pValueMantissa=1.8339999914169312, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.148529, standardError=0.0629731, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107759_T_C', pValueMantissa=3.5490000247955322, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00765844, standardError=0.00185209, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107761_A_G', pValueMantissa=5.459000110626221, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.272006, standardError=0.0786791, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100108013_C_T', pValueMantissa=2.561000108718872, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0243527, standardError=0.00807438, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100108920_T_G', pValueMantissa=9.07699966430664, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011727, standardError=0.00204049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100109874_G_A', pValueMantissa=4.85099983215332, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0248793, standardError=0.0126104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100110231_C_T', pValueMantissa=7.883999824523926, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0117687, standardError=0.00203931, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100110702_T_G', pValueMantissa=3.5929999351501465, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0606656, standardError=0.0289201, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100110795_TTA_T', pValueMantissa=6.190999984741211, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118518, standardError=0.00203936, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100111161_G_A', pValueMantissa=2.000999927520752, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0497915, standardError=0.0161136, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100111179_C_T', pValueMantissa=6.201000213623047, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118489, standardError=0.00203896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100111991_C_A', pValueMantissa=6.7820000648498535, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118202, standardError=0.00203929, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100112419_TA_T', pValueMantissa=5.803999900817871, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118714, standardError=0.00203896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100112828_C_T', pValueMantissa=2.8289999961853027, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0388309, standardError=0.0177045, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100112930_A_C', pValueMantissa=6.804999828338623, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118191, standardError=0.0020393, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100114067_A_G', pValueMantissa=1.0110000371932983, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104825, standardError=0.00196864, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100115072_C_T', pValueMantissa=1.034999966621399, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104694, standardError=0.00196776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100115087_C_T', pValueMantissa=5.876999855041504, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118682, standardError=0.00203912, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100115453_T_C', pValueMantissa=1.0360000133514404, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104692, standardError=0.00196776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116179_G_C', pValueMantissa=5.88100004196167, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118679, standardError=0.00203913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116494_A_T', pValueMantissa=1.0360000133514404, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010469, 
standardError=0.00196776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116607_C_T', pValueMantissa=1.0160000324249268, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104762, standardError=0.00196781, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116979_C_G', pValueMantissa=1.0180000066757202, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104755, standardError=0.0019678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100117013_G_A', pValueMantissa=1.0379999876022339, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104684, standardError=0.00196777, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100118204_T_G', pValueMantissa=1.0509999990463257, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104641, standardError=0.00196778, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100118462_G_A', pValueMantissa=5.9079999923706055, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118664, standardError=0.00203914, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100118652_C_T', pValueMantissa=1.128999948501587, 
pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104375, standardError=0.00196763, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100119929_C_G', pValueMantissa=8.840999603271484, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0121944, standardError=0.0019896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100120722_G_A', pValueMantissa=5.441999912261963, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106837, standardError=0.00196527, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100121497_C_T', pValueMantissa=1.5360000133514404, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.573298, standardError=0.23652, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100124748_T_C', pValueMantissa=2.8399999141693115, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0388018, standardError=0.0177043, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125143_C_CA', pValueMantissa=3.8269999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0260846, standardError=0.0125892, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100125152_C_A', pValueMantissa=7.866000175476074, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105533, standardError=0.00196517, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125180_C_T', pValueMantissa=7.633999824523926, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105635, standardError=0.00196509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125779_A_G', pValueMantissa=7.78000020980835, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105568, standardError=0.00196509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100126618_A_C', pValueMantissa=4.631999969482422, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.133295, standardError=0.0668985, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100126748_A_G', pValueMantissa=7.800000190734863, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105558, standardError=0.00196509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100127494_A_T', pValueMantissa=1.8029999732971191, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082724, standardError=0.00349777, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128204_G_GAGAGAA', pValueMantissa=5.4019999504089355, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107556, standardError=0.00197804, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128221_A_G', pValueMantissa=6.446000099182129, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106227, standardError=0.001965, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128550_G_A', pValueMantissa=8.194000244140625, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105396, standardError=0.00196532, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128651_T_C', pValueMantissa=7.315000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105796, standardError=0.00196528, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128763_C_T', pValueMantissa=7.829999923706055, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105544, standardError=0.00196508, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128883_C_T', pValueMantissa=8.057000160217285, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0105443, standardError=0.00196508, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128958_G_A', pValueMantissa=2.124000072479248, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00680841, standardError=0.00295533, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129035_C_A', pValueMantissa=3.3989999294281006, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0925999, standardError=0.043676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129320_C_T', pValueMantissa=2.13100004196167, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00680465, standardError=0.00295531, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129629_A_C', pValueMantissa=1.8580000400543213, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.148278, standardError=0.0629939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129660_T_C', pValueMantissa=8.699000358581543, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115118, standardError=0.00200055, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129776_G_A', 
pValueMantissa=1.690000057220459, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0884659, standardError=0.0281741, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100130207_C_T', pValueMantissa=3.569000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0140895, standardError=0.00483524, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100130573_C_G', pValueMantissa=2.9149999618530273, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00674022, standardError=0.00186053, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100130860_A_G', pValueMantissa=9.043999671936035, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114981, standardError=0.00200046, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100131042_G_A', pValueMantissa=3.619999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.179459, standardError=0.0503203, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100131063_C_G', pValueMantissa=2.2909998893737793, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00671867, standardError=0.00295325, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100131465_G_T', pValueMantissa=8.934000015258789, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115031, standardError=0.00200061, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132172_C_G', pValueMantissa=2.128000020980835, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00680593, standardError=0.00295533, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132602_T_TTAAA', pValueMantissa=4.828000068664551, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00431663, standardError=0.00218576, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132602_T_TTAAATAAA', pValueMantissa=4.5980000495910645, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0519863, standardError=0.0260513, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132602_TTAAATAAATAAA_T', pValueMantissa=7.363999843597412, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116156, standardError=0.00200877, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100133539_G_T', pValueMantissa=2.6610000133514404, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00678474, 
standardError=0.00186073, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100133661_G_A', pValueMantissa=2.6619999408721924, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00678444, standardError=0.00186073, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100134226_T_TAAAC', pValueMantissa=9.397000312805176, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114918, standardError=0.00200162, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100134368_G_T', pValueMantissa=2.305999994277954, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00685323, standardError=0.0018608, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100134849_C_G', pValueMantissa=2.5420000553131104, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00680611, standardError=0.00186063, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100136849_G_A', pValueMantissa=2.4240000247955322, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00682879, standardError=0.00186061, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100137626_G_C', pValueMantissa=4.564000129699707, 
pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.133964, standardError=0.067025, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100139939_C_T', pValueMantissa=2.2850000858306885, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0168361, standardError=0.00739727, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140109_A_G', pValueMantissa=8.161999702453613, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116036, standardError=0.00201274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140433_T_C', pValueMantissa=8.460000038146973, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00745523, standardError=0.00283129, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140822_C_T', pValueMantissa=4.0370001792907715, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00653853, standardError=0.00184827, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140993_C_T', pValueMantissa=8.553999900817871, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00744479, standardError=0.00283137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100141769_G_C', pValueMantissa=4.497000217437744, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.134653, standardError=0.0671606, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100142018_C_T', pValueMantissa=8.651000022888184, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00615783, standardError=0.00184859, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100142970_A_T', pValueMantissa=3.749000072479248, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0279115, standardError=0.00962947, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143128_C_A', pValueMantissa=2.7290000915527344, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0151084, standardError=0.0050416, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143200_T_G', pValueMantissa=8.8100004196167, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00751992, standardError=0.00191775, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143201_T_C', pValueMantissa=8.812999725341797, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00751979, standardError=0.00191775, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143322_A_G', pValueMantissa=8.62600040435791, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0074353, standardError=0.00283084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143412_T_C', pValueMantissa=3.306999921798706, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0244099, standardError=0.00588015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143655_T_C', pValueMantissa=5.8420000076293945, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0063633, standardError=0.00185041, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144149_T_G', pValueMantissa=5.2210001945495605, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106098, standardError=0.00194903, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144392_A_ACGTG', pValueMantissa=1.059999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122106, standardError=0.00200172, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144587_G_A', pValueMantissa=4.710000038146973, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0106442, standardError=0.00194881, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144738_C_T', pValueMantissa=3.628999948501587, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00387717, standardError=0.00185181, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145180_CA_C', pValueMantissa=7.709000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00754349, standardError=0.00283104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145446_T_C', pValueMantissa=4.289000034332275, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00722839, standardError=0.0025309, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145462_G_A', pValueMantissa=1.184999942779541, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00704211, standardError=0.00279842, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145533_C_T', pValueMantissa=6.809999942779541, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0688508, standardError=0.0254439, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100145864_T_C', pValueMantissa=2.5959999561309814, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00556169, standardError=0.00184654, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100146576_G_A', pValueMantissa=2.239000082015991, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102862, standardError=0.0027873, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100147206_C_G', pValueMantissa=2.24399995803833, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102847, standardError=0.00278728, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100148436_A_G', pValueMantissa=7.232999801635742, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01072, standardError=0.00317104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100148745_A_G', pValueMantissa=9.111000061035156, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011124, standardError=0.00335401, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100150256_TG_T', pValueMantissa=1.621000051498413, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0058237, standardError=0.00184761, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100151385_A_G', pValueMantissa=2.200000047683716, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102977, standardError=0.002787, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100151760_T_G', pValueMantissa=1.680999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0116223, standardError=0.00369954, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100151918_T_C', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100152307_T_C', pValueMantissa=9.949999809265137, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108402, standardError=0.00189131, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100152437_C_T', pValueMantissa=1.0140000581741333, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108394, standardError=0.00189222, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100153963_AT_A', pValueMantissa=3.984999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00551895, 
standardError=0.00191674, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155294_C_T', pValueMantissa=5.695000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0370698, standardError=0.0134074, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155300_C_A', pValueMantissa=3.9560000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013611, standardError=0.0066127, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155337_G_C', pValueMantissa=9.121999740600586, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0129497, standardError=0.00390489, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155409_G_C', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155608_A_G', pValueMantissa=2.359999895095825, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102538, standardError=0.00278865, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157105_C_T', pValueMantissa=1.9290000200271606, 
pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114702, standardError=0.00369896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157116_T_A', pValueMantissa=1.4279999732971191, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00588326, standardError=0.00184495, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157609_G_A', pValueMantissa=2.1989998817443848, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0103034, standardError=0.00278846, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157763_C_T', pValueMantissa=3.859999895095825, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00711467, standardError=0.00343966, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100158037_G_A', pValueMantissa=4.420000076293945, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0259169, standardError=0.00910471, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100158360_A_G', pValueMantissa=1.7910000085830688, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115545, standardError=0.0037, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100158691_C_T', pValueMantissa=6.251999855041504, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0131114, standardError=0.00327532, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159080_T_C', pValueMantissa=8.289999961853027, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0343476, standardError=0.0102747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159144_G_A', pValueMantissa=1.3380000591278076, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00857697, standardError=0.00346768, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159600_GAAATCAACAATAA_G', pValueMantissa=1.7860000133514404, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115575, standardError=0.0037, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159706_T_TTTAAA', pValueMantissa=2.239000082015991, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102922, standardError=0.00278886, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100161011_T_C', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100161641_G_T', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100162586_C_T', pValueMantissa=1.5570000410079956, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117095, standardError=0.00370097, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100162599_A_G', pValueMantissa=1.7910000085830688, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115545, standardError=0.0037, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100164449_A_T', pValueMantissa=8.289999961853027, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0343476, standardError=0.0102747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100164555_T_C', pValueMantissa=1.8140000104904175, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0162269, standardError=0.00686797, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100164661_T_C', pValueMantissa=1.065000057220459, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0108199, standardError=0.00189161, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100166335_G_A', pValueMantissa=1.569000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370099, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167234_TA_T', pValueMantissa=1.9529999494552612, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114517, standardError=0.00369739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167290_A_G', pValueMantissa=1.9539999961853027, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114514, standardError=0.00369739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167685_C_T', pValueMantissa=3.9630000591278076, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0526946, standardError=0.0256104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167711_G_A', pValueMantissa=1.3270000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011893, standardError=0.00370488, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100168884_G_A', 
pValueMantissa=1.815000057220459, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01154, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100169073_T_A', pValueMantissa=1.659000039100647, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0164986, standardError=0.00688708, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170367_T_A', pValueMantissa=1.8220000267028809, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115361, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170447_T_C', pValueMantissa=1.8200000524520874, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011537, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170652_A_G', pValueMantissa=8.133000373840332, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0162704, standardError=0.00614785, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170743_T_C', pValueMantissa=1.819000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115379, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171278_T_G', pValueMantissa=1.965999960899353, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114445, standardError=0.00369739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171408_C_CTATT', pValueMantissa=1.8220000267028809, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115357, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171892_C_A', pValueMantissa=4.783999919891357, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0290312, standardError=0.0146713, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171937_A_T', pValueMantissa=3.7090001106262207, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0264529, standardError=0.0126887, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100172580_C_A', pValueMantissa=1.8209999799728394, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115368, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100172766_C_T', pValueMantissa=1.3320000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118891, standardError=0.00370488, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100173406_G_C', pValueMantissa=3.993000030517578, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0276948, standardError=0.00962055, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100173454_A_G', pValueMantissa=1.8220000267028809, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011536, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100174322_A_AT', pValueMantissa=1.9670000076293945, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114438, standardError=0.00369738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100174478_C_T', pValueMantissa=5.164999961853027, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114058, standardError=0.00195247, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175179_A_G', pValueMantissa=1.968000054359436, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114433, standardError=0.00369738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175211_T_C', pValueMantissa=9.72700023651123, pValueExponent=-4, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0910994, standardError=0.02762, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175305_C_T', pValueMantissa=1.8240000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011535, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175350_T_C', pValueMantissa=1.8240000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011535, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100176550_CAAAAGACCGTTTTTA_C', pValueMantissa=1.8869999647140503, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115298, standardError=0.00371041, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100177021_C_G', pValueMantissa=1.8309999704360962, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115307, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100177437_T_C', pValueMantissa=4.168000221252441, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00536168, standardError=0.00187135, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100177850_C_T', pValueMantissa=7.14300012588501, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0216516, standardError=0.00545178, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178264_G_A', pValueMantissa=1.8339999914169312, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011529, standardError=0.00370002, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178377_T_A', pValueMantissa=3.9769999980926514, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277079, standardError=0.00962096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178824_A_C', pValueMantissa=1.1480000019073486, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0273359, standardError=0.0108138, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178873_G_C', pValueMantissa=3.313999891281128, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00845972, standardError=0.00288043, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178957_A_G', pValueMantissa=1.8370000123977661, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011527, standardError=0.00370001, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100181244_T_A', pValueMantissa=9.291000366210938, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0116765, standardError=0.00352639, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100182158_GT_G', pValueMantissa=1.440999984741211, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118051, standardError=0.00370502, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100182496_T_C', pValueMantissa=2.2899999618530273, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102733, standardError=0.00278807, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100182799_A_G', pValueMantissa=2.365000009536743, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102471, standardError=0.00278728, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100183196_T_C', pValueMantissa=1.8519999980926514, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011518, standardError=0.00369996, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100183255_G_A', pValueMantissa=1.3580000400543213, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0118679, standardError=0.0037048, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100183966_G_A', pValueMantissa=1.8609999418258667, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115126, standardError=0.00369993, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100184256_T_C', pValueMantissa=4.7220001220703125, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.029124, standardError=0.0146772, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100184474_G_A', pValueMantissa=3.7109999656677246, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00543283, standardError=0.00187227, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100184568_G_A', pValueMantissa=2.1500000953674316, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0103173, standardError=0.00278792, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100186257_C_G', pValueMantissa=1.2259999513626099, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00596993, standardError=0.00184663, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100186688_C_T', pValueMantissa=1.9229999780654907, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114751, standardError=0.00369939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100187980_C_T', pValueMantissa=4.630000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813035, standardError=0.00287118, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100189674_T_G', pValueMantissa=9.279999732971191, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.126539, standardError=0.0486397, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100190351_G_A', pValueMantissa=4.619999885559082, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813223, standardError=0.00287113, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100190395_G_A', pValueMantissa=2.0, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0760922, standardError=0.024623, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100192759_T_C', pValueMantissa=1.9559999704360962, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114555, standardError=0.00369915, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100193524_C_T', pValueMantissa=9.555000305175781, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122529, standardError=0.0037093, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100193948_A_C', pValueMantissa=3.0179998874664307, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115477, standardError=0.00194714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100195245_A_T', pValueMantissa=4.635000228881836, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0081285, standardError=0.00287091, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100195684_C_CA', pValueMantissa=4.2270002365112305, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.137066, standardError=0.0674911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100195855_G_A', pValueMantissa=4.870999813079834, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00807719, standardError=0.00286889, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197192_T_C', pValueMantissa=4.242000102996826, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136954, 
standardError=0.067486, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197458_C_A', pValueMantissa=4.629000186920166, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00812979, standardError=0.00287088, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197612_C_G', pValueMantissa=4.498000144958496, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815595, standardError=0.00287088, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197640_T_C', pValueMantissa=1.965000033378601, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114501, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197675_G_A', pValueMantissa=2.75600004196167, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0386379, standardError=0.0175348, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197920_A_G', pValueMantissa=2.384999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0167093, standardError=0.00739523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100198475_T_C', pValueMantissa=1.9639999866485596, 
pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114509, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100199282_C_G', pValueMantissa=1.1059999465942383, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0127328, standardError=0.00390313, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100199476_G_A', pValueMantissa=1.319000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118973, standardError=0.00370432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100200167_G_A', pValueMantissa=3.1589999198913574, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0376207, standardError=0.012745, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100201209_C_T', pValueMantissa=1.097000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00602792, standardError=0.0018465, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100201414_G_A', pValueMantissa=4.788000106811523, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0068162, standardError=0.00344526, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100201633_T_A', pValueMantissa=1.3179999589920044, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118983, standardError=0.00370432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100202455_T_C', pValueMantissa=1.9600000381469727, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114531, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100202467_G_A', pValueMantissa=3.875999927520752, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277856, standardError=0.00962082, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100202772_A_T', pValueMantissa=1.9600000381469727, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114531, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100203494_T_C', pValueMantissa=1.9589999914169312, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114534, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100203542_G_T', pValueMantissa=1.6130000352859497, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697446, standardError=0.00184854, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100203788_C_A', pValueMantissa=4.110000133514404, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.113144, standardError=0.0394286, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100204779_C_A', pValueMantissa=4.3379998207092285, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101425, standardError=0.00502088, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100205801_A_G', pValueMantissa=4.5329999923706055, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00814881, standardError=0.00287085, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206194_A_T', pValueMantissa=1.9570000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114546, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206333_A_G', pValueMantissa=4.160999774932861, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0263098, standardError=0.0129133, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206660_A_G', pValueMantissa=2.4539999961853027, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.113651, standardError=0.0505422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206733_C_T', pValueMantissa=1.315999984741211, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119003, standardError=0.00370432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207009_TA_T', pValueMantissa=1.9559999704360962, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114549, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207455_C_G', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00593547, standardError=0.00184639, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207653_G_A', pValueMantissa=3.6470000743865967, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114874, standardError=0.00194719, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207898_CAAGTT_C', pValueMantissa=3.384999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0921644, standardError=0.0434369, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100208164_G_C', pValueMantissa=3.874000072479248, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277875, standardError=0.0096208, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100209200_T_C', pValueMantissa=1.315000057220459, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119009, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100210277_C_T', pValueMantissa=1.3040000200271606, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119097, standardError=0.00370434, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100211064_G_A', pValueMantissa=1.9550000429153442, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114558, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100211240_A_G', pValueMantissa=7.230000019073486, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00932586, standardError=0.00188208, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100212531_T_C', pValueMantissa=3.874000072479248, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277872, standardError=0.00962078, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100212621_T_C', pValueMantissa=1.6109999418258667, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697491, standardError=0.00184852, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100213072_C_T', pValueMantissa=1.9529999494552612, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114567, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100213340_T_C', pValueMantissa=1.6119999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697479, standardError=0.00184851, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100213631_T_C', pValueMantissa=1.6119999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697476, standardError=0.00184851, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100214977_G_T', pValueMantissa=4.242000102996826, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136966, standardError=0.0674913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215512_G_A', pValueMantissa=4.473999977111816, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00816059, 
standardError=0.0028708, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215525_C_CA', pValueMantissa=1.9509999752044678, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114578, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215535_T_A', pValueMantissa=9.406000137329102, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122847, standardError=0.00371395, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215669_A_G', pValueMantissa=1.9509999752044678, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011458, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100216129_G_T', pValueMantissa=1.312000036239624, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119034, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100216744_G_C', pValueMantissa=2.9519999027252197, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115543, standardError=0.00194705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100217073_T_TCATA', pValueMantissa=4.507999897003174, 
pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815394, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100218126_C_T', pValueMantissa=1.055999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00604851, standardError=0.00184672, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100218867_A_G', pValueMantissa=3.3550000190734863, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.205837, standardError=0.0968422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100218929_A_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219439_G_A', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219496_T_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100219653_G_A', pValueMantissa=8.52400016784668, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0210222, standardError=0.00799145, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219661_T_C', pValueMantissa=7.915999889373779, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0163115, standardError=0.00614227, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100220164_T_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100220814_T_TA', pValueMantissa=4.504000186920166, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815468, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100220831_C_A', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221396_C_T', pValueMantissa=2.944000005722046, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115551, standardError=0.00194705, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221486_T_G', pValueMantissa=1.3890000581741333, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0160699, standardError=0.00653242, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221517_C_A', pValueMantissa=2.5439999103546143, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00413576, standardError=0.00185074, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221594_C_G', pValueMantissa=2.0799999237060547, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0779801, standardError=0.0337333, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100223656_G_A', pValueMantissa=3.621000051498413, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114895, standardError=0.00194717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100223788_T_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224059_G_A', pValueMantissa=3.621000051498413, pValueExponent=-9, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0114895, standardError=0.00194717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224086_A_ACT', pValueMantissa=1.0759999752044678, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00473991, standardError=0.00185855, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224086_A_ACTCTCTCT', pValueMantissa=9.140999794006348, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0123865, standardError=0.00373569, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224729_G_A', pValueMantissa=9.512999534606934, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122608, standardError=0.00371028, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224788_C_T', pValueMantissa=4.552000045776367, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00814496, standardError=0.00287083, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224846_CA_C', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100225330_C_T', pValueMantissa=4.679999828338623, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00811972, standardError=0.0028709, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100225460_G_A', pValueMantissa=1.4520000219345093, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0138989, standardError=0.00568655, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100227298_T_G', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100227436_A_C', pValueMantissa=2.1089999675750732, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0204774, standardError=0.00887858, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100227710_T_C', pValueMantissa=2.624000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115932, standardError=0.00194728, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228161_T_C', pValueMantissa=1.1180000305175781, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0127208, standardError=0.00390316, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228188_C_G', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228581_G_GGGA', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228620_A_G', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100229692_G_T', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100229731_G_C', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100230118_T_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, 
standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100230958_C_A', pValueMantissa=3.694000005722046, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0264656, standardError=0.0126847, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231193_T_TTTG', pValueMantissa=2.11299991607666, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0113217, standardError=0.00368321, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231368_T_A', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231561_G_GT', pValueMantissa=4.546000003814697, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815129, standardError=0.00287266, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231791_G_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100233804_CAG_C', pValueMantissa=3.674999952316284, 
pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114848, standardError=0.00194718, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100235720_T_G', pValueMantissa=4.255000114440918, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0683751, standardError=0.0337137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236254_A_G', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236529_C_T', pValueMantissa=1.5670000314712524, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00698791, standardError=0.00184852, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236538_A_G', pValueMantissa=1.5640000104904175, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00698861, standardError=0.00184852, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236603_C_T', pValueMantissa=3.1489999294281006, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115356, standardError=0.00194738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100237030_C_T', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237733_G_A', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237772_G_A', pValueMantissa=9.51200008392334, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122572, standardError=0.00370917, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237893_A_T', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100238295_A_G', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100238581_A_G', pValueMantissa=6.859000205993652, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0255013, standardError=0.00751108, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100238657_T_C', pValueMantissa=2.809000015258789, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01157, standardError=0.00194702, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239610_G_T', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239637_A_G', pValueMantissa=1.1369999647140503, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00600877, standardError=0.00184638, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239850_T_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239989_G_A', pValueMantissa=3.2009999752044678, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011529, standardError=0.00194715, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100240726_T_G', pValueMantissa=4.000999927520752, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0525952, standardError=0.0256104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100240944_C_T', pValueMantissa=2.8889999389648438, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115616, standardError=0.00194713, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242014_C_T', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242478_A_G', pValueMantissa=9.51200008392334, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122572, standardError=0.00370917, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242630_C_T', pValueMantissa=1.253999948501587, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011921, standardError=0.00369498, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242920_TGA_T', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100243390_G_A', 
pValueMantissa=8.79800033569336, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0252612, standardError=0.00439142, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244149_G_C', pValueMantissa=3.316999912261963, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011525, standardError=0.0019484, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244398_T_C', pValueMantissa=3.694000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834099, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244540_A_G', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244650_T_C', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244773_C_G', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244991_G_C', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100245546_T_C', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100245863_C_T', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100246645_C_T', pValueMantissa=3.694000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834099, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100246672_T_C', pValueMantissa=1.1399999856948853, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00600727, standardError=0.00184638, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100246924_C_T', pValueMantissa=3.4030001163482666, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0281666, standardError=0.00961714, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100247616_T_C', pValueMantissa=3.694000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834089, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100248412_C_G', pValueMantissa=1.4470000267028809, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00702484, standardError=0.00184863, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100248726_A_G', pValueMantissa=3.694999933242798, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834082, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249239_A_C', pValueMantissa=2.0339999198913574, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0353474, standardError=0.0152353, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249267_A_T', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249476_T_TC', pValueMantissa=3.694999933242798, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834082, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249705_T_C', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100250941_T_C', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100250976_T_A', pValueMantissa=6.709000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0690304, standardError=0.0254633, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100251390_T_A', pValueMantissa=2.9049999713897705, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0055723, standardError=0.00187139, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100251454_A_G', pValueMantissa=1.1790000200271606, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00712036, standardError=0.00184919, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100251790_C_T', pValueMantissa=3.497999906539917, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00593413, standardError=0.00281426, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100252662_G_A', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100252768_G_A', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100252920_T_G', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100254207_CCAGA_C', pValueMantissa=3.382999897003174, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0921782, standardError=0.0434383, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100254700_T_C', pValueMantissa=1.3079999685287476, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0884636, standardError=0.0275222, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100254749_C_A', pValueMantissa=3.940999984741211, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00710422, standardError=0.00344885, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100255058_C_G', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100255162_C_T', pValueMantissa=3.9679999351501465, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00827516, standardError=0.00287265, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100256000_C_A', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100256287_C_T', pValueMantissa=4.053999900817871, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00825485, standardError=0.00287235, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100256511_G_A', pValueMantissa=1.0700000524520874, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.00604016, standardError=0.00184634, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100257612_C_T', pValueMantissa=1.9170000553131104, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082026, standardError=0.00350204, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100258083_A_G', pValueMantissa=2.7039999961853027, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0635849, standardError=0.0287601, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100258094_C_T', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100258332_C_T', pValueMantissa=3.9760000705718994, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082729, standardError=0.00287251, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100260000_A_G', pValueMantissa=1.6239999532699585, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0116706, standardError=0.00370306, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100260586_A_G', pValueMantissa=3.8919999599456787, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00829222, standardError=0.00287251, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100260869_C_T', pValueMantissa=4.205999851226807, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082302, standardError=0.00287541, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100262992_C_T', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100263055_G_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100263140_C_T', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100263249_C_T', pValueMantissa=1.1790000200271606, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00711917, standardError=0.00184892, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None)], size=1001)]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "window_based_clumped_output = 'gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus'\n", - "\n", - "(\n", - " session.spark.read.parquet(ld_clumped_output)\n", - " .filter(f.size(f.col('qualityControls')) == 0)\n", - "# .show(1, False, True)\n", - " .select('locus', f.size(f.col('locus')).alias('size'))\n", - " .limit(1)\n", - " .collect()\n", - ")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+---------------------+---------+---------+---------------+\n", + "| studyId|ldPopulationStructure|projectId|studyType|traitFromSource|\n", + "+--------------------+---------------------+---------+---------+---------------+\n", + "|FINNGEN_R9_K11_EN...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_H7_KER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_H8_EXT...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_H7_RET...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_RHEUMA...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_H7_KER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_HEIGHT...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_SY...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_DO...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_PY...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "| FINNGEN_R9_GOUT_NOS| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_FI...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_ALLERG...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_E4_DM2...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_G6_CER...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_L12_UR...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_AUTOIM...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + 
"|FINNGEN_R9_I9_HYP...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_M13_AR...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "|FINNGEN_R9_K11_AP...| [{fin, 1.0}]| FINNGEN| gwas| cicaful|\n", + "+--------------------+---------------------+---------+---------+---------------+\n", + "only showing top 20 rows\n", + "\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "id": "e242acdd", - "metadata": {}, - "outputs": [], - "source": [] + "name": "stderr", + "output_type": "stream", + "text": [ + "23/10/13 11:55:34 WARN GhfsStorageStatistics: Detected potential high latency for operation op_open. latencyMs=676; previousMaxLatencyMs=470; operationCount=333148; context=gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/ld_index/chromosome=5/part-00076-ff42773a-494c-46d2-bc22-322062b5e715.c000.snappy.parquet\n", + "23/10/13 12:39:11 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. latencyMs=279; previousMaxLatencyMs=243; operationCount=140; context=gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus/_temporary/0/_temporary/attempt_202310131239103300432709600941830_0086_m_000000_80535/part-00000-17d449c4-f0c3-4617-b378-b74b864ab64a-c000.snappy.parquet\n", + "23/10/13 12:39:11 WARN GhfsStorageStatistics: Detected potential high latency for operation op_create. 
latencyMs=335; previousMaxLatencyMs=279; operationCount=140; context=gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus/_temporary/0/_temporary/attempt_202310131239106417331548307580589_0086_m_000027_80562/part-00027-17d449c4-f0c3-4617-b378-b74b864ab64a-c000.snappy.parquet\n", + " \r" + ] } - ], - "metadata": { - "gist": { - "data": { - "description": "GCS/dsuveges/PICS/2023.10.06 - PICS FINNGEN from top to bottom.ipynb", - "public": false - }, - "id": "" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + ], + "source": [ + "ld_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.13_ld_clumped_w_locus\"\n", + "\n", + "studies_df = (\n", + " session.spark.read.parquet(window_based_clumped_output)\n", + " # Generating a list of study identifiers:\n", + " .select(\"studyId\")\n", + " .distinct()\n", + " # Adding fabricated values required to parse as gwas catalog study:\n", + " .select(\n", + " \"studyId\",\n", + " StudyIndex.aggregate_and_map_ancestries(\n", + " f.array(\n", + " f.struct(\n", + " f.lit(\"Finnish\").alias(\"ancestry\"),\n", + " f.lit(100).cast(\"long\").alias(\"sampleSize\")\n", + " )\n", + " )\n", + " ).alias(\"ldPopulationStructure\"),\n", + " f.lit(\"FINNGEN\").alias(\"projectId\"),\n", + " f.lit(\"gwas\").alias(\"studyType\"),\n", + " f.lit(\"cicaful\").alias(\"traitFromSource\")\n", + " )\n", + ")\n", + "\n", + "study_index = (\n", + " StudyIndex(\n", + " _df=studies_df,\n", + " _schema=StudyIndex.get_schema()\n", + " )\n", + ")\n", + "\n", + "study_index.df.show()\n", + "\n", + "# Loading ld index:\n", + "ld_index = LDIndex.from_parquet(session, ld_index_path)\n", + "\n", + "(\n", + " # To annotate study/locus, study level info and ld panel is needed:\n", + " LDAnnotator.ld_annotate(\n", + " StudyLocus.from_parquet(session, window_based_clumped_output),\n", + " study_index,\n", + " ld_index\n", + " )\n", + " # Clumping linked study-loci together:\n", + " .clump()\n", + " 
.df.write.mode(\"overwrite\").parquet(ld_clumped_output)\n", + ")\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f637e3c0", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-13T10:43:26.766382Z", + "start_time": "2023-10-13T10:43:25.980232Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------------------------------------------------+-----+\n", + "|qualityControls |count|\n", + "+--------------------------------------------------------------+-----+\n", + "|[Variant not found in LD reference] |4607 |\n", + "|[] |13813|\n", + "|[Explained by a more significant variant in high LD (clumped)]|585 |\n", + "+--------------------------------------------------------------+-----+\n", + "\n" + ] + } + ], + "source": [ + "(\n", + " session.spark.read.parquet(ld_clumped_output)\n", + " .groupBy(\"qualityControls\")\n", + " .count()\n", + "# .show(1, False, True)\n", + " .show(truncate=False)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "5bf42196", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-13T12:55:39.212186Z", + "start_time": "2023-10-13T12:55:38.131092Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" + { + "data": { + "text/plain": [ + "[Row(locus=[Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99764322_T_C', pValueMantissa=2.3949999809265137, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0201963, standardError=0.00665169, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99764860_G_A', pValueMantissa=1.0499999523162842, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813303, standardError=0.00248194, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99765280_C_T', pValueMantissa=7.499000072479248, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147615, standardError=0.00437943, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99765335_A_G', pValueMantissa=1.2640000581741333, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107816, standardError=0.00189456, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99766311_C_T', pValueMantissa=1.0579999685287476, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010837, standardError=0.00189422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99766702_A_G', pValueMantissa=1.2059999704360962, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107966, standardError=0.00189453, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99766923_T_C', pValueMantissa=1.3300000429153442, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010765, standardError=0.00189451, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99767090_TTTG_T', pValueMantissa=1.2740000486373901, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107778, standardError=0.00189432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99768341_T_C', pValueMantissa=1.2289999723434448, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107887, standardError=0.00189422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99768718_A_G', pValueMantissa=1.2649999856948853, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107795, standardError=0.0018942, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99768993_G_A', pValueMantissa=1.1629999876022339, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108066, standardError=0.00189423, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769297_T_C', pValueMantissa=1.2929999828338623, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113037, standardError=0.00186275, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769386_C_T', pValueMantissa=1.7280000448226929, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00670209, standardError=0.00281514, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769548_A_G', pValueMantissa=1.7280000448226929, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0067021, standardError=0.00281514, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99769607_C_T', pValueMantissa=1.2710000276565552, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107775, standardError=0.00189416, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99770233_A_AT', pValueMantissa=1.1360000371932983, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108149, standardError=0.00189434, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99771038_C_A', pValueMantissa=1.1039999723434448, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108218, standardError=0.00189392, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99771332_T_C', pValueMantissa=1.284999966621399, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00972726, standardError=0.00390986, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99771548_C_T', pValueMantissa=1.1369999647140503, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108168, standardError=0.00189473, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99774249_G_A', pValueMantissa=1.0410000085830688, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813435, standardError=0.00248058, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99774476_AT_A', pValueMantissa=1.1540000438690186, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108061, standardError=0.0018937, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99777761_G_A', pValueMantissa=2.9170000553131104, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0857961, standardError=0.0393337, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99777984_T_G', pValueMantissa=9.097999572753906, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108749, standardError=0.00189236, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99778063_G_A', pValueMantissa=1.2730000019073486, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00887632, standardError=0.00203368, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99778821_G_A', pValueMantissa=1.6679999828338623, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00682868, standardError=0.00285271, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99779613_C_G', pValueMantissa=7.630000114440918, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109299, standardError=0.00189215, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99781296_A_G', pValueMantissa=8.560999870300293, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010892, standardError=0.00189195, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99781822_G_A', pValueMantissa=1.4390000104904175, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00689344, standardError=0.00281655, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99781826_A_G', pValueMantissa=7.65500020980835, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109284, standardError=0.00189207, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99782821_C_T', pValueMantissa=1.3680000305175781, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.00573303, standardError=0.00232523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99783206_A_G', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99783290_A_C', pValueMantissa=8.821999549865723, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108841, standardError=0.00189224, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99784269_C_T', pValueMantissa=9.149999618530273, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108721, standardError=0.00189218, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99784331_G_A', pValueMantissa=4.435999870300293, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0327234, standardError=0.0162747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99784751_C_A', pValueMantissa=3.0409998893737793, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.149822, standardError=0.0692119, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99785581_A_G', 
pValueMantissa=7.164000034332275, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0113888, standardError=0.00423514, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99786875_C_T', pValueMantissa=5.257999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110515, standardError=0.00189278, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787050_C_G', pValueMantissa=6.671999931335449, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109731, standardError=0.00189226, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787264_C_A', pValueMantissa=7.834000110626221, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109198, standardError=0.00189186, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787285_C_T', pValueMantissa=9.189000129699707, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108723, standardError=0.00189246, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787359_T_G', pValueMantissa=7.504000186920166, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109356, standardError=0.00189222, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787460_G_T', pValueMantissa=6.499000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010984, standardError=0.00189269, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787461_G_T', pValueMantissa=6.499000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.010984, standardError=0.00189269, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787483_G_A', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787568_C_T', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787571_C_T', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787694_A_AT', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787855_C_T', pValueMantissa=8.937999725341797, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108798, standardError=0.00189223, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787860_C_T', pValueMantissa=7.64300012588501, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0231471, standardError=0.0086776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787890_G_A', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787906_T_A', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99787915_T_G', pValueMantissa=7.631999969482422, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788026_C_T', pValueMantissa=7.7870001792907715, pValueExponent=-9, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109255, standardError=0.00189252, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788420_A_C', pValueMantissa=7.001999855041504, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109573, standardError=0.00189216, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788711_G_A', pValueMantissa=1.0420000553131104, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108302, standardError=0.00189215, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788859_C_T', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99788952_C_T', pValueMantissa=9.12399959564209, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108727, standardError=0.00189214, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99789038_A_G', pValueMantissa=7.164000034332275, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109498, standardError=0.00189212, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99789217_G_C', pValueMantissa=7.72599983215332, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109277, standardError=0.00189247, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99789506_G_C', pValueMantissa=7.671000003814697, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109278, standardError=0.00189209, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99789829_T_G', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99790008_G_A', pValueMantissa=7.603000164031982, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109305, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99790154_G_A', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99790319_T_G', pValueMantissa=7.857999801635742, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109213, standardError=0.00189229, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99791174_G_A', pValueMantissa=7.623000144958496, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0276216, standardError=0.00820577, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99791224_A_G', pValueMantissa=7.632999897003174, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109293, standardError=0.00189206, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99793129_A_T', pValueMantissa=9.17300033569336, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108711, standardError=0.00189215, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99793502_C_T', pValueMantissa=1.0099999904632568, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108395, standardError=0.00189201, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99793829_C_CGTAT', pValueMantissa=3.756999969482422, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.201946, standardError=0.0971122, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99794292_T_A', pValueMantissa=1.8420000076293945, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0427514, 
standardError=0.0181376, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99794803_C_G', pValueMantissa=3.76200008392334, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0754336, standardError=0.0362844, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99797363_G_A', pValueMantissa=2.696000099182129, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0451691, standardError=0.0204184, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99797572_A_C', pValueMantissa=7.729000091552734, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0109255, standardError=0.0018921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99798877_A_T', pValueMantissa=5.47599983215332, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111187, standardError=0.0019065, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99798989_G_T', pValueMantissa=4.708000183105469, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00645552, standardError=0.00184611, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99799669_G_A', pValueMantissa=1.3769999742507935, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0283588, standardError=0.0115134, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99801055_G_A', pValueMantissa=5.751999855041504, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0863438, standardError=0.0312661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99801648_T_C', pValueMantissa=5.201000213623047, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.35431, standardError=0.126797, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99802216_C_T', pValueMantissa=8.795999526977539, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0312825, standardError=0.00584722, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99803737_A_G', pValueMantissa=4.105000019073486, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0838538, standardError=0.0410448, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99804058_G_A', pValueMantissa=9.253999710083008, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00819335, standardError=0.00247362, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99804255_C_G', pValueMantissa=8.798999786376953, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0312823, standardError=0.00584723, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99804875_C_T', pValueMantissa=2.2079999446868896, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0532293, standardError=0.023254, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99805400_A_G', pValueMantissa=5.626999855041504, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0215236, standardError=0.00777382, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99805994_G_A', pValueMantissa=1.6449999809265137, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125221, standardError=0.00522031, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99806253_C_T', pValueMantissa=1.2120000123977661, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00800751, standardError=0.00247451, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99806469_CAT_C', pValueMantissa=1.996999979019165, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01596, standardError=0.00685871, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99807109_T_A', pValueMantissa=3.994999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0097045, standardError=0.00274107, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99807669_A_G', pValueMantissa=4.577000141143799, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111561, standardError=0.00190316, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99807962_A_G', pValueMantissa=3.7709999084472656, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0677375, standardError=0.0325981, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99809156_C_A', pValueMantissa=1.4980000257492065, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00696552, standardError=0.0028632, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99809726_T_C', pValueMantissa=4.446000099182129, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111654, standardError=0.00190318, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99809873_G_A', pValueMantissa=4.642000198364258, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.122379, 
standardError=0.0614495, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99811236_C_A', pValueMantissa=7.059999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110215, standardError=0.00190371, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99811771_C_T', pValueMantissa=4.526000022888184, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111597, standardError=0.00190317, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99815174_G_C', pValueMantissa=6.111000061035156, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110649, standardError=0.00190324, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99815628_A_G', pValueMantissa=9.194999694824219, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0211397, standardError=0.00637879, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99815640_A_G', pValueMantissa=8.413999557495117, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0902758, standardError=0.0342605, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99816387_G_A', pValueMantissa=4.604000091552734, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0346061, standardError=0.0173466, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99816562_G_A', pValueMantissa=3.677000045776367, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00976812, standardError=0.00274214, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99816570_G_A', pValueMantissa=1.152999997138977, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00804666, standardError=0.00247569, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99817203_T_C', pValueMantissa=7.421999931335449, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110057, standardError=0.00190374, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99818525_T_C', pValueMantissa=1.4880000352859497, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0186574, standardError=0.0076615, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99819195_T_G', pValueMantissa=6.382999897003174, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0124767, standardError=0.0045745, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99820908_A_G', pValueMantissa=1.4980000257492065, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00696602, standardError=0.0028633, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99821086_GA_G', pValueMantissa=3.259999990463257, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0113261, standardError=0.00272622, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99821094_C_T', pValueMantissa=4.098999977111816, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112471, standardError=0.00191273, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99821955_T_G', pValueMantissa=4.670000076293945, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112087, standardError=0.00191323, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99822427_T_C', pValueMantissa=2.058000087738037, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115151, standardError=0.00192136, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99822696_T_C', pValueMantissa=7.763000011444092, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.162918, standardError=0.0611962, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99823047_A_G', pValueMantissa=7.198999881744385, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110742, standardError=0.0019139, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99823280_T_C', pValueMantissa=1.1619999408721924, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00804213, standardError=0.00247592, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99825419_A_G', pValueMantissa=1.2710000276565552, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0826158, standardError=0.0331567, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99825752_T_C', pValueMantissa=6.0370001792907715, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111297, standardError=0.00191372, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99825835_C_T', pValueMantissa=6.0320000648498535, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111299, standardError=0.00191372, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99827260_C_T', pValueMantissa=3.6500000953674316, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0489284, 
standardError=0.0233964, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99828083_C_T', pValueMantissa=3.938999891281128, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0959414, standardError=0.0465714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99828402_T_A', pValueMantissa=9.142000198364258, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0211518, standardError=0.00637931, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99828407_A_T', pValueMantissa=9.133999824523926, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0211533, standardError=0.0063793, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99829453_A_AT', pValueMantissa=6.083000183105469, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0111253, standardError=0.00191338, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99829545_ATATTT_A', pValueMantissa=4.810999870300293, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112061, standardError=0.00191441, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99830168_G_A', pValueMantissa=1.218999981880188, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00801018, standardError=0.00247653, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99830263_G_A', pValueMantissa=1.6970000267028809, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0179959, standardError=0.00753787, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832187_T_C', pValueMantissa=4.421000003814697, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0112213, standardError=0.0019124, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832400_AAC_A', pValueMantissa=6.706999778747559, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110021, standardError=0.00189753, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832402_C_CTGTGTGT', pValueMantissa=6.710999965667725, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0110019, standardError=0.00189753, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99832563_A_G', pValueMantissa=1.4479999542236328, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0090241, standardError=0.00187287, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99832865_A_G', pValueMantissa=4.169000148773193, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0105412, standardError=0.00192234, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99834145_G_C', pValueMantissa=6.802999973297119, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.260334, standardError=0.0961939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99834214_T_C', pValueMantissa=1.784000039100647, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0474718, standardError=0.0200386, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99835361_G_A', pValueMantissa=2.0769999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0595141, standardError=0.0257408, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99841479_C_T', pValueMantissa=6.188000202178955, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104861, standardError=0.00193711, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99842816_A_G', pValueMantissa=8.645000457763672, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00723726, standardError=0.00184352, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99844024_C_G', pValueMantissa=2.3450000286102295, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107969, standardError=0.00193341, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99844450_C_T', pValueMantissa=2.874000072479248, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107269, standardError=0.00193316, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99845178_C_T', pValueMantissa=4.0320000648498535, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0656125, standardError=0.0319983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99845936_A_G', pValueMantissa=3.069999933242798, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00675859, standardError=0.00312767, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99848765_C_G', pValueMantissa=2.8910000324249268, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114511, standardError=0.00192855, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99850966_A_G', pValueMantissa=8.373000144958496, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0101864, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99852563_C_T', pValueMantissa=4.5329999923706055, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113089, standardError=0.0019287, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99853411_T_TG', pValueMantissa=9.414999961853027, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011087, standardError=0.00193121, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99856822_A_G', pValueMantissa=2.7119998931884766, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114876, standardError=0.00193129, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99856987_G_C', pValueMantissa=5.239999771118164, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010358, standardError=0.001903, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857429_C_T', pValueMantissa=1.718000054359436, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116617, standardError=0.00193637, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857502_A_G', pValueMantissa=6.296000003814697, 
pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010296, standardError=0.00190308, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857670_G_A', pValueMantissa=2.7939999103546143, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011478, standardError=0.00193126, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99857979_C_T', pValueMantissa=2.5840001106262207, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00775769, standardError=0.00184386, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99858035_C_T', pValueMantissa=2.7939999103546143, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011478, standardError=0.00193127, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99858181_G_C', pValueMantissa=2.7920000553131104, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114783, standardError=0.00193127, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99858624_T_C', pValueMantissa=6.314000129699707, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0103011, standardError=0.0019042, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99859267_C_A', pValueMantissa=1.5019999742507935, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0239274, standardError=0.00753783, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99859558_G_A', pValueMantissa=6.2779998779296875, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0321283, standardError=0.00593792, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99859674_G_A', pValueMantissa=1.1050000190734863, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100907, standardError=0.00190082, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861014_C_G', pValueMantissa=1.350000023841858, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0080568, standardError=0.00185135, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861191_A_G', pValueMantissa=2.062000036239624, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00788332, standardError=0.00185141, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861743_A_C', pValueMantissa=1.621999979019165, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00893977, standardError=0.00186417, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99861975_TTGG_T', pValueMantissa=1.5509999990463257, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00895366, standardError=0.00186357, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99862766_C_T', pValueMantissa=1.7289999723434448, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00993376, standardError=0.0019007, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99863009_T_C', pValueMantissa=1.3009999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0535743, standardError=0.0215732, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99863999_T_C', pValueMantissa=1.4730000495910645, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0137073, standardError=0.00361145, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99864093_A_C', pValueMantissa=5.741000175476074, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0322238, standardError=0.00593803, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99864115_G_T', pValueMantissa=7.295000076293945, pValueExponent=-4, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.150642, standardError=0.0445916, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865012_T_C', pValueMantissa=2.450000047683716, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00781456, standardError=0.00185205, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865013_G_A', pValueMantissa=1.7259999513626099, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0159887, standardError=0.00510203, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865261_T_C', pValueMantissa=1.4459999799728394, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00999453, standardError=0.00190036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865337_G_GAAGA', pValueMantissa=1.3170000314712524, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00806712, standardError=0.00185143, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99865513_C_G', pValueMantissa=1.3250000476837158, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0080656, standardError=0.00185165, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99866792_C_T', pValueMantissa=1.4500000476837158, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00803164, standardError=0.00185225, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867170_C_T', pValueMantissa=8.121000289916992, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111182, standardError=0.00192825, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867197_T_C', pValueMantissa=2.680999994277954, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0171717, standardError=0.00775509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867487_C_G', pValueMantissa=2.8310000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.359987, standardError=0.164156, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99867528_G_T', pValueMantissa=2.8389999866485596, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0077156, standardError=0.0018432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868259_G_A', pValueMantissa=2.265000104904175, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.119984, standardError=0.0392983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868412_T_G', pValueMantissa=1.6779999732971191, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00994383, standardError=0.00190064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868443_A_G', pValueMantissa=1.4809999465942383, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00801978, standardError=0.0018515, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868585_TC_T', pValueMantissa=1.437999963760376, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0080333, standardError=0.00185188, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99868691_T_G', pValueMantissa=1.4730000495910645, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00802444, standardError=0.00185208, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99869742_G_A', pValueMantissa=4.677000045776367, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101608, standardError=0.00185986, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99870037_G_A', pValueMantissa=3.996999979019165, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0939625, 
standardError=0.0326443, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99870234_G_A', pValueMantissa=2.5759999752044678, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0270256, standardError=0.00896575, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871164_A_T', pValueMantissa=9.444999694824219, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101485, standardError=0.0019015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871248_A_C', pValueMantissa=4.265999794006348, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011332, standardError=0.00192934, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871256_C_T', pValueMantissa=1.0980000495910645, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100932, standardError=0.00190087, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871596_T_C', pValueMantissa=1.097000002861023, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100928, standardError=0.00190078, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871838_G_C', pValueMantissa=4.270999908447266, pValueExponent=-9, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113316, standardError=0.00192933, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99871912_T_C', pValueMantissa=1.0989999771118164, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100928, standardError=0.00190087, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99872008_T_C', pValueMantissa=1.0989999771118164, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100927, standardError=0.00190087, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99872112_C_T', pValueMantissa=1.093000054359436, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100944, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99872802_A_G', pValueMantissa=2.828000068664551, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0103237, standardError=0.00185953, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873074_C_T', pValueMantissa=4.716000080108643, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112996, standardError=0.00192927, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99873158_C_T', pValueMantissa=1.1759999990463257, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010069, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873263_C_T', pValueMantissa=1.1440000534057617, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010071, standardError=0.00189938, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873471_G_A', pValueMantissa=6.283999919891357, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0723658, standardError=0.0264824, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873553_C_T', pValueMantissa=1.8109999895095825, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122764, standardError=0.00286365, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99873602_C_T', pValueMantissa=3.7880001068115234, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102286, standardError=0.00185958, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99874022_C_T', pValueMantissa=3.7920000553131104, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102286, standardError=0.00185966, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99874186_C_CT', pValueMantissa=1.2230000495910645, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100561, standardError=0.00190097, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875195_T_C', pValueMantissa=4.964000225067139, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112867, standardError=0.00192988, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875202_G_T', pValueMantissa=1.312999963760376, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100339, standardError=0.00190142, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875320_C_A', pValueMantissa=1.2369999885559082, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100535, standardError=0.00190119, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875381_T_C', pValueMantissa=1.2410000562667847, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100525, standardError=0.00190121, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875452_C_T', pValueMantissa=2.5290000438690186, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0135056, standardError=0.00447237, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875704_T_A', pValueMantissa=2.9600000381469727, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00410906, standardError=0.00188882, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99875760_A_AG', pValueMantissa=1.2430000305175781, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010052, standardError=0.00190125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99876031_G_GT', pValueMantissa=1.3940000534057617, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0240895, standardError=0.00753787, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99876891_T_C', pValueMantissa=3.986999988555908, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102146, standardError=0.0018601, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99878758_G_A', pValueMantissa=2.763000011444092, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.151851, standardError=0.0689453, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99878850_C_G', 
pValueMantissa=4.1519999504089355, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102024, standardError=0.0018603, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99880039_G_A', pValueMantissa=4.0960001945495605, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102077, standardError=0.00186045, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99880120_A_C', pValueMantissa=4.75, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113026, standardError=0.00193019, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99880736_G_C', pValueMantissa=2.4639999866485596, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.11912, standardError=0.039343, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99882938_G_A', pValueMantissa=4.218999862670898, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102002, standardError=0.00186086, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99883489_C_T', pValueMantissa=7.486000061035156, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0140267, standardError=0.00524475, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, 
logABF=None, posteriorProbability=None, variantId='10_99884754_C_T', pValueMantissa=2.5510001182556152, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011665, standardError=0.00195783, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99885306_C_T', pValueMantissa=7.618000030517578, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122854, standardError=0.00460381, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99885741_T_C', pValueMantissa=4.251999855041504, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101963, standardError=0.00186062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99887218_A_G', pValueMantissa=1.3589999675750732, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100249, standardError=0.00190197, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99887854_C_T', pValueMantissa=1.3580000400543213, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010025, standardError=0.00190197, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99888103_G_A', pValueMantissa=3.808000087738037, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0456176, standardError=0.0219955, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99889239_G_C', pValueMantissa=1.3580000400543213, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0100256, standardError=0.00190207, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99890983_C_T', pValueMantissa=1.3830000162124634, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01002, standardError=0.00190223, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891336_A_G', pValueMantissa=3.815000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102374, standardError=0.00186162, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891408_C_T', pValueMantissa=1.6360000371932983, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00996155, standardError=0.00190233, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891590_G_A', pValueMantissa=6.8420000076293945, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114132, standardError=0.00422021, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99891697_T_C', pValueMantissa=4.46999979019165, pValueExponent=-8, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.010205, standardError=0.00186523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99892983_T_C', pValueMantissa=3.4000000953674316, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.177641, standardError=0.083791, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99893179_G_C', pValueMantissa=5.1570000648498535, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112781, standardError=0.00193051, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99893428_C_T', pValueMantissa=4.14300012588501, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0102059, standardError=0.00186081, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894094_G_C', pValueMantissa=4.321000099182129, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0249459, standardError=0.00708754, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894492_G_T', pValueMantissa=8.531999588012695, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101123, standardError=0.00188821, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894565_A_G', 
pValueMantissa=4.785999774932861, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00954874, standardError=0.00273411, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99894939_T_C', pValueMantissa=8.916999816894531, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00988844, standardError=0.00184916, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99895167_T_C', pValueMantissa=4.169000148773193, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00948941, standardError=0.00187497, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99895555_C_T', pValueMantissa=4.659999847412109, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0340557, standardError=0.0120355, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99896225_G_A', pValueMantissa=1.0920000076293945, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110344, standardError=0.0019305, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99896692_A_G', pValueMantissa=3.4130001068115234, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110917, standardError=0.00187665, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99896979_T_G', pValueMantissa=3.2119998931884766, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0401381, standardError=0.0187308, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99897831_T_C', pValueMantissa=1.9040000438690186, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.14616, standardError=0.0623359, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99898123_G_A', pValueMantissa=1.1069999933242798, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110304, standardError=0.00193059, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99898484_T_A', pValueMantissa=1.1139999628067017, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110285, standardError=0.00193061, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99898828_T_C', pValueMantissa=3.2699999809265137, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111045, standardError=0.00187657, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99899677_C_CTTGT', pValueMantissa=3.51200008392334, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110858, standardError=0.00187714, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99900290_T_G', pValueMantissa=7.426000118255615, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109405, standardError=0.00189249, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99900292_A_AT', pValueMantissa=1.1360000371932983, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00988613, standardError=0.00390553, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99900500_A_C', pValueMantissa=2.2009999752044678, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0412365, standardError=0.0180062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99901642_T_C', pValueMantissa=5.086999893188477, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.113465, standardError=0.0405014, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99901731_C_T', pValueMantissa=1.5379999876022339, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0140283, standardError=0.00442901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902496_G_A', pValueMantissa=1.2289999723434448, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0224604, standardError=0.00897057, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902603_C_T', pValueMantissa=4.140999794006348, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0854585, standardError=0.0419028, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902712_G_C', pValueMantissa=1.1929999589920044, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110082, standardError=0.00193099, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902720_C_G', pValueMantissa=3.492000102996826, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011087, standardError=0.00187705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99902757_G_A', pValueMantissa=4.366000175476074, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110198, standardError=0.00187741, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99903008_A_C', pValueMantissa=3.4590001106262207, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01109, standardError=0.00187705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99903292_G_A', pValueMantissa=3.4660000801086426, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110895, standardError=0.00187709, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99903681_G_T', pValueMantissa=6.072000026702881, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110979, standardError=0.00190858, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99904413_A_G', pValueMantissa=3.4719998836517334, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110894, standardError=0.00187714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99904499_G_A', pValueMantissa=1.1369999647140503, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110237, standardError=0.00193097, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99904588_G_A', pValueMantissa=9.54800033569336, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142134, standardError=0.00430252, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905340_C_T', pValueMantissa=1.184000015258789, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110128, standardError=0.00193138, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905534_G_C', pValueMantissa=1.2209999561309814, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0379664, standardError=0.0151507, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905541_A_G', pValueMantissa=3.575000047683716, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110805, standardError=0.00187717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99905693_A_G', pValueMantissa=3.490999937057495, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110881, standardError=0.00187721, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99906210_T_C', pValueMantissa=3.5, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110874, standardError=0.00187724, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99906644_G_A', pValueMantissa=1.0570000410079956, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110495, standardError=0.00193126, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99906718_T_C', pValueMantissa=3.4709999561309814, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110901, standardError=0.00187726, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908035_C_T', pValueMantissa=7.296000003814697, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0148557, standardError=0.00439751, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908057_A_G', pValueMantissa=7.160999774932861, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0149937, standardError=0.00443164, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908221_T_C', pValueMantissa=3.878000020980835, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110578, standardError=0.0018776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99908813_T_C', pValueMantissa=5.124000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112768, standardError=0.00192994, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99909229_T_C', pValueMantissa=8.24899959564209, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00998922, standardError=0.0018631, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99909454_A_C', pValueMantissa=7.598999977111816, pValueExponent=-9, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109359, standardError=0.00189297, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99910263_C_T', pValueMantissa=5.330999851226807, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111417, standardError=0.00190898, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99910368_T_C', pValueMantissa=3.302999973297119, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011106, standardError=0.00187736, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99910580_T_G', pValueMantissa=1.1369999647140503, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110251, standardError=0.00193122, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99911338_T_C', pValueMantissa=3.4579999446868896, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011092, standardError=0.00187738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99911811_G_A', pValueMantissa=2.575000047683716, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.118741, standardError=0.0393917, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99912354_A_G', pValueMantissa=5.099999904632568, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112784, standardError=0.00192997, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99912584_A_G', pValueMantissa=3.447999954223633, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011093, standardError=0.00187739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99913097_C_G', pValueMantissa=3.181999921798706, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111182, standardError=0.00187747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99913702_T_C', pValueMantissa=1.0369999408721924, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0108274, standardError=0.00422404, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99914373_T_G', pValueMantissa=3.0360000133514404, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.248118, standardError=0.114583, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915372_A_G', pValueMantissa=3.250999927520752, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111123, standardError=0.0018776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915373_G_A', pValueMantissa=3.3289999961853027, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0111042, standardError=0.00187747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915884_T_C', pValueMantissa=5.684999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0109318, standardError=0.00187645, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99915918_T_G', pValueMantissa=1.281000018119812, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00983696, standardError=0.00186253, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99916127_T_C', pValueMantissa=5.081999778747559, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112798, standardError=0.00193, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99916503_A_T', pValueMantissa=6.974999904632568, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00734427, standardError=0.00184662, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99916552_T_C', pValueMantissa=2.9839999675750732, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122137, 
standardError=0.00562257, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99917731_C_T', pValueMantissa=5.0289998054504395, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112837, standardError=0.0019301, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99919106_C_T', pValueMantissa=2.8299999237060547, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0131403, standardError=0.00440112, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99920364_C_T', pValueMantissa=4.159999847412109, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0219416, standardError=0.0076565, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99921641_GC_G', pValueMantissa=5.14900016784668, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112777, standardError=0.00193036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99922010_GAAGA_G', pValueMantissa=3.134999990463257, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0255599, standardError=0.00709237, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99922205_A_G', pValueMantissa=8.65999984741211, 
pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00997687, standardError=0.00186386, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99922419_C_T', pValueMantissa=5.785999774932861, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112417, standardError=0.00193063, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99923392_A_AAAT', pValueMantissa=7.868000030517578, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010012, standardError=0.00186439, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99924208_C_T', pValueMantissa=6.610000133514404, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112121, standardError=0.00193293, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99924251_CACGGTGAA_C', pValueMantissa=5.901000022888184, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0112486, standardError=0.0019329, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99924611_T_C', pValueMantissa=3.5799999237060547, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.028521, standardError=0.0135863, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, 
logABF=None, posteriorProbability=None, variantId='10_99924854_A_G', pValueMantissa=4.103000164031982, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.135756, standardError=0.0664409, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99925030_G_A', pValueMantissa=7.132999897003174, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0123943, standardError=0.00460661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99925171_T_C', pValueMantissa=2.7660000324249268, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0131669, standardError=0.00439981, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99925527_C_T', pValueMantissa=3.3429999351501465, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011112, standardError=0.00187899, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99926307_T_C', pValueMantissa=1.2309999465942383, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00985993, standardError=0.0018643, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99926344_G_A', pValueMantissa=7.146999835968018, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0123917, standardError=0.00460668, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99928121_C_T', pValueMantissa=2.6080000400543213, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0237748, standardError=0.0106854, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99928961_T_A', pValueMantissa=7.409999847412109, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0092492, standardError=0.00274137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99929621_G_C', pValueMantissa=2.4159998893737793, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00965232, standardError=0.00186913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99930293_C_T', pValueMantissa=2.371000051498413, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00891412, standardError=0.00188903, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99930622_A_G', pValueMantissa=2.9200000762939453, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122713, standardError=0.0019469, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99930880_C_G', pValueMantissa=6.189000129699707, pValueExponent=-11, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0126631, standardError=0.00193652, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99931871_G_T', pValueMantissa=1.0870000123977661, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132779, standardError=0.00195421, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99932501_G_T', pValueMantissa=7.144999980926514, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130514, standardError=0.0020025, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99932525_C_G', pValueMantissa=3.86299991607666, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132541, standardError=0.00190924, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99932591_G_A', pValueMantissa=3.625999927520752, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0057824, standardError=0.00276148, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933095_G_A', pValueMantissa=6.820000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.041325, standardError=0.0152743, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933333_G_A', 
pValueMantissa=7.736000061035156, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130529, standardError=0.00190738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933398_T_G', pValueMantissa=1.1490000486373901, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132623, standardError=0.00195424, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933499_T_C', pValueMantissa=7.710999965667725, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130536, standardError=0.00190735, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933515_AG_A', pValueMantissa=6.0929999351501465, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130982, standardError=0.00200234, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933811_A_C', pValueMantissa=3.0450000762939453, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0271544, standardError=0.009164, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99933841_A_G', pValueMantissa=7.820000171661377, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130497, standardError=0.00190735, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99935601_G_A', pValueMantissa=6.01200008392334, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131034, standardError=0.00200253, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99935839_G_A', pValueMantissa=1.9040000438690186, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0088457, standardError=0.00185702, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99936985_A_C', pValueMantissa=8.661999702453613, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130204, standardError=0.00190715, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99937543_C_A', pValueMantissa=6.139999866485596, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130966, standardError=0.00200245, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99938738_G_A', pValueMantissa=1.0329999923706055, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132869, standardError=0.00195344, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99939547_T_C', pValueMantissa=6.979000091552734, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130948, standardError=0.00190939, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99939652_G_A', pValueMantissa=4.034999847412109, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132546, standardError=0.00191099, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99940864_C_T', pValueMantissa=5.973999977111816, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131208, standardError=0.0020049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99941429_ACCTCAGGGTTACC_A', pValueMantissa=4.370999813079834, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.187402, standardError=0.0929166, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99941625_T_C', pValueMantissa=7.7729997634887695, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130649, standardError=0.00190932, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942056_T_C', pValueMantissa=4.118000030517578, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0832392, standardError=0.0407701, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942622_C_G', pValueMantissa=4.414999961853027, pValueExponent=-12, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132329, standardError=0.00191137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942695_T_C', pValueMantissa=5.933000087738037, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131229, standardError=0.0020049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99942879_G_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99943051_G_T', pValueMantissa=5.098999977111816, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0153013, standardError=0.00335503, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99943272_T_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99943700_G_A', pValueMantissa=1.340000033378601, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0629333, standardError=0.0254482, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99943766_A_G', pValueMantissa=3.8440001010894775, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0136334, standardError=0.00196368, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99944281_T_C', pValueMantissa=7.932000160217285, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130606, standardError=0.00190951, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99944562_A_T', pValueMantissa=4.39300012588501, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0135957, standardError=0.0019636, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99944721_TC_T', pValueMantissa=4.39300012588501, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0135957, standardError=0.0019636, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99945215_A_G', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946139_G_A', pValueMantissa=1.3140000104904175, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00899244, standardError=0.00185887, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946260_A_G', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946307_A_C', pValueMantissa=4.873000144958496, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.12509, standardError=0.063468, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946514_A_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99946634_G_A', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99947232_A_G', pValueMantissa=5.947999954223633, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131222, standardError=0.00200491, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99947807_T_C', pValueMantissa=4.004000186920166, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132567, 
standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99947876_G_A', pValueMantissa=4.059000015258789, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132552, standardError=0.00191131, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99948681_A_G', pValueMantissa=7.769999980926514, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130649, standardError=0.00190932, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99949062_G_A', pValueMantissa=4.873000144958496, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.12509, standardError=0.063468, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99950056_G_A', pValueMantissa=4.761000156402588, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.107573, standardError=0.0543081, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99952120_T_C', pValueMantissa=1.062000036239624, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0236237, standardError=0.00721645, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99953194_G_A', pValueMantissa=5.874000072479248, pValueExponent=-11, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131262, standardError=0.00200494, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99953842_C_T', pValueMantissa=4.118000030517578, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132617, standardError=0.00191282, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99953915_A_G', pValueMantissa=3.8420000076293945, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132682, standardError=0.00191106, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99954520_G_A', pValueMantissa=6.242000102996826, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131087, standardError=0.00200505, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99954533_T_G', pValueMantissa=4.175000190734863, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132459, standardError=0.00191108, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99956142_G_A', pValueMantissa=4.054999828338623, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132533, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99956727_G_A', pValueMantissa=2.9579999446868896, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0132311, standardError=0.00445185, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99958027_A_G', pValueMantissa=7.925000190734863, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130596, standardError=0.00190933, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99959825_G_A', pValueMantissa=4.172999858856201, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0163596, standardError=0.00803454, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99959911_G_A', pValueMantissa=2.3239998817443848, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134674, standardError=0.00201484, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99962154_T_G', pValueMantissa=6.442999839782715, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130988, standardError=0.002005, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99962896_A_G', pValueMantissa=4.985000133514404, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.018755, standardError=0.00956281, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963747_T_C', pValueMantissa=3.8350000381469727, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.10415, standardError=0.0360205, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963755_G_A', pValueMantissa=4.4070000648498535, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132354, standardError=0.00191167, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963792_C_T', pValueMantissa=1.0329999923706055, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133147, standardError=0.0019575, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963855_G_A', pValueMantissa=6.202000141143799, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131113, standardError=0.00200516, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99963858_G_C', pValueMantissa=4.052000045776367, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132537, standardError=0.00191104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99964474_C_A', pValueMantissa=1.5709999799728394, pValueExponent=-11, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.013808, standardError=0.0020483, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99964716_C_T', pValueMantissa=3.6579999923706055, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.022367, standardError=0.00769613, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99965353_C_T', pValueMantissa=4.11899995803833, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0832401, standardError=0.0407717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99965381_C_T', pValueMantissa=3.3989999294281006, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0154555, standardError=0.00728972, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99965782_C_T', pValueMantissa=1.2259999513626099, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0312055, standardError=0.00547846, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99966311_G_A', pValueMantissa=4.085000038146973, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132514, standardError=0.00191102, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99966874_G_A', 
pValueMantissa=9.85099983215332, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133219, standardError=0.00195661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99967655_A_C', pValueMantissa=4.085999965667725, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132548, standardError=0.00191151, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99968174_T_C', pValueMantissa=6.03000020980835, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131184, standardError=0.00200495, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99970196_C_T', pValueMantissa=1.940999984741211, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134874, standardError=0.00191619, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99971874_A_G', pValueMantissa=4.980999946594238, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132011, standardError=0.0019115, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973080_C_CATTT', pValueMantissa=9.527999877929688, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00936982, standardError=0.00191176, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973080_CATTT_C', pValueMantissa=3.311000108718872, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013347, standardError=0.00191661, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973360_A_G', pValueMantissa=1.6030000448226929, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0940323, standardError=0.0390452, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973664_T_C', pValueMantissa=4.126999855041504, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013249, standardError=0.00191107, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973694_T_C', pValueMantissa=3.937000036239624, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.13666, standardError=0.0663293, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99973733_A_G', pValueMantissa=4.34499979019165, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132363, standardError=0.00191125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99974260_C_T', pValueMantissa=2.316999912261963, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0316527, standardError=0.0139405, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99975197_T_C', pValueMantissa=2.117000102996826, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134651, standardError=0.00191633, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99975489_C_G', pValueMantissa=5.743000030517578, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0354502, standardError=0.0128345, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99976504_G_A', pValueMantissa=8.508999824523926, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0819298, standardError=0.031138, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99977161_A_G', pValueMantissa=4.061999797821045, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132532, standardError=0.00191106, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99977721_G_A', pValueMantissa=6.2829999923706055, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0246959, standardError=0.00903733, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99979375_G_A', pValueMantissa=3.937000036239624, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.13666, standardError=0.0663293, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99979565_C_T', pValueMantissa=4.479000091552734, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0275515, standardError=0.0137302, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99979592_G_A', pValueMantissa=3.3989999294281006, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133022, standardError=0.00191119, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99980479_A_C', pValueMantissa=7.7230000495910645, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0930047, standardError=0.0349125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99980767_T_TG', pValueMantissa=3.1600000858306885, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133205, standardError=0.001911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99981303_T_C', pValueMantissa=1.0260000228881836, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129016, standardError=0.0019962, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_99982408_C_T', pValueMantissa=4.807000160217285, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131845, standardError=0.00200471, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99982941_G_A', pValueMantissa=3.236999988555908, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133135, standardError=0.00191092, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99983547_G_T', pValueMantissa=1.378000020980835, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0138444, standardError=0.00204793, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99983965_C_T', pValueMantissa=1.3339999914169312, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.276188, standardError=0.0722979, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99984671_A_G', pValueMantissa=1.090999960899353, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012885, standardError=0.00199651, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99984912_C_T', pValueMantissa=3.328000068664551, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133085, standardError=0.00191128, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99986015_T_C', pValueMantissa=1.0609999895095825, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012891, standardError=0.00199615, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988026_G_T', pValueMantissa=7.709000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122774, standardError=0.0046077, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988174_G_A', pValueMantissa=3.121999979019165, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133243, standardError=0.0019111, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988593_A_C', pValueMantissa=4.09499979019165, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0833533, standardError=0.0407789, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988730_T_C', pValueMantissa=3.1760001182556152, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133191, standardError=0.00191101, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988731_G_A', pValueMantissa=5.669000148773193, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144037, 
standardError=0.00520681, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99988980_A_G', pValueMantissa=4.874000072479248, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131807, standardError=0.00200475, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99989148_T_C', pValueMantissa=4.875, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131806, standardError=0.00200475, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99989685_T_C', pValueMantissa=4.556000232696533, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132006, standardError=0.00200472, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99989906_C_T', pValueMantissa=3.2639999389648438, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133133, standardError=0.00191121, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99990445_G_A', pValueMantissa=6.452000141143799, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.090219, standardError=0.0331213, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99990521_G_A', pValueMantissa=2.9019999504089355, pValueExponent=-12, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013345, standardError=0.00191124, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99990640_T_A', pValueMantissa=1.5579999685287476, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0139907, standardError=0.00442234, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99992006_G_GA', pValueMantissa=3.0260000228881836, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133329, standardError=0.00191111, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99992015_T_C', pValueMantissa=3.5850000381469727, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013291, standardError=0.00191165, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99992806_A_G', pValueMantissa=3.0290000438690186, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133327, standardError=0.00191112, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99993453_A_G', pValueMantissa=3.0329999923706055, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133323, standardError=0.00191113, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_99993852_T_C', pValueMantissa=3.0360000133514404, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133322, standardError=0.00191113, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99994177_C_T', pValueMantissa=4.533999919891357, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0132032, standardError=0.00200489, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99994349_A_T', pValueMantissa=1.0379999876022339, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0287633, standardError=0.00422921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995001_T_C', pValueMantissa=3.2860000133514404, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133152, standardError=0.00191176, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995032_G_C', pValueMantissa=4.89900016784668, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131854, standardError=0.00200569, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995714_T_C', pValueMantissa=3.374000072479248, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133048, standardError=0.00191128, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99995753_C_CTT', pValueMantissa=3.372999906539917, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133049, standardError=0.00191128, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99996207_C_G', pValueMantissa=3.384000062942505, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133041, standardError=0.00191129, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99997268_A_G', pValueMantissa=3.378999948501587, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133047, standardError=0.00191132, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99997438_CCA_C', pValueMantissa=6.603000164031982, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.109829, standardError=0.0404341, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99997720_A_G', pValueMantissa=2.815999984741211, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133548, standardError=0.00191149, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99998079_C_CA', pValueMantissa=4.61299991607666, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0922906, standardError=0.0462786, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99998104_G_A', pValueMantissa=1.2979999780654907, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00911911, standardError=0.0018841, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_99999971_A_G', pValueMantissa=2.9570000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.030431, standardError=0.0102384, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100000235_C_T', pValueMantissa=5.6519999504089355, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.014492, standardError=0.00201036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100000486_G_A', pValueMantissa=4.172999858856201, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.305198, standardError=0.149885, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100002038_G_A', pValueMantissa=3.7739999294281006, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0211624, standardError=0.0073064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100002628_A_C', 
pValueMantissa=2.953000068664551, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0138093, standardError=0.00189258, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100004140_G_A', pValueMantissa=2.6459999084472656, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0994314, standardError=0.0448022, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100004827_A_C', pValueMantissa=6.945000171661377, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142296, standardError=0.0019001, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100005233_T_C', pValueMantissa=3.111999988555908, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0216712, standardError=0.00733013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100005358_G_C', pValueMantissa=4.872000217437744, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145472, standardError=0.00201239, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100005438_A_G', pValueMantissa=1.093000054359436, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0533162, standardError=0.02095, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100006780_C_T', pValueMantissa=5.067999839782715, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145408, standardError=0.002013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100007241_C_T', pValueMantissa=6.783999919891357, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142505, standardError=0.0019021, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100007404_G_GC', pValueMantissa=1.3420000076293945, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0198944, standardError=0.00377285, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100008640_A_G', pValueMantissa=5.019999980926514, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145435, standardError=0.002013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100009013_G_A', pValueMantissa=6.611999988555908, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142475, standardError=0.00190084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100009635_T_G', pValueMantissa=1.0549999475479126, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134968, standardError=0.0018948, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100011477_C_G', pValueMantissa=2.444999933242798, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0148805, standardError=0.00203232, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100011685_CAAAT_C', pValueMantissa=4.139999866485596, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101715, standardError=0.00185449, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100011812_A_T', pValueMantissa=1.2029999494552612, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0141145, standardError=0.00190314, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100013275_A_T', pValueMantissa=2.174999952316284, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145924, standardError=0.00191005, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100013744_G_A', pValueMantissa=4.76800012588501, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.51719, standardError=0.261177, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100014566_A_G', pValueMantissa=8.550999641418457, pValueExponent=-4, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119513, standardError=0.00358432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100014970_T_C', pValueMantissa=3.993000030517578, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0146099, standardError=0.00201355, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100015400_G_A', pValueMantissa=5.646999835968018, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0160357, standardError=0.00222448, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100015645_T_C', pValueMantissa=2.236999988555908, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145559, standardError=0.00207384, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100015854_C_T', pValueMantissa=6.158999919891357, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0160044, standardError=0.0022238, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100016635_T_G', pValueMantissa=1.718999981880188, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147181, standardError=0.00218755, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100016645_C_T', pValueMantissa=4.622000217437744, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0256776, standardError=0.00906606, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100017027_A_T', pValueMantissa=6.242000102996826, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0160069, standardError=0.0022247, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100017624_A_G', pValueMantissa=1.7170000076293945, pValueExponent=-11, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147203, standardError=0.00218782, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100018116_C_T', pValueMantissa=2.197000026702881, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145668, standardError=0.00207465, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100018135_G_C', pValueMantissa=1.4559999704360962, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.275534, standardError=0.0725377, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100018265_A_T', pValueMantissa=2.7190001010894775, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00874366, standardError=0.00186392, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100020857_T_C', pValueMantissa=9.112000465393066, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0156784, standardError=0.00210358, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100021675_C_A', pValueMantissa=3.066999912261963, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0233385, standardError=0.0107983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100021907_C_T', pValueMantissa=7.229000091552734, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0159723, standardError=0.00222609, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100022313_C_CT', pValueMantissa=6.171999931335449, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0504229, standardError=0.0184125, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100023134_T_G', pValueMantissa=1.3980000019073486, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0268617, standardError=0.0109298, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100024168_G_A', pValueMantissa=2.3359999656677246, pValueExponent=-6, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.00880598, standardError=0.00186491, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100024869_C_T', pValueMantissa=3.556999921798706, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0162101, standardError=0.00222929, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100025927_T_TA', pValueMantissa=3.621999979019165, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.014426, standardError=0.00207534, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100026878_C_T', pValueMantissa=9.258999824523926, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0185461, standardError=0.00712673, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100027354_G_A', pValueMantissa=1.968000054359436, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.103617, standardError=0.0444235, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100027592_G_C', pValueMantissa=1.965999960899353, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.103629, standardError=0.0444216, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100027903_T_C', 
pValueMantissa=2.75600004196167, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0384662, standardError=0.0128487, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100028049_C_T', pValueMantissa=4.144000053405762, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.130023, standardError=0.0637634, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100029663_A_G', pValueMantissa=8.883999824523926, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0147895, standardError=0.00206939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100030853_G_A', pValueMantissa=7.5980000495910645, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0158245, standardError=0.00220759, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100030993_G_A', pValueMantissa=3.437000036239624, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0224965, standardError=0.00768921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100031808_C_T', pValueMantissa=5.959000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0212214, standardError=0.00771671, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100032684_A_C', pValueMantissa=5.459000110626221, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.014926, standardError=0.00206922, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100033239_T_A', pValueMantissa=1.3669999837875366, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0463194, standardError=0.0187842, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100033337_C_T', pValueMantissa=9.138999938964844, pValueExponent=-14, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0165264, standardError=0.00221749, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100033635_G_GT', pValueMantissa=1.0110000371932983, pValueExponent=-13, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0164902, standardError=0.00221658, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100035604_T_A', pValueMantissa=3.4110000133514404, pValueExponent=-12, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01433, standardError=0.002059, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100037190_G_A', pValueMantissa=3.2070000171661377, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0120692, standardError=0.00203849, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100037544_C_T', pValueMantissa=1.6990000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00996527, standardError=0.00417483, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100037655_A_G', pValueMantissa=6.13100004196167, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0127317, standardError=0.00219016, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100038188_C_T', pValueMantissa=7.738999843597412, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126728, standardError=0.00219478, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100038589_T_C', pValueMantissa=3.734999895095825, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010628, standardError=0.00193133, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100038648_G_A', pValueMantissa=1.7230000495910645, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00994355, standardError=0.00417478, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039544_T_C', pValueMantissa=5.021999835968018, pValueExponent=-10, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126227, standardError=0.00202988, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039545_G_T', pValueMantissa=1.5509999990463257, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130939, standardError=0.00204582, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039976_C_T', pValueMantissa=1.7209999561309814, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00994495, standardError=0.00417473, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100039997_T_C', pValueMantissa=5.01800012588501, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126197, standardError=0.00202937, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100040770_T_G', pValueMantissa=3.059999942779541, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.120454, standardError=0.0557085, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100042338_T_C', pValueMantissa=2.61899995803833, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.189007, standardError=0.085007, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100042505_G_A', pValueMantissa=3.4860000610351562, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101879, standardError=0.00482832, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100042848_C_CT', pValueMantissa=4.413000106811523, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0093599, standardError=0.00185336, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100043308_C_T', pValueMantissa=5.076000213623047, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126162, standardError=0.00202939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044357_C_T', pValueMantissa=5.36299991607666, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0127797, standardError=0.00218999, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044501_A_G', pValueMantissa=1.7130000591278076, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00995148, standardError=0.00417441, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044818_C_T', pValueMantissa=4.927000045776367, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0204248, standardError=0.00726406, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100044839_G_T', pValueMantissa=4.875, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126274, standardError=0.00202913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045047_A_C', pValueMantissa=4.922999858856201, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0204266, standardError=0.00726404, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045603_T_A', pValueMantissa=1.6740000247955322, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00998325, standardError=0.00417297, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045685_C_T', pValueMantissa=1.3309999704360962, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0131383, standardError=0.00204533, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100045864_G_A', pValueMantissa=1.6770000457763672, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00998092, standardError=0.00417295, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100046495_A_G', pValueMantissa=4.683000087738037, pValueExponent=-11, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0133938, standardError=0.00203532, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100046569_C_T', pValueMantissa=1.6740000247955322, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129058, standardError=0.00202014, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100047823_TA_T', pValueMantissa=1.5670000314712524, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129238, standardError=0.00201974, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100049106_G_A', pValueMantissa=1.690000057220459, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130999, standardError=0.00217423, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100049236_C_T', pValueMantissa=1.690000057220459, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00996986, standardError=0.00417357, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100049902_G_A', pValueMantissa=1.8940000534057617, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130591, standardError=0.00217409, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100050211_A_T', pValueMantissa=2.1419999599456787, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0633556, standardError=0.0275398, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100050359_GAACA_G', pValueMantissa=1.6019999980926514, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129195, standardError=0.00202015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100051409_C_G', pValueMantissa=1.7120000123977661, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0130944, standardError=0.00217407, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100051504_G_A', pValueMantissa=1.7549999952316284, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00990516, standardError=0.00417051, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100052545_C_G', pValueMantissa=1.5299999713897705, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129286, standardError=0.00201936, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100052742_T_C', pValueMantissa=1.687000036239624, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00997036, standardError=0.00417237, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100054045_C_T', pValueMantissa=1.3009999990463257, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0129769, standardError=0.0020191, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100054117_T_C', pValueMantissa=4.938000202178955, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0716183, standardError=0.0364414, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100054574_T_G', pValueMantissa=1.6629999876022339, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00999072, standardError=0.00417196, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055122_T_C', pValueMantissa=6.368000030517578, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0124774, standardError=0.00214876, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055153_G_A', pValueMantissa=1.6540000438690186, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00999801, standardError=0.0041714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055199_C_G', pValueMantissa=1.8609999418258667, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0130598, standardError=0.00217319, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055816_G_A', pValueMantissa=3.3589999675750732, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0126847, standardError=0.00201947, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100055862_T_G', pValueMantissa=4.1529998779296875, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0446939, standardError=0.0219282, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100056323_C_T', pValueMantissa=4.943999767303467, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0267439, standardError=0.0136118, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100056440_C_T', pValueMantissa=4.486999988555908, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00779499, standardError=0.00388601, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100057584_A_G', pValueMantissa=5.919000148773193, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011644, standardError=0.00200103, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100057785_T_C', 
pValueMantissa=4.816999912261963, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0128495, standardError=0.00219523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100057800_C_T', pValueMantissa=1.0049999952316284, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122603, standardError=0.00200707, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059207_A_C', pValueMantissa=3.622999906539917, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0143322, standardError=0.00401899, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059401_T_TAACAAC', pValueMantissa=3.5920000076293945, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00695612, standardError=0.00194938, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059401_T_TAACAACAAC', pValueMantissa=3.884999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133237, standardError=0.00644961, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100059657_G_A', pValueMantissa=3.880000114440918, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133275, standardError=0.00645004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100060102_G_C', pValueMantissa=3.874000072479248, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133327, standardError=0.00645048, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100060272_A_G', pValueMantissa=4.5370001792907715, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0594699, standardError=0.0297173, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100060908_A_G', pValueMantissa=2.742000102996826, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113896, standardError=0.00204953, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061298_C_T', pValueMantissa=2.503000020980835, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0150616, standardError=0.00672123, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061574_G_C', pValueMantissa=2.7809998989105225, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113862, standardError=0.00204983, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061627_C_T', pValueMantissa=1.6690000295639038, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0323984, 
standardError=0.00537545, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100061926_T_C', pValueMantissa=1.1759999990463257, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122155, standardError=0.00200796, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062022_C_T', pValueMantissa=9.611000061035156, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0212829, standardError=0.00821882, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062212_C_T', pValueMantissa=3.7899999618530273, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0133927, standardError=0.00645157, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062744_C_A', pValueMantissa=3.752000093460083, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134223, standardError=0.00645282, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100062935_C_T', pValueMantissa=3.7669999599456787, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0134118, standardError=0.00645294, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063173_T_G', pValueMantissa=2.7899999618530273, 
pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011387, standardError=0.00205019, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063265_C_T', pValueMantissa=2.61299991607666, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0142012, standardError=0.00638472, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063639_C_T', pValueMantissa=2.7750000953674316, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0276973, standardError=0.0125851, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100063786_T_C', pValueMantissa=2.9630000591278076, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011365, standardError=0.00205011, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064214_T_C', pValueMantissa=5.559999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0128066, standardError=0.00219686, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064301_T_C', pValueMantissa=2.427999973297119, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0540203, standardError=0.0239811, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100064486_T_C', pValueMantissa=2.484999895095825, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0271265, standardError=0.00428678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064533_A_G', pValueMantissa=8.324000358581543, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00771332, standardError=0.00196024, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100064544_T_C', pValueMantissa=8.236000061035156, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00771787, standardError=0.00196013, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100065403_C_G', pValueMantissa=1.7259999513626099, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00799338, standardError=0.00185995, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100066509_T_C', pValueMantissa=3.444999933242798, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0925747, standardError=0.0437751, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100066929_C_A', pValueMantissa=4.581999778747559, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0250808, standardError=0.012559, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100067656_G_A', pValueMantissa=2.6489999294281006, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143151, standardError=0.00645107, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100067862_G_A', pValueMantissa=7.921999931335449, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119879, standardError=0.00195036, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100068504_C_T', pValueMantissa=6.7170000076293945, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00746427, standardError=0.00187258, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100068843_C_A', pValueMantissa=1.2430000305175781, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115969, standardError=0.00203679, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100068866_G_GT', pValueMantissa=1.312000036239624, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115787, standardError=0.00203689, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100069305_A_T', pValueMantissa=2.6449999809265137, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0143181, standardError=0.00645098, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100069570_T_C', pValueMantissa=2.6549999713897705, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0151566, standardError=0.00504359, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100069757_C_T', pValueMantissa=2.5869998931884766, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0145205, standardError=0.00481941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100070175_A_G', pValueMantissa=2.000999927520752, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0130301, standardError=0.00421666, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100072279_G_C', pValueMantissa=1.2419999837875366, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115973, standardError=0.00203683, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100073428_G_T', pValueMantissa=2.609999895095825, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143509, standardError=0.00645062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100073963_C_T', pValueMantissa=1.0750000476837158, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116458, standardError=0.00203655, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074034_T_TC', pValueMantissa=2.005000114440918, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0113741, standardError=0.0020269, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074335_C_T', pValueMantissa=1.0729999542236328, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116458, standardError=0.00203643, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074511_G_A', pValueMantissa=4.894000053405762, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0166749, standardError=0.00478271, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100074712_C_T', pValueMantissa=1.2990000247955322, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0487079, standardError=0.0196078, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100075164_G_A', pValueMantissa=1.2369999885559082, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115989, standardError=0.00203683, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100075228_C_T', pValueMantissa=2.6029999256134033, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143577, standardError=0.00645054, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100075751_C_A', pValueMantissa=1.7940000295639038, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0172016, standardError=0.00726754, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100076243_C_A', pValueMantissa=2.6019999980926514, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143585, standardError=0.00645053, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100077499_A_G', pValueMantissa=4.675000190734863, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00753971, standardError=0.00185191, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100077617_C_A', pValueMantissa=8.185999870300293, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013965, standardError=0.00417311, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078024_G_A', pValueMantissa=4.400000095367432, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=0.0760544, standardError=0.0267049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078160_G_A', pValueMantissa=2.5989999771118164, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143609, standardError=0.00645053, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078351_A_T', pValueMantissa=5.255000114440918, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0358513, standardError=0.0103395, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078510_A_C', pValueMantissa=8.102999687194824, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119828, standardError=0.00195067, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100078923_G_A', pValueMantissa=1.805999994277954, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0171834, standardError=0.00726738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100079042_G_A', pValueMantissa=2.5889999866485596, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143712, standardError=0.00645058, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100080295_T_C', 
pValueMantissa=5.934000015258789, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0121315, standardError=0.00195916, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100080362_T_A', pValueMantissa=9.142000198364258, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0117746, standardError=0.00204921, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100081203_A_G', pValueMantissa=7.883999824523926, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119901, standardError=0.00195049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100082411_T_C', pValueMantissa=2.5829999446868896, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143766, standardError=0.00645057, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083012_C_A', pValueMantissa=3.0209999084472656, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125696, standardError=0.0019959, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083306_C_T', pValueMantissa=2.5810000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143789, standardError=0.00645062, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083442_A_T', pValueMantissa=4.665999889373779, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00754005, standardError=0.0018518, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083679_C_T', pValueMantissa=2.5810000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143785, standardError=0.00645064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083851_G_A', pValueMantissa=7.6570000648498535, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119987, standardError=0.00195041, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100083869_GTGGA_G', pValueMantissa=2.5769999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143832, standardError=0.00645065, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100084120_A_G', pValueMantissa=2.5739998817443848, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143853, standardError=0.00645064, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085028_A_G', pValueMantissa=1.2200000286102295, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0116042, standardError=0.00203692, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085123_T_C', pValueMantissa=2.569999933242798, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143893, standardError=0.00645068, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085191_A_C', pValueMantissa=1.0609999895095825, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116492, standardError=0.00203632, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085317_T_C', pValueMantissa=2.6549999713897705, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0151555, standardError=0.00504327, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085356_CT_C', pValueMantissa=2.989000082015991, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012573, standardError=0.00199591, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085677_T_C', pValueMantissa=7.4720001220703125, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0120068, standardError=0.0019505, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085790_C_T', 
pValueMantissa=9.621999740600586, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0119266, standardError=0.0019502, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100085791_G_A', pValueMantissa=6.89300012588501, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0076585, standardError=0.00283442, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100086416_A_C', pValueMantissa=4.2779998779296875, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00757727, standardError=0.00185176, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100087170_C_T', pValueMantissa=4.797999858856201, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0249466, standardError=0.0126147, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088100_T_C', pValueMantissa=1.2690000534057617, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115908, standardError=0.00203699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088206_G_A', pValueMantissa=1.8480000495910645, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0171185, standardError=0.00726652, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088307_G_A', pValueMantissa=8.534000396728516, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0258604, standardError=0.00421546, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088565_A_AGAG', pValueMantissa=2.5799999237060547, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01438, standardError=0.00645071, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088687_C_T', pValueMantissa=4.041999816894531, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0124937, standardError=0.00199821, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100088994_G_C', pValueMantissa=4.744999885559082, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.26271, standardError=0.13253, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100089654_C_A', pValueMantissa=1.2940000295639038, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115954, standardError=0.00203897, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100090029_G_T', pValueMantissa=3.2170000076293945, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0322465, standardError=0.00583203, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100093119_C_T', pValueMantissa=3.0880000591278076, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.379152, standardError=0.0909911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095173_CA_C', pValueMantissa=1.1920000314712524, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116127, standardError=0.00203702, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095421_C_T', pValueMantissa=4.376999855041504, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00756736, standardError=0.00185174, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095474_G_C', pValueMantissa=1.0429999828338623, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011657, standardError=0.00203668, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095491_G_A', pValueMantissa=2.575000047683716, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143845, standardError=0.00645073, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095666_A_G', pValueMantissa=2.997999906539917, pValueExponent=-10, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125726, standardError=0.001996, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100095766_C_T', pValueMantissa=2.5769999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143829, standardError=0.00645074, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100096097_G_A', pValueMantissa=4.854000091552734, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.159194, standardError=0.0807015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100096882_A_T', pValueMantissa=1.875, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0170779, standardError=0.00726586, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100097204_C_G', pValueMantissa=4.326000213623047, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00757235, standardError=0.00185173, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100097630_G_A', pValueMantissa=6.629000186920166, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0183017, standardError=0.0067411, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100097685_G_A', pValueMantissa=2.1630001068115234, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00784104, standardError=0.00341393, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100098591_G_A', pValueMantissa=7.1529998779296875, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00762397, standardError=0.00283455, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100099448_G_A', pValueMantissa=2.5399999618530273, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0110489, standardError=0.00198346, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100100113_A_G', pValueMantissa=2.996000051498413, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.012573, standardError=0.00199604, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100100236_G_A', pValueMantissa=2.565000057220459, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143946, standardError=0.00645086, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101678_G_A', pValueMantissa=1.065000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116505, standardError=0.00203676, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101681_G_A', pValueMantissa=2.572999954223633, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143872, standardError=0.00645085, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101734_C_T', pValueMantissa=2.805000066757202, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0388904, standardError=0.0177047, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101865_T_C', pValueMantissa=7.827000141143799, pValueExponent=-6, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00939164, standardError=0.0021011, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100101978_T_A', pValueMantissa=2.565000057220459, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0143954, standardError=0.00645086, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100102891_G_A', pValueMantissa=3.0280001163482666, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125699, standardError=0.00199607, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100102937_T_C', pValueMantissa=1.0709999799728394, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.0116485, standardError=0.00203677, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103034_T_C', pValueMantissa=1.062000036239624, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116512, standardError=0.00203676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103075_C_T', pValueMantissa=2.5480000972747803, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144117, standardError=0.0064509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103232_C_T', pValueMantissa=1.059999942779541, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116519, standardError=0.00203676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103284_T_C', pValueMantissa=1.2389999628067017, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115998, standardError=0.00203711, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103427_G_A', pValueMantissa=2.5409998893737793, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144177, standardError=0.0064504, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100103653_C_A', 
pValueMantissa=1.2389999628067017, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116, standardError=0.00203711, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104060_G_C', pValueMantissa=1.0570000410079956, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011653, standardError=0.00203678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104416_A_G', pValueMantissa=1.062999963760376, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116511, standardError=0.00203678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104538_T_A', pValueMantissa=3.0250000953674316, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125704, standardError=0.0019961, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100104974_T_C', pValueMantissa=2.5460000038146973, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0144144, standardError=0.00645109, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100105358_T_A', pValueMantissa=1.246000051498413, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115981, standardError=0.00203714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100105587_G_A', pValueMantissa=1.0700000524520874, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116489, standardError=0.0020368, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100105759_A_T', pValueMantissa=1.253000020980835, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115962, standardError=0.00203715, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106124_T_C', pValueMantissa=1.0019999742507935, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116787, standardError=0.00203801, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106127_T_C', pValueMantissa=1.0019999742507935, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116787, standardError=0.00203801, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106157_C_A', pValueMantissa=2.5399999618530273, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.01442, standardError=0.00645108, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100106705_G_GGCAGAGTAA', pValueMantissa=3.0460000038146973, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0125685, 
standardError=0.00199613, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107060_CG_C', pValueMantissa=1.2519999742507935, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115965, standardError=0.00203718, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107246_C_T', pValueMantissa=1.8339999914169312, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.148529, standardError=0.0629731, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107759_T_C', pValueMantissa=3.5490000247955322, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00765844, standardError=0.00185209, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100107761_A_G', pValueMantissa=5.459000110626221, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.272006, standardError=0.0786791, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100108013_C_T', pValueMantissa=2.561000108718872, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0243527, standardError=0.00807438, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100108920_T_G', pValueMantissa=9.07699966430664, 
pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011727, standardError=0.00204049, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100109874_G_A', pValueMantissa=4.85099983215332, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0248793, standardError=0.0126104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100110231_C_T', pValueMantissa=7.883999824523926, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0117687, standardError=0.00203931, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100110702_T_G', pValueMantissa=3.5929999351501465, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0606656, standardError=0.0289201, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100110795_TTA_T', pValueMantissa=6.190999984741211, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118518, standardError=0.00203936, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100111161_G_A', pValueMantissa=2.000999927520752, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0497915, standardError=0.0161136, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100111179_C_T', pValueMantissa=6.201000213623047, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118489, standardError=0.00203896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100111991_C_A', pValueMantissa=6.7820000648498535, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118202, standardError=0.00203929, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100112419_TA_T', pValueMantissa=5.803999900817871, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118714, standardError=0.00203896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100112828_C_T', pValueMantissa=2.8289999961853027, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0388309, standardError=0.0177045, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100112930_A_C', pValueMantissa=6.804999828338623, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118191, standardError=0.0020393, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100114067_A_G', pValueMantissa=1.0110000371932983, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104825, standardError=0.00196864, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100115072_C_T', pValueMantissa=1.034999966621399, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104694, standardError=0.00196776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100115087_C_T', pValueMantissa=5.876999855041504, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118682, standardError=0.00203912, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100115453_T_C', pValueMantissa=1.0360000133514404, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104692, standardError=0.00196776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116179_G_C', pValueMantissa=5.88100004196167, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118679, standardError=0.00203913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116494_A_T', pValueMantissa=1.0360000133514404, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.010469, standardError=0.00196776, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116607_C_T', pValueMantissa=1.0160000324249268, pValueExponent=-7, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=-0.0104762, standardError=0.00196781, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100116979_C_G', pValueMantissa=1.0180000066757202, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104755, standardError=0.0019678, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100117013_G_A', pValueMantissa=1.0379999876022339, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104684, standardError=0.00196777, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100118204_T_G', pValueMantissa=1.0509999990463257, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104641, standardError=0.00196778, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100118462_G_A', pValueMantissa=5.9079999923706055, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0118664, standardError=0.00203914, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100118652_C_T', pValueMantissa=1.128999948501587, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0104375, standardError=0.00196763, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100119929_C_G', pValueMantissa=8.840999603271484, pValueExponent=-10, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0121944, standardError=0.0019896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100120722_G_A', pValueMantissa=5.441999912261963, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106837, standardError=0.00196527, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100121497_C_T', pValueMantissa=1.5360000133514404, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.573298, standardError=0.23652, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100124748_T_C', pValueMantissa=2.8399999141693115, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0388018, standardError=0.0177043, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125143_C_CA', pValueMantissa=3.8269999027252197, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0260846, standardError=0.0125892, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125152_C_A', pValueMantissa=7.866000175476074, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105533, standardError=0.00196517, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125180_C_T', pValueMantissa=7.633999824523926, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105635, standardError=0.00196509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100125779_A_G', pValueMantissa=7.78000020980835, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105568, standardError=0.00196509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100126618_A_C', pValueMantissa=4.631999969482422, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.133295, standardError=0.0668985, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100126748_A_G', pValueMantissa=7.800000190734863, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105558, standardError=0.00196509, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100127494_A_T', pValueMantissa=1.8029999732971191, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082724, standardError=0.00349777, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128204_G_GAGAGAA', pValueMantissa=5.4019999504089355, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0107556, 
standardError=0.00197804, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128221_A_G', pValueMantissa=6.446000099182129, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106227, standardError=0.001965, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128550_G_A', pValueMantissa=8.194000244140625, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105396, standardError=0.00196532, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128651_T_C', pValueMantissa=7.315000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105796, standardError=0.00196528, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128763_C_T', pValueMantissa=7.829999923706055, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105544, standardError=0.00196508, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128883_C_T', pValueMantissa=8.057000160217285, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0105443, standardError=0.00196508, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100128958_G_A', pValueMantissa=2.124000072479248, 
pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00680841, standardError=0.00295533, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129035_C_A', pValueMantissa=3.3989999294281006, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0925999, standardError=0.043676, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129320_C_T', pValueMantissa=2.13100004196167, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00680465, standardError=0.00295531, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129629_A_C', pValueMantissa=1.8580000400543213, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.148278, standardError=0.0629939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129660_T_C', pValueMantissa=8.699000358581543, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115118, standardError=0.00200055, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100129776_G_A', pValueMantissa=1.690000057220459, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0884659, standardError=0.0281741, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100130207_C_T', pValueMantissa=3.569000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0140895, standardError=0.00483524, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100130573_C_G', pValueMantissa=2.9149999618530273, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00674022, standardError=0.00186053, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100130860_A_G', pValueMantissa=9.043999671936035, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114981, standardError=0.00200046, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100131042_G_A', pValueMantissa=3.619999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.179459, standardError=0.0503203, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100131063_C_G', pValueMantissa=2.2909998893737793, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00671867, standardError=0.00295325, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100131465_G_T', pValueMantissa=8.934000015258789, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115031, standardError=0.00200061, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132172_C_G', pValueMantissa=2.128000020980835, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00680593, standardError=0.00295533, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132602_T_TTAAA', pValueMantissa=4.828000068664551, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00431663, standardError=0.00218576, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132602_T_TTAAATAAA', pValueMantissa=4.5980000495910645, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0519863, standardError=0.0260513, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100132602_TTAAATAAATAAA_T', pValueMantissa=7.363999843597412, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116156, standardError=0.00200877, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100133539_G_T', pValueMantissa=2.6610000133514404, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00678474, standardError=0.00186073, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100133661_G_A', pValueMantissa=2.6619999408721924, pValueExponent=-4, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00678444, standardError=0.00186073, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100134226_T_TAAAC', pValueMantissa=9.397000312805176, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114918, standardError=0.00200162, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100134368_G_T', pValueMantissa=2.305999994277954, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00685323, standardError=0.0018608, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100134849_C_G', pValueMantissa=2.5420000553131104, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00680611, standardError=0.00186063, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100136849_G_A', pValueMantissa=2.4240000247955322, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00682879, standardError=0.00186061, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100137626_G_C', pValueMantissa=4.564000129699707, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.133964, standardError=0.067025, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100139939_C_T', pValueMantissa=2.2850000858306885, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0168361, standardError=0.00739727, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140109_A_G', pValueMantissa=8.161999702453613, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0116036, standardError=0.00201274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140433_T_C', pValueMantissa=8.460000038146973, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00745523, standardError=0.00283129, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140822_C_T', pValueMantissa=4.0370001792907715, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00653853, standardError=0.00184827, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100140993_C_T', pValueMantissa=8.553999900817871, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00744479, standardError=0.00283137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100141769_G_C', pValueMantissa=4.497000217437744, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.134653, standardError=0.0671606, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100142018_C_T', pValueMantissa=8.651000022888184, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00615783, standardError=0.00184859, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100142970_A_T', pValueMantissa=3.749000072479248, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0279115, standardError=0.00962947, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143128_C_A', pValueMantissa=2.7290000915527344, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0151084, standardError=0.0050416, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143200_T_G', pValueMantissa=8.8100004196167, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00751992, standardError=0.00191775, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143201_T_C', pValueMantissa=8.812999725341797, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00751979, standardError=0.00191775, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143322_A_G', pValueMantissa=8.62600040435791, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0074353, standardError=0.00283084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143412_T_C', pValueMantissa=3.306999921798706, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0244099, standardError=0.00588015, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100143655_T_C', pValueMantissa=5.8420000076293945, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0063633, standardError=0.00185041, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144149_T_G', pValueMantissa=5.2210001945495605, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106098, standardError=0.00194903, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144392_A_ACGTG', pValueMantissa=1.059999942779541, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0122106, standardError=0.00200172, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100144587_G_A', pValueMantissa=4.710000038146973, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0106442, standardError=0.00194881, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100144738_C_T', pValueMantissa=3.628999948501587, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00387717, standardError=0.00185181, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145180_CA_C', pValueMantissa=7.709000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00754349, standardError=0.00283104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145446_T_C', pValueMantissa=4.289000034332275, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00722839, standardError=0.0025309, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145462_G_A', pValueMantissa=1.184999942779541, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00704211, standardError=0.00279842, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145533_C_T', pValueMantissa=6.809999942779541, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0688508, standardError=0.0254439, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100145864_T_C', pValueMantissa=2.5959999561309814, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00556169, standardError=0.00184654, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100146576_G_A', pValueMantissa=2.239000082015991, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102862, standardError=0.0027873, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100147206_C_G', pValueMantissa=2.24399995803833, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102847, standardError=0.00278728, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100148436_A_G', pValueMantissa=7.232999801635742, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01072, standardError=0.00317104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100148745_A_G', pValueMantissa=9.111000061035156, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011124, standardError=0.00335401, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100150256_TG_T', pValueMantissa=1.621000051498413, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0058237, standardError=0.00184761, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100151385_A_G', pValueMantissa=2.200000047683716, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102977, 
standardError=0.002787, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100151760_T_G', pValueMantissa=1.680999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0116223, standardError=0.00369954, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100151918_T_C', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100152307_T_C', pValueMantissa=9.949999809265137, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108402, standardError=0.00189131, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100152437_C_T', pValueMantissa=1.0140000581741333, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108394, standardError=0.00189222, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100153963_AT_A', pValueMantissa=3.984999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00551895, standardError=0.00191674, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155294_C_T', pValueMantissa=5.695000171661377, 
pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0370698, standardError=0.0134074, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155300_C_A', pValueMantissa=3.9560000896453857, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.013611, standardError=0.0066127, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155337_G_C', pValueMantissa=9.121999740600586, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0129497, standardError=0.00390489, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155409_G_C', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100155608_A_G', pValueMantissa=2.359999895095825, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102538, standardError=0.00278865, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157105_C_T', pValueMantissa=1.9290000200271606, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114702, standardError=0.00369896, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100157116_T_A', pValueMantissa=1.4279999732971191, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00588326, standardError=0.00184495, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157609_G_A', pValueMantissa=2.1989998817443848, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0103034, standardError=0.00278846, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100157763_C_T', pValueMantissa=3.859999895095825, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00711467, standardError=0.00343966, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100158037_G_A', pValueMantissa=4.420000076293945, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0259169, standardError=0.00910471, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100158360_A_G', pValueMantissa=1.7910000085830688, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115545, standardError=0.0037, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100158691_C_T', pValueMantissa=6.251999855041504, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0131114, standardError=0.00327532, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159080_T_C', pValueMantissa=8.289999961853027, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0343476, standardError=0.0102747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159144_G_A', pValueMantissa=1.3380000591278076, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00857697, standardError=0.00346768, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159600_GAAATCAACAATAA_G', pValueMantissa=1.7860000133514404, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115575, standardError=0.0037, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100159706_T_TTTAAA', pValueMantissa=2.239000082015991, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102922, standardError=0.00278886, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100161011_T_C', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100161641_G_T', pValueMantissa=1.305999994277954, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01191, standardError=0.00370485, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100162586_C_T', pValueMantissa=1.5570000410079956, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117095, standardError=0.00370097, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100162599_A_G', pValueMantissa=1.7910000085830688, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115545, standardError=0.0037, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100164449_A_T', pValueMantissa=8.289999961853027, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0343476, standardError=0.0102747, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100164555_T_C', pValueMantissa=1.8140000104904175, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0162269, standardError=0.00686797, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100164661_T_C', pValueMantissa=1.065000057220459, pValueExponent=-8, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0108199, standardError=0.00189161, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100166335_G_A', pValueMantissa=1.569000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370099, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167234_TA_T', pValueMantissa=1.9529999494552612, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114517, standardError=0.00369739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167290_A_G', pValueMantissa=1.9539999961853027, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114514, standardError=0.00369739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167685_C_T', pValueMantissa=3.9630000591278076, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0526946, standardError=0.0256104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100167711_G_A', pValueMantissa=1.3270000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011893, standardError=0.00370488, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100168884_G_A', pValueMantissa=1.815000057220459, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.01154, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), 
Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100169073_T_A', pValueMantissa=1.659000039100647, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0164986, standardError=0.00688708, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170367_T_A', pValueMantissa=1.8220000267028809, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115361, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170447_T_C', pValueMantissa=1.8200000524520874, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011537, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170652_A_G', pValueMantissa=8.133000373840332, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0162704, standardError=0.00614785, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100170743_T_C', pValueMantissa=1.819000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115379, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171278_T_G', pValueMantissa=1.965999960899353, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114445, 
standardError=0.00369739, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171408_C_CTATT', pValueMantissa=1.8220000267028809, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115357, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171892_C_A', pValueMantissa=4.783999919891357, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0290312, standardError=0.0146713, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100171937_A_T', pValueMantissa=3.7090001106262207, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0264529, standardError=0.0126887, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100172580_C_A', pValueMantissa=1.8209999799728394, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115368, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100172766_C_T', pValueMantissa=1.3320000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118891, standardError=0.00370488, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100173406_G_C', pValueMantissa=3.993000030517578, 
pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0276948, standardError=0.00962055, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100173454_A_G', pValueMantissa=1.8220000267028809, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011536, standardError=0.00370004, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100174322_A_AT', pValueMantissa=1.9670000076293945, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114438, standardError=0.00369738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100174478_C_T', pValueMantissa=5.164999961853027, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114058, standardError=0.00195247, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175179_A_G', pValueMantissa=1.968000054359436, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114433, standardError=0.00369738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175211_T_C', pValueMantissa=9.72700023651123, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0910994, standardError=0.02762, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100175305_C_T', pValueMantissa=1.8240000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011535, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100175350_T_C', pValueMantissa=1.8240000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011535, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100176550_CAAAAGACCGTTTTTA_C', pValueMantissa=1.8869999647140503, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115298, standardError=0.00371041, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100177021_C_G', pValueMantissa=1.8309999704360962, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115307, standardError=0.00370003, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100177437_T_C', pValueMantissa=4.168000221252441, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00536168, standardError=0.00187135, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100177850_C_T', pValueMantissa=7.14300012588501, pValueExponent=-5, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0216516, standardError=0.00545178, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178264_G_A', pValueMantissa=1.8339999914169312, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011529, standardError=0.00370002, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178377_T_A', pValueMantissa=3.9769999980926514, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277079, standardError=0.00962096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178824_A_C', pValueMantissa=1.1480000019073486, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0273359, standardError=0.0108138, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178873_G_C', pValueMantissa=3.313999891281128, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00845972, standardError=0.00288043, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100178957_A_G', pValueMantissa=1.8370000123977661, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011527, standardError=0.00370001, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100181244_T_A', pValueMantissa=9.291000366210938, pValueExponent=-4, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0116765, standardError=0.00352639, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100182158_GT_G', pValueMantissa=1.440999984741211, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118051, standardError=0.00370502, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100182496_T_C', pValueMantissa=2.2899999618530273, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102733, standardError=0.00278807, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100182799_A_G', pValueMantissa=2.365000009536743, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0102471, standardError=0.00278728, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100183196_T_C', pValueMantissa=1.8519999980926514, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011518, standardError=0.00369996, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100183255_G_A', pValueMantissa=1.3580000400543213, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118679, standardError=0.0037048, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100183966_G_A', 
pValueMantissa=1.8609999418258667, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0115126, standardError=0.00369993, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100184256_T_C', pValueMantissa=4.7220001220703125, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.029124, standardError=0.0146772, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100184474_G_A', pValueMantissa=3.7109999656677246, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00543283, standardError=0.00187227, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100184568_G_A', pValueMantissa=2.1500000953674316, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0103173, standardError=0.00278792, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100186257_C_G', pValueMantissa=1.2259999513626099, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00596993, standardError=0.00184663, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100186688_C_T', pValueMantissa=1.9229999780654907, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114751, standardError=0.00369939, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100187980_C_T', pValueMantissa=4.630000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813035, standardError=0.00287118, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100189674_T_G', pValueMantissa=9.279999732971191, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.126539, standardError=0.0486397, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100190351_G_A', pValueMantissa=4.619999885559082, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00813223, standardError=0.00287113, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100190395_G_A', pValueMantissa=2.0, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0760922, standardError=0.024623, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100192759_T_C', pValueMantissa=1.9559999704360962, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114555, standardError=0.00369915, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100193524_C_T', pValueMantissa=9.555000305175781, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122529, standardError=0.0037093, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100193948_A_C', pValueMantissa=3.0179998874664307, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115477, standardError=0.00194714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100195245_A_T', pValueMantissa=4.635000228881836, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0081285, standardError=0.00287091, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100195684_C_CA', pValueMantissa=4.2270002365112305, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.137066, standardError=0.0674911, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100195855_G_A', pValueMantissa=4.870999813079834, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00807719, standardError=0.00286889, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197192_T_C', pValueMantissa=4.242000102996826, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136954, standardError=0.067486, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197458_C_A', pValueMantissa=4.629000186920166, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.00812979, standardError=0.00287088, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197612_C_G', pValueMantissa=4.498000144958496, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815595, standardError=0.00287088, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197640_T_C', pValueMantissa=1.965000033378601, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114501, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197675_G_A', pValueMantissa=2.75600004196167, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0386379, standardError=0.0175348, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100197920_A_G', pValueMantissa=2.384999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0167093, standardError=0.00739523, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100198475_T_C', pValueMantissa=1.9639999866485596, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114509, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100199282_C_G', 
pValueMantissa=1.1059999465942383, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0127328, standardError=0.00390313, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100199476_G_A', pValueMantissa=1.319000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118973, standardError=0.00370432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100200167_G_A', pValueMantissa=3.1589999198913574, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0376207, standardError=0.012745, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100201209_C_T', pValueMantissa=1.097000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00602792, standardError=0.0018465, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100201414_G_A', pValueMantissa=4.788000106811523, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0068162, standardError=0.00344526, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100201633_T_A', pValueMantissa=1.3179999589920044, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0118983, standardError=0.00370432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100202455_T_C', pValueMantissa=1.9600000381469727, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114531, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100202467_G_A', pValueMantissa=3.875999927520752, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277856, standardError=0.00962082, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100202772_A_T', pValueMantissa=1.9600000381469727, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114531, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100203494_T_C', pValueMantissa=1.9589999914169312, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114534, standardError=0.00369901, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100203542_G_T', pValueMantissa=1.6130000352859497, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697446, standardError=0.00184854, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100203788_C_A', pValueMantissa=4.110000133514404, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.113144, standardError=0.0394286, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100204779_C_A', pValueMantissa=4.3379998207092285, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0101425, standardError=0.00502088, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100205801_A_G', pValueMantissa=4.5329999923706055, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00814881, standardError=0.00287085, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206194_A_T', pValueMantissa=1.9570000171661377, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114546, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206333_A_G', pValueMantissa=4.160999774932861, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0263098, standardError=0.0129133, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206660_A_G', pValueMantissa=2.4539999961853027, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.113651, standardError=0.0505422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100206733_C_T', pValueMantissa=1.315999984741211, pValueExponent=-3, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119003, standardError=0.00370432, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207009_TA_T', pValueMantissa=1.9559999704360962, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114549, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207455_C_G', pValueMantissa=1.305999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00593547, standardError=0.00184639, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207653_G_A', pValueMantissa=3.6470000743865967, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114874, standardError=0.00194719, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100207898_CAAGTT_C', pValueMantissa=3.384999990463257, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0921644, standardError=0.0434369, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100208164_G_C', pValueMantissa=3.874000072479248, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277875, standardError=0.0096208, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100209200_T_C', pValueMantissa=1.315000057220459, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119009, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100210277_C_T', pValueMantissa=1.3040000200271606, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119097, standardError=0.00370434, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100211064_G_A', pValueMantissa=1.9550000429153442, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114558, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100211240_A_G', pValueMantissa=7.230000019073486, pValueExponent=-7, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00932586, standardError=0.00188208, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100212531_T_C', pValueMantissa=3.874000072479248, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0277872, standardError=0.00962078, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100212621_T_C', pValueMantissa=1.6109999418258667, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697491, standardError=0.00184852, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100213072_C_T', pValueMantissa=1.9529999494552612, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0114567, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100213340_T_C', pValueMantissa=1.6119999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697479, standardError=0.00184851, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100213631_T_C', pValueMantissa=1.6119999885559082, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00697476, standardError=0.00184851, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100214977_G_T', pValueMantissa=4.242000102996826, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136966, standardError=0.0674913, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215512_G_A', pValueMantissa=4.473999977111816, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00816059, standardError=0.0028708, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215525_C_CA', pValueMantissa=1.9509999752044678, pValueExponent=-3, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0114578, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215535_T_A', pValueMantissa=9.406000137329102, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122847, standardError=0.00371395, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100215669_A_G', pValueMantissa=1.9509999752044678, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011458, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100216129_G_T', pValueMantissa=1.312000036239624, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119034, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100216744_G_C', pValueMantissa=2.9519999027252197, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115543, standardError=0.00194705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100217073_T_TCATA', pValueMantissa=4.507999897003174, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815394, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100218126_C_T', 
pValueMantissa=1.055999994277954, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00604851, standardError=0.00184672, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100218867_A_G', pValueMantissa=3.3550000190734863, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.205837, standardError=0.0968422, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100218929_A_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219439_G_A', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219496_T_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219653_G_A', pValueMantissa=8.52400016784668, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0210222, standardError=0.00799145, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100219661_T_C', pValueMantissa=7.915999889373779, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0163115, standardError=0.00614227, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100220164_T_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100220814_T_TA', pValueMantissa=4.504000186920166, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815468, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100220831_C_A', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221396_C_T', pValueMantissa=2.944000005722046, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115551, standardError=0.00194705, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221486_T_G', pValueMantissa=1.3890000581741333, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0160699, standardError=0.00653242, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221517_C_A', pValueMantissa=2.5439999103546143, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00413576, standardError=0.00185074, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100221594_C_G', pValueMantissa=2.0799999237060547, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0779801, standardError=0.0337333, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100223656_G_A', pValueMantissa=3.621000051498413, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114895, standardError=0.00194717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100223788_T_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224059_G_A', pValueMantissa=3.621000051498413, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114895, standardError=0.00194717, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224086_A_ACT', pValueMantissa=1.0759999752044678, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00473991, standardError=0.00185855, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224086_A_ACTCTCTCT', pValueMantissa=9.140999794006348, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0123865, standardError=0.00373569, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224729_G_A', pValueMantissa=9.512999534606934, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122608, standardError=0.00371028, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224788_C_T', pValueMantissa=4.552000045776367, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00814496, standardError=0.00287083, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100224846_CA_C', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100225330_C_T', pValueMantissa=4.679999828338623, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00811972, standardError=0.0028709, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100225460_G_A', pValueMantissa=1.4520000219345093, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0138989, standardError=0.00568655, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100227298_T_G', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100227436_A_C', pValueMantissa=2.1089999675750732, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0204774, standardError=0.00887858, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100227710_T_C', pValueMantissa=2.624000072479248, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115932, standardError=0.00194728, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228161_T_C', pValueMantissa=1.1180000305175781, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0127208, standardError=0.00390316, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228188_C_G', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, 
standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228581_G_GGGA', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100228620_A_G', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100229692_G_T', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100229731_G_C', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100230118_T_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100230958_C_A', pValueMantissa=3.694000005722046, pValueExponent=-2, pValueMantissaConditioned=None, 
pValueExponentConditioned=None, beta=0.0264656, standardError=0.0126847, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231193_T_TTTG', pValueMantissa=2.11299991607666, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0113217, standardError=0.00368321, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231368_T_A', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231561_G_GT', pValueMantissa=4.546000003814697, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815129, standardError=0.00287266, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100231791_G_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100233804_CAG_C', pValueMantissa=3.674999952316284, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0114848, standardError=0.00194718, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, 
variantId='10_100235720_T_G', pValueMantissa=4.255000114440918, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0683751, standardError=0.0337137, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236254_A_G', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236529_C_T', pValueMantissa=1.5670000314712524, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00698791, standardError=0.00184852, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236538_A_G', pValueMantissa=1.5640000104904175, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00698861, standardError=0.00184852, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100236603_C_T', pValueMantissa=3.1489999294281006, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115356, standardError=0.00194738, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237030_C_T', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237733_G_A', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237772_G_A', pValueMantissa=9.51200008392334, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122572, standardError=0.00370917, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100237893_A_T', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100238295_A_G', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100238581_A_G', pValueMantissa=6.859000205993652, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0255013, standardError=0.00751108, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100238657_T_C', pValueMantissa=2.809000015258789, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=-0.01157, standardError=0.00194702, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239610_G_T', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239637_A_G', pValueMantissa=1.1369999647140503, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00600877, standardError=0.00184638, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239850_T_C', pValueMantissa=1.9490000009536743, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011459, standardError=0.003699, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100239989_G_A', pValueMantissa=3.2009999752044678, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011529, standardError=0.00194715, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100240726_T_G', pValueMantissa=4.000999927520752, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0525952, standardError=0.0256104, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100240944_C_T', pValueMantissa=2.8889999389648438, 
pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0115616, standardError=0.00194713, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242014_C_T', pValueMantissa=4.505000114440918, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00815457, standardError=0.00287084, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242478_A_G', pValueMantissa=9.51200008392334, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0122572, standardError=0.00370917, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242630_C_T', pValueMantissa=1.253999948501587, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011921, standardError=0.00369498, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100242920_TGA_T', pValueMantissa=4.275000095367432, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.136751, standardError=0.0674941, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100243390_G_A', pValueMantissa=8.79800033569336, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0252612, standardError=0.00439142, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100244149_G_C', pValueMantissa=3.316999912261963, pValueExponent=-9, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.011525, standardError=0.0019484, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244398_T_C', pValueMantissa=3.694000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834099, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244540_A_G', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244650_T_C', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244773_C_G', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100244991_G_C', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, 
r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100245546_T_C', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100245863_C_T', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100246645_C_T', pValueMantissa=3.694000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834099, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100246672_T_C', pValueMantissa=1.1399999856948853, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00600727, standardError=0.00184638, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100246924_C_T', pValueMantissa=3.4030001163482666, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0281666, standardError=0.00961714, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100247616_T_C', pValueMantissa=3.694000005722046, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, 
beta=0.00834089, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100248412_C_G', pValueMantissa=1.4470000267028809, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00702484, standardError=0.00184863, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100248726_A_G', pValueMantissa=3.694999933242798, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834082, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249239_A_C', pValueMantissa=2.0339999198913574, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0353474, standardError=0.0152353, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249267_A_T', pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249476_T_TC', pValueMantissa=3.694999933242798, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00834082, standardError=0.00287304, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100249705_T_C', 
pValueMantissa=1.472000002861023, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0117796, standardError=0.00370405, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100250941_T_C', pValueMantissa=1.8240000009536743, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.621107, standardError=0.263096, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100250976_T_A', pValueMantissa=6.709000110626221, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0690304, standardError=0.0254633, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100251390_T_A', pValueMantissa=2.9049999713897705, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0055723, standardError=0.00187139, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100251454_A_G', pValueMantissa=1.1790000200271606, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00712036, standardError=0.00184919, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100251790_C_T', pValueMantissa=3.497999906539917, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00593413, standardError=0.00281426, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, 
is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100252662_G_A', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100252768_G_A', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100252920_T_G', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100254207_CCAGA_C', pValueMantissa=3.382999897003174, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0921782, standardError=0.0434383, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100254700_T_C', pValueMantissa=1.3079999685287476, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.0884636, standardError=0.0275222, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100254749_C_A', pValueMantissa=3.940999984741211, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00710422, standardError=0.00344885, 
betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100255058_C_G', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100255162_C_T', pValueMantissa=3.9679999351501465, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00827516, standardError=0.00287265, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100256000_C_A', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100256287_C_T', pValueMantissa=4.053999900817871, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00825485, standardError=0.00287235, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100256511_G_A', pValueMantissa=1.0700000524520874, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00604016, standardError=0.00184634, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100257612_C_T', pValueMantissa=1.9170000553131104, pValueExponent=-2, 
pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082026, standardError=0.00350204, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100258083_A_G', pValueMantissa=2.7039999961853027, pValueExponent=-2, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0635849, standardError=0.0287601, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100258094_C_T', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100258332_C_T', pValueMantissa=3.9760000705718994, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082729, standardError=0.00287251, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100260000_A_G', pValueMantissa=1.6239999532699585, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0116706, standardError=0.00370306, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100260586_A_G', pValueMantissa=3.8919999599456787, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.00829222, standardError=0.00287251, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, 
posteriorProbability=None, variantId='10_100260869_C_T', pValueMantissa=4.205999851226807, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0082302, standardError=0.00287541, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100262992_C_T', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100263055_G_C', pValueMantissa=1.3109999895095825, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.0119039, standardError=0.00370431, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100263140_C_T', pValueMantissa=1.5770000219345093, pValueExponent=-3, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=0.011701, standardError=0.00370274, betaConditioned=None, standardErrorConditioned=None, r2Overall=None), Row(is95CredibleSet=None, is99CredibleSet=None, logABF=None, posteriorProbability=None, variantId='10_100263249_C_T', pValueMantissa=1.1790000200271606, pValueExponent=-4, pValueMantissaConditioned=None, pValueExponentConditioned=None, beta=-0.00711917, standardError=0.00184892, betaConditioned=None, standardErrorConditioned=None, r2Overall=None)], size=1001)]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "window_based_clumped_output = \"gs://ot-team/dsuveges/finngen/2023.10.13_window_clumped_w_locus\"\n", + "\n", + "(\n", + " session.spark.read.parquet(ld_clumped_output)\n", + " 
.filter(f.size(f.col(\"qualityControls\")) == 0)\n", + "# .show(1, False, True)\n", + " .select(\"locus\", f.size(f.col(\"locus\")).alias(\"size\"))\n", + " .limit(1)\n", + " .collect()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e242acdd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "gist": { + "data": { + "description": "GCS/dsuveges/PICS/2023.10.06 - PICS FINNGEN from top to bottom.ipynb", + "public": false + }, + "id": "" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/notebooks/Mapping_EFO_finngen.ipynb b/notebooks/Mapping_EFO_finngen.ipynb index 594a4f0ad..1feaf890c 100644 --- a/notebooks/Mapping_EFO_finngen.ipynb +++ b/notebooks/Mapping_EFO_finngen.ipynb @@ -109,15 +109,15 @@ ], "source": [ "import os\n", + "\n", "import hail as hl\n", - "import pyspark.sql.functions as f\n", "import pandas as pd\n", - "pd.set_option('display.max_colwidth', None)\n", - "pd.set_option('display.expand_frame_repr', False)\n", "\n", "from gentropy.common.session import Session\n", "from gentropy.dataset.study_index import StudyIndex\n", "\n", + "pd.set_option(\"display.max_colwidth\", None)\n", + "pd.set_option(\"display.expand_frame_repr\", False)\n", "\n", "hail_dir = os.path.dirname(hl.__file__)\n", "session = Session(hail_home=hail_dir, start_hail=True, extended_spark_conf={\"spark.driver.memory\": \"12g\",\n", @@ -228,10 +228,7 @@ ] } ], - "source": [ - "print(si_old.count())\n", - "print(si_new_df.count())" - ] + "source": [] }, { "cell_type": "code", @@ -297,8 +294,8 @@ } ], "source": [ - "si_old = 
si_old.dropDuplicates(['trait_reported_low'])\n", - "joined_df = si_new_df.join(si_old, \"trait_reported_low\", how='left')\n", + "si_old = si_old.dropDuplicates([\"trait_reported_low\"])\n", + "joined_df = si_new_df.join(si_old, \"trait_reported_low\", how=\"left\")\n", "joined_df.count()" ] }, @@ -379,8 +376,7 @@ } ], "source": [ - "num_non_null_rows = joined_df.filter(joined_df.trait_efos.isNotNull()).count()\n", - "print(num_non_null_rows)" + "num_non_null_rows = joined_df.filter(joined_df.trait_efos.isNotNull()).count()" ] }, { @@ -529,8 +525,7 @@ } ], "source": [ - "column_type = dict(joined_df.dtypes)[\"traitFromSourceMappedIds\"]\n", - "print(column_type)" + "column_type = dict(joined_df.dtypes)[\"traitFromSourceMappedIds\"]" ] }, { @@ -619,8 +614,7 @@ } ], "source": [ - "column_type = dict(joined_df.dtypes)[\"traitFromSourceMappedIds\"]\n", - "print(column_type)" + "column_type = dict(joined_df.dtypes)[\"traitFromSourceMappedIds\"]" ] }, { diff --git a/notebooks/Productionizing_LD_matrix.ipynb b/notebooks/Productionizing_LD_matrix.ipynb deleted file mode 100644 index d02f7c8fb..000000000 --- a/notebooks/Productionizing_LD_matrix.ipynb +++ /dev/null @@ -1,1504 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "baf71347", - "metadata": {}, - "source": [ - "# Extracting square sub-matrices from reference\n", - "\n", - "\n", - "### Conceptual considerations:\n", - "\n", - "```\n", - "Summary statistics\n", - "↓\n", - "[window based clumping]\n", - "[ld based clumping]\n", - "↓\n", - "StudyLocus\n", - "↓\n", - "[ld matrix extraction] <- StudyIndex (studyId, ldPopulations), ldIndex, ldMatrix\n", - "↓\n", - "\n", - "```\n", - "\n", - "\n", - "- Assume we have only one ancestry" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d75ac655", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-25T11:18:41.257715Z", - "start_time": "2023-10-25T11:18:14.728571Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - "SLF4J: No SLF4J providers were found.\n", - "SLF4J: Defaulting to no-operation (NOP) logger implementation\n", - "SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.\n", - "SLF4J: Class path contains SLF4J bindings targeting slf4j-api versions 1.7.x or earlier.\n", - "SLF4J: Ignoring binding found at [jar:file:/usr/lib/spark/jars/log4j-slf4j-impl-2.18.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n", - "SLF4J: See https://www.slf4j.org/codes.html#ignoredBindings for an explanation.\n", - "Setting default log level to \"WARN\".\n", - "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", - "pip-installed Hail requires additional configuration options in Spark referring\n", - " to the path to the Hail Python module directory HAIL_DIR,\n", - " e.g. /path/to/python/site-packages/hail:\n", - " spark.jars=HAIL_DIR/backend/hail-all-spark.jar\n", - " spark.driver.extraClassPath=HAIL_DIR/backend/hail-all-spark.jar\n", - " spark.executor.extraClassPath=./hail-all-spark.jarRunning on Apache Spark version 3.3.0\n", - "SparkUI available at http://ds-genetics-etl-test-m.c.open-targets-eu-dev.internal:37397\n", - "Welcome to\n", - " __ __ <>__\n", - " / /_/ /__ __/ /\n", - " / __ / _ `/ / /\n", - " /_/ /_/\\_,_/_/_/ version 0.2.122-be9d88a80695\n", - "LOGGING: writing to /dev/null\n" - ] - } - ], - "source": [ - "from gentropy.common.session import Session\n", - "from gentropy.dataset.study_locus import StudyLocus\n", - "from gentropy.dataset.study_index import StudyIndex\n", - "from gentropy.datasource.gnomad.ld import GnomADLDMatrix\n", - "from gentropy.datasource.finngen.study_index import FinnGenStudyIndex\n", - "\n", - "import hail as hl\n", - "from hail.linalg import BlockMatrix\n", - "\n", - "from pyspark.sql import Window\n", - "import pyspark.sql.functions as f\n", - "from pyspark.sql import Column, DataFrame\n", - "\n", - "from urllib.request import urlopen\n", - "\n", - "\n", 
- "session = Session(hail_home='/opt/conda/miniconda3/lib/python3.10/site-packages/hail', start_hail=True)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "9eac3419", - "metadata": {}, - "source": [ - "## Generate FINNGEN Study table\n", - "\n", - "- Executed in 2.22s\n", - "- Building a Finngen study table from source." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "51b4cd9c", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-25T11:19:13.439965Z", - "start_time": "2023-10-25T11:19:11.603603Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-RECORD 0--------------------------------------------------------------------------------------------------------------------\n", - " studyId | FINNGEN_R9_AB1_ACTINOMYCOSIS \n", - " traitFromSource | Actinomycosis \n", - " nCases | 93 \n", - " nControls | 332343 \n", - " nSamples | 332436 \n", - " projectId | FINNGEN_R9 \n", - " studyType | gwas \n", - " hasSumstats | true \n", - " initialSampleSize | 377,277 (210,870 females and 166,407 males) \n", - " discoverySamples | [{377277, Finnish}] \n", - " summarystatsLocation | https://storage.googleapis.com/finngen-public-data-r9/summary_stats/finngen_R9_AB1_ACTINOMYCOSIS.gz \n", - " ldPopulationStructure | [{fin, 1.0}] \n", - "only showing top 1 row\n", - "\n", - "+--------------------+------------+\n", - "| studyId|ldPopulation|\n", - "+--------------------+------------+\n", - "|FINNGEN_R9_ASTHMA...| fin|\n", - "|FINNGEN_R9_E4_PIT...| fin|\n", - "|FINNGEN_R9_E4_VIT...| fin|\n", - "|FINNGEN_R9_H8_MID...| fin|\n", - "|FINNGEN_R9_L12_PI...| fin|\n", - "|FINNGEN_R9_M13_HA...| fin|\n", - "|FINNGEN_R9_M13_SP...| fin|\n", - "|FINNGEN_R9_Q17_LV...| fin|\n", - "|FINNGEN_R9_ALCOHO...| fin|\n", - "|FINNGEN_R9_CHRONL...| fin|\n", - "|FINNGEN_R9_C_DIFF...| fin|\n", - "|FINNGEN_R9_E4_OBE...| fin|\n", - "|FINNGEN_R9_GOUT_S...| fin|\n", - "|FINNGEN_R9_I9_LYM...| fin|\n", - "|FINNGEN_R9_I9_VAR...| fin|\n", - 
"|FINNGEN_R9_J10_CH...| fin|\n", - "|FINNGEN_R9_K11_HY...| fin|\n", - "|FINNGEN_R9_K11_RE...| fin|\n", - "|FINNGEN_R9_M13_OS...| fin|\n", - "|FINNGEN_R9_O15_PR...| fin|\n", - "+--------------------+------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], - "source": [ - "# Processing studies:\n", - "def update_population(ld_pop: Column) -> Column:\n", - " return f.when(\n", - " f.size(ld_pop) == 1,\n", - " ld_pop.getItem(0).ldPopulation\n", - " )\n", - "\n", - "\n", - "# Read the JSON data from the URL.\n", - "json_data = urlopen('https://r9.finngen.fi/api/phenos').read().decode(\"utf-8\")\n", - "rdd = session.spark.sparkContext.parallelize([json_data])\n", - "df = session.spark.read.json(rdd)\n", - "\n", - "\n", - "finngen_release_prefix = 'FINNGEN_R9'\n", - "finngen_summary_stats_url_prefix = 'https://storage.googleapis.com/finngen-public-data-r9/summary_stats/finngen_R9_'\n", - "finngen_summary_stats_url_suffix = '.gz'\n", - "\n", - "# Parse the study index data.\n", - "studies = FinnGenStudyIndex.from_source(\n", - " df, \n", - " finngen_release_prefix, \n", - " finngen_summary_stats_url_prefix, \n", - " finngen_summary_stats_url_suffix\n", - ")\n", - "\n", - "studies.df.show(1, False, True)\n", - "\n", - "# From the study table we only need study identifier and the ld population\n", - "# We only need ld population if the study is based on single ancestry.\n", - "studies = (\n", - " studies.df\n", - " .select(\n", - " 'studyId',\n", - " update_population(f.col('ldPopulationStructure')).alias('ldPopulation'), \n", - " )\n", - " .distinct()\n", - " .persist()\n", - ")\n", - "\n", - "studies.show()" - ] - }, - { - "cell_type": "markdown", - "id": "6452e680", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-24T10:24:17.267975Z", - "start_time": "2023-10-24T10:24:11.432842Z" - } - }, - "source": [ - "## Reading distance and ld clumped Finngen dataset\n", - "\n", - "- Extract two random loci for prototyping purposes.\n", - "- Input dataset: 
19,005 study locus from 1,387 studies." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "46e851ca", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-25T11:25:04.824517Z", - "start_time": "2023-10-25T11:25:00.848659Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 63:===================================================> (33 + 3) / 36]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----------------------------------------+---------------+--------------+--------------+\n", - "|studyId |variantId |pValueMantissa|pValueExponent|\n", - "+-----------------------------------------+---------------+--------------+--------------+\n", - "|FINNGEN_R9_C3_BASAL_CELL_CARCINOMA_EXALLC|15_29035680_G_A|2.076 |-21 |\n", - "|FINNGEN_R9_G6_SLEEPAPNO |16_53771295_C_A|2.149 |-26 |\n", - "+-----------------------------------------+---------------+--------------+--------------+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "sl = StudyLocus(\n", - " _df= (\n", - " session.spark.read\n", - " .parquet('gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/finngen/2023.10.13_ld_clumped_w_locus/')\n", - " .filter(\n", - " (f.size(f.col('qualityControls')) == 0) &\n", - " (f.size(f.col('locus')) > 100)\n", - " )\n", - " .orderBy(f.rand(seed=23))\n", - " .limit(2)\n", - " ),\n", - " _schema=StudyLocus.get_schema()\n", - ").persist()\n", - "\n", - "# Get list of leads and studies:\n", - "sl.df.select('studyId', 'variantId', 'pValueMantissa', 'pValueExponent', ).show(truncate=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 148, - "id": "661d9633", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:16:21.754731Z", - "start_time": "2023-10-27T11:16:20.553762Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"+--------------------+---------------+---------+---------+---------+-----------------------------------+\n", - "|studyLocusId |variantId |pValue |locusSize|ldSetSize|qualityControls |\n", - "+--------------------+---------------+---------+---------+---------+-----------------------------------+\n", - "|-6067699795164145074|15_27427129_A_G|9.592E-13|738 |35 |[] |\n", - "|-8027743839728879857|15_27983407_C_T|4.618E-61|837 |78 |[] |\n", - "|4889075444063922922 |15_28519016_G_A|2.786E-33|277 |-1 |[Variant not found in LD reference]|\n", - "|2224601896262245870 |15_29035680_G_A|2.076E-21|647 |5 |[] |\n", - "|-7676564604508510836|15_48134287_A_G|3.572E-12|442 |34 |[] |\n", - "+--------------------+---------------+---------+---------+---------+-----------------------------------+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "# Let's get all top-loci from this study on chromosome 15:\n", - "(\n", - " session.spark.read\n", - " .parquet('gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/finngen/2023.10.13_ld_clumped_w_locus/')\n", - " .filter(\n", - " (f.col('studyId') == 'FINNGEN_R9_C3_BASAL_CELL_CARCINOMA_EXALLC') &\n", - " (f.col('chromosome') == '15') \n", - "# f.col('ldSet').isNotNull()\n", - " )\n", - " .orderBy(f.col('position'))\n", - " .select(\n", - " 'studyLocusId',\n", - " 'variantId', \n", - " f.concat_ws('E','pValueMantissa', 'pValueExponent').alias('pValue'), \n", - "# 'studyLocusId', \n", - " f.size(f.col('locus')).alias('locusSize'),\n", - " f.size(f.col('ldSet')).alias('ldSetSize'),\n", - " f.col('qualityControls')\n", - " )\n", - "# .count()\n", - " .show(20, truncate=False)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d101a072", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "attachments": { - "image.png": { - "image/png": "" - } - }, - "cell_type": "markdown", - "id": "9db8b8ab", - "metadata": {}, - "source": [ 
- "## Comparing with Finngen finemapping.\n", - "\n", - "The same [trait](https://r9.finngen.fi/pheno/C3_BASAL_CELL_CARCINOMA_EXALLC) has 3 finemapped credible sets.\n", - "\n", - "![image.png](attachment:image.png)\n", - "\n", - "- `15:27983407:C:T` and `15:27985172:C:T` are very close, I could find `27983407`.\n", - "- Also `15_48134287_A_G` is found by both approach.\n", - "- One of the other three snps was not found in the LD set (`15_28519016_G_A`). \n", - "- `15_27427129_A_G` and `15_29035680_G_A` were resolved in the LD matrix, and were found to be not linked. But they are not in the.\n", - "\n", - "What are the r between these variants?\n", - "\n", - "- 15_27983407_C_T vs 15_27427129_A_G -> r = \n", - "- 15_27983407_C_T vs 15_29035680_G_A -> r =" - ] - }, - { - "cell_type": "code", - "execution_count": 144, - "id": "d8da5802", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T10:19:25.494510Z", - "start_time": "2023-10-27T10:19:23.020596Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------------+---------+---------+---------+---------------+----------+--------+--------+------------+\n", - "| variantId| pValue|locusSize|ldSetSize|qualityControls|chromosome|position| idx|ldPopulation|\n", - "+---------------+---------+---------+---------+---------------+----------+--------+--------+------------+\n", - "|15_27427129_A_G|9.592E-13| 738| 35| []| 15|27427129|11130387| fin|\n", - "|15_27983407_C_T|4.618E-61| 837| 78| []| 15|27983407|11133253| fin|\n", - "|15_29035680_G_A|2.076E-21| 647| 5| []| 15|29035680|11136878| fin|\n", - "+---------------+---------+---------+---------+---------------+----------+--------+--------+------------+\n", - "\n" - ] - } - ], - "source": [ - "# Extracting and resolving ld:\n", - "resolved_ld_index = (\n", - " session.spark.read\n", - " 
.parquet('gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/finngen/2023.10.13_ld_clumped_w_locus/')\n", - " .filter(\n", - " (f.col('studyId') == 'FINNGEN_R9_C3_BASAL_CELL_CARCINOMA_EXALLC') &\n", - " f.col('variantId').isin(['15_27983407_C_T', '15_27427129_A_G', '15_29035680_G_A'])\n", - " )\n", - " .orderBy(f.col('position'))\n", - " .select(\n", - " 'variantId', \n", - " f.concat_ws('E','pValueMantissa', 'pValueExponent').alias('pValue'), \n", - "# 'studyLocusId', \n", - " f.size(f.col('locus')).alias('locusSize'),\n", - " f.size(f.col('ldSet')).alias('ldSetSize'),\n", - " f.col('qualityControls')\n", - " )\n", - "# .count()\n", - " .join(ld_index, on = 'variantId', how='left')\n", - " .persist()\n", - ")\n", - "\n", - "resolved_ld_index.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 147, - "id": "722eb21d", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T10:24:03.123125Z", - "start_time": "2023-10-27T10:23:32.607310Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-10-27 10:23:48.619 Hail: INFO: Coerced sorted dataset \n", - "[Stage 942:> (0 + 1) / 1]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+---+------------------+\n", - "|i |j |entry |\n", - "+---+---+------------------+\n", - "|0 |0 |2.0000000000000084|\n", - "|0 |1 |0.4201370447477838|\n", - "|0 |2 |0.0 |\n", - "|1 |0 |0.4201370447477838|\n", - "|1 |1 |2.0000000000000004|\n", - "|1 |2 |0.0 |\n", - "|2 |0 |0.0 |\n", - "|2 |1 |0.0 |\n", - "|2 |2 |1.9999999999999996|\n", - "+---+---+------------------+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "# Accessing r data from the LD matrix:\n", - "half_matrix = (\n", - " BlockMatrix\n", - " .read(ld_matrix_template.format(POP=ld_population))\n", - " .filter(\n", - " [row['idx'] for row in resolved_ld_index.select('idx').collect()],\n", - " [row['idx'] for 
row in resolved_ld_index.select('idx').collect()]\n", - " )\n", - ")\n", - "\n", - "# matrix = half_matrix + half_matrix.T\n", - "matrix.entries().to_spark().show(100, truncate=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 153, - "id": "c316e0d0", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:18:11.801945Z", - "start_time": "2023-10-27T11:18:06.718163Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------------+----------+--------------------+--------------------+--------+--------+---------+--------------------------------+--------------------------------+---------------------------+---------------------------+--------------+--------------+-------------------------------+-------------+-------------------+---------------+-----------------+--------------------+--------------------+\n", - "| variantId|chromosome| studyId| studyLocusId|position| beta|oddsRatio|oddsRatioConfidenceIntervalLower|oddsRatioConfidenceIntervalUpper|betaConfidenceIntervalLower|betaConfidenceIntervalUpper|pValueMantissa|pValueExponent|effectAlleleFrequencyFromSource|standardError|subStudyDescription|qualityControls|finemappingMethod| locus| ldSet|\n", - "+---------------+----------+--------------------+--------------------+--------+--------+---------+--------------------------------+--------------------------------+---------------------------+---------------------------+--------------+--------------+-------------------------------+-------------+-------------------+---------------+-----------------+--------------------+--------------------+\n", - "|15_27983407_C_T| 15|FINNGEN_R9_C3_BAS...|-8027743839728879857|27983407|0.413866| null| null| null| 0.388762| 0.43897| 4.618| -61| 0.0443583| 0.025104| null| []| null|[{null, null, nul...|[{15_28482553_T_C...|\n", - 
"+---------------+----------+--------------------+--------------------+--------+--------+---------+--------------------------------+--------------------------------+---------------------------+---------------------------+--------------+--------------+-------------------------------+-------------+-------------------+---------------+-----------------+--------------------+--------------------+\n", - "\n" - ] - } - ], - "source": [ - "study_locus_id = -8027743839728879857\n", - "\n", - "# Running susie for a specific study locus:\n", - "sl = StudyLocus(\n", - " _df= (\n", - " session.spark.read\n", - " .parquet('gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/finngen/2023.10.13_ld_clumped_w_locus/')\n", - " .filter(f.col('studyLocusId') == study_locus_id)\n", - " ),\n", - " _schema=StudyLocus.get_schema()\n", - ").persist()\n", - "\n", - "sl.df.show()" - ] - }, - { - "cell_type": "markdown", - "id": "947d112a", - "metadata": {}, - "source": [ - "## Annotating study locus with ld population + Get list of ancestires" - ] - }, - { - "cell_type": "code", - "execution_count": 154, - "id": "8be0fd89", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:20:59.482466Z", - "start_time": "2023-10-27T11:20:59.062418Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-RECORD 0------------------------------------------------\n", - " studyId | FINNGEN_R9_C3_BAS... 
\n", - " variantId | 15_27983407_C_T \n", - " chromosome | 15 \n", - " studyLocusId | -8027743839728879857 \n", - " position | 27983407 \n", - " beta | 0.413866 \n", - " oddsRatio | null \n", - " oddsRatioConfidenceIntervalLower | null \n", - " oddsRatioConfidenceIntervalUpper | null \n", - " betaConfidenceIntervalLower | 0.388762 \n", - " betaConfidenceIntervalUpper | 0.43897 \n", - " pValueMantissa | 4.618 \n", - " pValueExponent | -61 \n", - " effectAlleleFrequencyFromSource | 0.0443583 \n", - " standardError | 0.025104 \n", - " subStudyDescription | null \n", - " qualityControls | [] \n", - " finemappingMethod | null \n", - " locus | [{null, null, nul... \n", - " ldSet | [{15_28482553_T_C... \n", - " ldPopulation | fin \n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "['fin']" - ] - }, - "execution_count": 154, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Adding ld population to study locus:\n", - "dl_df = (\n", - " sl.df\n", - " .join(studies, on='studyId', how='left')\n", - " .persist()\n", - ")\n", - "\n", - "dl_df.show(1, vertical=True)\n", - "\n", - "# Extract the required ld populations:\n", - "ld_populations = [row['ldPopulation'] for row in dl_df.select('ldPopulation').distinct().collect()]\n", - "\n", - "ld_populations" - ] - }, - { - "cell_type": "markdown", - "id": "b69aad94", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-24T10:34:37.443692Z", - "start_time": "2023-10-24T10:34:37.436917Z" - } - }, - "source": [ - "## Read ld index" - ] - }, - { - "cell_type": "code", - "execution_count": 155, - "id": "5465b9d3", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:21:03.778148Z", - "start_time": "2023-10-27T11:21:03.770967Z" - }, - "code_folding": [ - 2 - ] - }, - "outputs": [], - "source": [ - "from gentropy.common.utils import _liftover_loci, convert_gnomad_position_to_ensembl\n", - "\n", - "def _process_ld_indices(\n", - " ld_index_raw: hl.Table, \n", - " 
grch37_to_grch38_chain_path: str\n", - ") -> DataFrame:\n", - " \"\"\"Creates a look up table between variants and their coordinates in the LD Matrix.\n", - "\n", - " !!! info \"Gnomad's LD Matrix and Index are based on GRCh37 coordinates. This function will lift over the coordinates to GRCh38 to build the lookup table.\"\n", - "\n", - " Args:\n", - " ld_index_raw (hl.Table): LD index table from GnomAD\n", - " grch37_to_grch38_chain_path (str): Path to the chain file used to lift over the coordinates\n", - "\n", - " Returns:\n", - " DataFrame: Look up table between variants in build hg38 and their coordinates in the LD Matrix\n", - " \"\"\"\n", - " ld_index_38 = _liftover_loci(\n", - " ld_index_raw, grch37_to_grch38_chain_path, \"GRCh38\"\n", - " )\n", - "\n", - " return (\n", - " ld_index_38.to_spark()\n", - " # Filter out variants where the liftover failed\n", - " .filter(f.col(\"`locus_GRCh38.position`\").isNotNull())\n", - " .withColumn(\n", - " \"chromosome\", f.regexp_replace(\"`locus_GRCh38.contig`\", \"chr\", \"\")\n", - " )\n", - " # Temporary filter:\n", - " .filter(f.col('chromosome').isin(['16', '15']))\n", - " .withColumn(\n", - " \"position\",\n", - " convert_gnomad_position_to_ensembl(\n", - " f.col(\"`locus_GRCh38.position`\"),\n", - " f.col(\"`alleles`\").getItem(0),\n", - " f.col(\"`alleles`\").getItem(1),\n", - " ),\n", - " )\n", - " .select(\n", - " \"chromosome\",\n", - " \"position\",\n", - " f.concat_ws(\n", - " \"_\",\n", - " f.col(\"chromosome\"),\n", - " f.col(\"position\"),\n", - " f.col(\"`alleles`\").getItem(0),\n", - " f.col(\"`alleles`\").getItem(1),\n", - " ).alias(\"variantId\"),\n", - " f.col(\"idx\"),\n", - " )\n", - " # Filter out ambiguous liftover results: multiple indices for the same variant\n", - " .withColumn(\"count\", f.count(\"*\").over(Window.partitionBy([\"variantId\"])))\n", - " .filter(f.col(\"count\") == 1)\n", - " .drop(\"count\")\n", - " )\n" - ] - }, - { - "cell_type": "markdown", - "id": "15eab28a", - 
"metadata": {}, - "source": [ - "- Full genome: 5min" - ] - }, - { - "cell_type": "code", - "execution_count": 156, - "id": "e231cc9a", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:25:47.523343Z", - "start_time": "2023-10-27T11:21:07.729708Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 975:=================================================>(9996 + 4) / 10000]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------+---------+--------------------+--------+------------+\n", - "|chromosome| position| variantId| idx|ldPopulation|\n", - "+----------+---------+--------------------+--------+------------+\n", - "| 15|100068435| 15_100068435_C_T|11488485| fin|\n", - "| 15|100074914| 15_100074914_T_C|11488528| fin|\n", - "| 15|100082112| 15_100082112_C_G|11488600| fin|\n", - "| 15|100084933| 15_100084933_C_T|11488622| fin|\n", - "| 15|100086055| 15_100086055_C_T|11488643| fin|\n", - "| 15|100102338|15_100102338_CGAA...|11488802| fin|\n", - "| 15|100102474| 15_100102474_A_G|11488806| fin|\n", - "| 15|100140453| 15_100140453_G_GAC|11489087| fin|\n", - "| 15|100141050| 15_100141050_G_A|11489092| fin|\n", - "| 15|100149133| 15_100149133_G_T|11489141| fin|\n", - "| 15|100270659| 15_100270659_G_A|11490053| fin|\n", - "| 15|100317282| 15_100317282_T_C|11490464| fin|\n", - "| 15|100376040| 15_100376040_ACTT_A|11490958| fin|\n", - "| 15|100391254| 15_100391254_CA_C|11491027| fin|\n", - "| 15|100394474| 15_100394474_T_C|11491043| fin|\n", - "| 15|100466861| 15_100466861_T_C|11491614| fin|\n", - "| 15|100516200| 15_100516200_T_C|11492028| fin|\n", - "| 15|100525848| 15_100525848_T_G|11492086| fin|\n", - "| 15|100588385| 15_100588385_A_T|11492713| fin|\n", - "| 15|100605649| 15_100605649_A_T|11492813| fin|\n", - "+----------+---------+--------------------+--------+------------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - " \r" - ] - } - ], - "source": [ - "grch37_to_grch38_chain_path = 'gs://hail-common/references/grch37_to_grch38.over.chain.gz'\n", - "ld_index_raw_template = 'gs://gcp-public-data--gnomad/release/2.1.1/ld/gnomad.genomes.r2.1.1.{POP}.common.ld.variant_indices.ht'\n", - "ld_matrix_template ='gs://gcp-public-data--gnomad/release/2.1.1/ld/gnomad.genomes.r2.1.1.{POP}.common.adj.ld.bm'\n", - "\n", - "\n", - "ld_population = 'fin'\n", - "ld_index_raw_path = ld_index_raw_template.format(POP=ld_population)\n", - "\n", - "ld_index = (\n", - " _process_ld_indices(\n", - " hl.read_table(ld_index_raw_path),\n", - " grch37_to_grch38_chain_path,\n", - " )\n", - " .withColumn('ldPopulation', f.lit(ld_population))\n", - " .persist()\n", - ")\n", - "\n", - "ld_index.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 157, - "id": "e43d0dbf", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:26:04.794300Z", - "start_time": "2023-10-27T11:26:04.691498Z" - } - }, - "outputs": [], - "source": [ - "# how to do the join?\n", - "window_size = 250_000\n", - "tags = (\n", - " # Pre-process study locus:\n", - " f.broadcast(dl_df)\n", - " .select('studyLocusId', 'chromosome', 'position', 'ldPopulation')\n", - " .alias('study_locus')\n", - " .join(\n", - " (\n", - " ld_index\n", - " .selectExpr(*[f\"{col} as ld_index_{col}\" for col in ld_index.columns])\n", - " .alias('ld_index')\n", - " ),\n", - " on = [\n", - " (f.col('ld_index.ld_index_chromosome') == f.col('study_locus.chromosome')) & \n", - " (f.col('ld_index.ld_index_ldPopulation') == f.col('study_locus.ldPopulation')) &\n", - " (f.col('ld_index.ld_index_position') >= f.col('study_locus.position') - window_size) & \n", - " (f.col('ld_index.ld_index_position') <= f.col('study_locus.position') + window_size)\n", - " ],\n", - " how='left'\n", - " )\n", - " .select(\n", - " 'studyLocusId',\n", - " 'chromosome',\n", - " f.col('ld_index_variantId').alias('variantId'),\n", - " 
f.col('ld_index_idx').alias('idx'),\n", - " f.col('ld_index_position'),\n", - " f.col('ld_index_ldPopulation').alias('ldPopulation')\n", - " )\n", - " .persist()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "id": "45fb0df5", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:26:08.409160Z", - "start_time": "2023-10-27T11:26:06.312358Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 979:============================================> (173 + 27) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+-----+\n", - "| studyLocusId|count|\n", - "+--------------------+-----+\n", - "|-8027743839728879857| 2079|\n", - "+--------------------+-----+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "tags.groupby('studyLocusId').count().show()" - ] - }, - { - "cell_type": "code", - "execution_count": 159, - "id": "858894fc", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:26:14.204291Z", - "start_time": "2023-10-27T11:26:12.440937Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+----------+--------------+-------------+-------------+---------+--------+--------------+\n", - "|studyLocusId |chromosome|first_position|last_position|window_length|first_idx|last_idx|ldVariantCount|\n", - "+--------------------+----------+--------------+-------------+-------------+---------+--------+--------------+\n", - "|-8027743839728879857|15 |27733460 |28232552 |499092 |11131979 |11134057|2079 |\n", - "+--------------------+----------+--------------+-------------+-------------+---------+--------+--------------+\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "1" - ] - }, - "execution_count": 159, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "# Collecting region:\n", - "ld_windows = (\n", - " tags\n", - " .orderBy('studyLocusId', 'ld_index_position')\n", - " .groupBy('studyLocusId', 'chromosome', 'ldPopulation')\n", - " .agg(\n", - " f.first(f.col('ld_index_position')).alias('first_position'),\n", - " f.first(f.col('variantId')).alias('first_variantId'),\n", - " f.first(f.col('idx')).alias('first_idx'),\n", - " f.last(f.col('ld_index_position')).alias('last_position'),\n", - " f.last(f.col('variantId')).alias('last_variantId'),\n", - " f.last(f.col('idx')).alias('last_idx'),\n", - " f.size(f.collect_list(f.col('variantId'))).alias('ldVariantCount')\n", - " )\n", - " .withColumn('window_length', f.col('last_position') - f.col('first_position'))\n", - " .persist()\n", - ")\n", - "\n", - "(\n", - " ld_windows\n", - " .select(\n", - " 'studyLocusId',\n", - " 'chromosome',\n", - " 'first_position',\n", - " 'last_position',\n", - " 'window_length',\n", - " 'first_idx',\n", - " 'last_idx',\n", - " 'ldVariantCount'\n", - " )\n", - " .show(truncate=False)\n", - ")\n", - "\n", - "\n", - "ld_windows.count()" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "id": "61d51efe", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:26:22.608155Z", - "start_time": "2023-10-27T11:26:22.421535Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Row(studyLocusId=-8027743839728879857, chromosome='15', ldPopulation='fin', first_position=27733460, first_variantId='15_27733460_AGCCAAACTGGCTCATGGCC_A', first_idx=11131979, last_position=28232552, last_variantId='15_28232552_G_GA', last_idx=11134057, ldVariantCount=2079, window_length=499092)" - ] - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "row = ld_windows.collect()[0]\n", - "row" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "id": "d4d006bd", - "metadata": { - "ExecuteTime": { - "end_time": 
"2023-10-27T11:26:31.400420Z", - "start_time": "2023-10-27T11:26:30.851547Z" - } - }, - "outputs": [], - "source": [ - "ld_population = row['ldPopulation']\n", - "study_locus_id = row['studyLocusId']\n", - "chromosome = row['chromosome']\n", - "first_index = row['first_idx']\n", - "last_index = row['last_idx']\n", - "\n", - "# For each row, we need to open the gnomad \n", - "half_matrix = (\n", - " BlockMatrix\n", - " .read(ld_matrix_template.format(POP=ld_population))\n", - " .filter(\n", - " range(first_index, last_index),\n", - " range(first_index, last_index)\n", - " )\n", - ")\n", - "\n", - "matrix = half_matrix + half_matrix.T\n" - ] - }, - { - "cell_type": "code", - "execution_count": 162, - "id": "e24da4ca", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:27:24.435664Z", - "start_time": "2023-10-27T11:26:35.945855Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-10-27 11:26:58.169 Hail: INFO: Coerced sorted dataset \n", - " \r" - ] - } - ], - "source": [ - "(\n", - " matrix.entries().to_spark()\n", - " .select(\n", - " (f.col('i')+first_index).alias('idx_i'),\n", - " (f.col('j')+first_index).alias('idx_j'),\n", - " f.when(f.col('i') == f.col('j'), f.col('entry')/2).otherwise(f.col('entry')).alias('r'),\n", - " f.lit(study_locus_id).alias('study_locus_ids')\n", - " )\n", - " # Joining with i:\n", - " .join(\n", - " (\n", - " tags\n", - " .select(\n", - " f.col('variantId').alias('variantIdLeft'), \n", - " f.col('idx').alias('idx_i')\n", - " )\n", - " ), \n", - " on='idx_i', how='outer'\n", - " )\n", - " # Joining with i:\n", - " .join(\n", - " (\n", - " tags\n", - " .select(\n", - " 'chromosome', \n", - " f.col('variantId').alias('variantIdRight'), \n", - " f.col('idx').alias('idx_j')\n", - " )\n", - " ), \n", - " on='idx_j', how='outer'\n", - " )\n", - " .select(\n", - " 'variantIdLeft',\n", - " 'variantIdRight',\n", - " 'study_locus_ids',\n", - " 'r'\n", - " )\n", - " .distinct()\n", - " 
.write.mode('overwrite').parquet(f'gs://ot-team/dsuveges/ld_matrix-{study_locus_id}')\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 164, - "id": "3cb94848", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:28:28.344710Z", - "start_time": "2023-10-27T11:28:27.670811Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-RECORD 0---------------------------------------------------\n", - " studyId | FINNGEN_R9_C3_BASAL_CELL_CARCINOMA_EXALLC \n", - " variantIdLead | 15_27983407_C_T \n", - " studyLocusId | -8027743839728879857 \n", - " variantId | 15_27733459_AGCCAAACTGGCTCATGGCC_A \n", - " pValueMantissa | 1.937 \n", - " pValueExponent | -2 \n", - " beta | 0.0267247 \n", - " standardError | 0.011429 \n", - "only showing top 1 row\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "837" - ] - }, - "execution_count": 164, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "study_locus_id = -8027743839728879857\n", - "\n", - "# Get data:\n", - "ld_matrix = session.spark.read.parquet(f'gs://ot-team/dsuveges/ld_matrix-{study_locus_id}')\n", - "\n", - "# Select one study locus:\n", - "selected_locus = (\n", - " dl_df\n", - " .filter(f.col('studyLocusId') == study_locus_id)\n", - " .select(\n", - " '*',\n", - " f.explode_outer('locus').alias('exploded_locus')\n", - " )\n", - " .select(\n", - " 'studyId',\n", - " f.col('variantId').alias('variantIdLead'),\n", - " f.col('studyLocusId'),\n", - " *[f'exploded_locus.{col}' for col in ['variantId', 'pValueMantissa', 'pValueExponent', 'beta', 'standardError']]\n", - " )\n", - " .persist()\n", - ")\n", - "\n", - "selected_locus.show(1, False, True)\n", - "selected_locus.count()" - ] - }, - { - "cell_type": "code", - "execution_count": 189, - "id": "0e571433", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T13:49:41.356331Z", - "start_time": "2023-10-27T13:49:40.463119Z" - } - }, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "+--------------------+--------------------+---------------+---------------+---------------+-------------------+-------------------+----------+------------------+--------------------+\n", - "| studyId| studyLocusId| variantIdLead| variantIdLeft| variantIdRight|pValueMantissaRight|pValueExponentRight| betaRight|standardErrorRight| r|\n", - "+--------------------+--------------------+---------------+---------------+---------------+-------------------+-------------------+----------+------------------+--------------------+\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27792793_C_T|15_27772290_T_C| 9.979| -6| 0.0730063| 0.0165261| 0.3188276023578284|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27950388_C_T|15_27778556_C_T| 1.142| -2| 0.119731| 0.0473309| -0.1845689041174883|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28104601_A_T|15_27809200_G_A| 1.397| -2|-0.0286999| 0.0116763| 0.01919714154020592|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27970325_G_A|15_27809200_G_A| 1.397| -2|-0.0286999| 0.0116763|-0.12413267238343029|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27899878_T_C|15_27851307_A_G| 5.606| -4|-0.0424098| 0.0122927|-0.45516734696600447|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27914364_C_T|15_27851307_A_G| 5.606| -4|-0.0424098| 0.0122927| -0.4029686644067302|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27828335_G_A|15_27858692_G_A| 3.599| -4| 0.0427448| 0.0119804| 0.09141477739652863|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27845815_A_G|15_27858692_G_A| 3.599| -4| 0.0427448| 0.0119804| 0.7803430284267874|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27820229_G_A|15_27858692_G_A| 3.599| -4| 0.0427448| 0.0119804|-0.08881336794530735|\n", - 
"|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27825631_G_C|15_27915426_C_A| 1.001| -6| 0.0654293| 0.0133765| 0.13879209804155437|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28211160_A_T|15_27915426_C_A| 1.001| -6| 0.0654293| 0.0133765| 0.05996001018362227|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28019021_G_T|15_27915426_C_A| 1.001| -6| 0.0654293| 0.0133765| 0.03843623265438152|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28081390_A_T|15_27938863_T_A| 2.109| -3| 0.157002| 0.0510665|-0.04189151880774...|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27907664_A_T|15_27938914_G_T| 2.725| -2| 0.0355862| 0.0161172|-0.16859090364294946|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27819940_C_T|15_27938914_G_T| 2.725| -2| 0.0355862| 0.0161172| 0.11697321459267764|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27853556_T_G|15_27938914_G_T| 2.725| -2| 0.0355862| 0.0161172| -0.1404576556679461|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28052911_T_C|15_27951206_G_A| 1.437| -2| 0.0695541| 0.0284131| 0.04461220378655805|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27790520_G_A|15_27951206_G_A| 1.437| -2| 0.0695541| 0.0284131| 0.04420892575950266|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27879868_C_A|15_27989264_G_T| 2.454| -17| 0.147291| 0.0173896| 0.2404542405419156|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27822551_G_A|15_27989264_G_T| 2.454| -17| 0.147291| 0.0173896|-0.04613729472121665|\n", - "+--------------------+--------------------+---------------+---------------+---------------+-------------------+-------------------+----------+------------------+--------------------+\n", - "only showing top 20 rows\n", - "\n", - "385641\n", - "621.0\n" - ] - }, - { - "data": { - "text/plain": [ - "621" - ] - }, - 
"execution_count": 189, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Filtering matrix:\n", - "processed_matrix = (\n", - " ld_matrix\n", - " .join(\n", - " selected_locus.select(f.col('variantId').alias('variantIdLeft')),\n", - " on='variantIdLeft', how='inner'\n", - " )\n", - " .join(\n", - " selected_locus.withColumnRenamed('variantId', 'variantIdRight'), \n", - " on='variantIdRight', how='inner'\n", - " )\n", - " .select(\n", - " 'studyId',\n", - " 'studyLocusId',\n", - " 'variantIdLead',\n", - " 'variantIdLeft',\n", - " 'variantIdRight',\n", - " f.col('pValueMantissa').alias('pValueMantissaRight'),\n", - " f.col('pValueExponent').alias('pValueExponentRight'),\n", - " f.col('beta').alias('betaRight'),\n", - " f.col('standardError').alias('standardErrorRight'),\n", - " 'r'\n", - " )\n", - " .persist()\n", - ")\n", - "\n", - "processed_matrix.show()\n", - "print(processed_matrix.count())\n", - "print(sqrt(processed_matrix.count()))\n", - "processed_matrix.select('variantIdRight').distinct().count()" - ] - }, - { - "cell_type": "code", - "execution_count": 167, - "id": "381f093b", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:29:01.779404Z", - "start_time": "2023-10-27T11:28:57.285168Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "data": { - "text/plain": [ - "array([[1. , 0.88019644, 0.28333008, 0.25666969, 0.26345801],\n", - " [0.88019644, 1. , 0.26027515, 0.24115952, 0.2442993 ],\n", - " [0.28333008, 0.26027515, 1. , 0.85898573, 0.89168227],\n", - " [0.25666969, 0.24115952, 0.85898573, 1. , 0.96268566],\n", - " [0.26345801, 0.2442993 , 0.89168227, 0.96268566, 1. 
]])" - ] - }, - "execution_count": 167, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "pivoted_matrix = (\n", - " processed_matrix\n", - " .orderBy(f.col('variantIdLeft'), f.col('variantIdRight'))\n", - " .groupBy('variantIdLeft')\n", - " .pivot(\"variantIdRight\")\n", - " .agg(f.first('r'))\n", - " .orderBy('variantIdLeft')\n", - " # Convert to numpy array ingested by SuSie:\n", - " .toPandas().set_index('variantIdLeft').to_numpy()\n", - ")\n", - "\n", - "pivoted_matrix[0:5, 0:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "id": "444e2ca2", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-26T08:49:13.002374Z", - "start_time": "2023-10-26T08:49:12.604741Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 1. , 0.99851182, 0.85089485, ..., 0.04532387,\n", - " 0.02258468, 0.02476075],\n", - " [ 0.99851182, 1. , 0.85111292, ..., 0.0465783 ,\n", - " 0.02253374, 0.02470884],\n", - " [ 0.85089485, 0.85111292, 1. , ..., 0.04748182,\n", - " 0.01191697, 0.01652519],\n", - " ...,\n", - " [ 0.04532387, 0.0465783 , 0.04748182, ..., 1. ,\n", - " -0.05908013, -0.05417476],\n", - " [ 0.02258468, 0.02253374, 0.01191697, ..., -0.05908013,\n", - " 1. , 0.96019982],\n", - " [ 0.02476075, 0.02470884, 0.01652519, ..., -0.05417476,\n", - " 0.96019982, 1. 
]])" - ] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pivoted_matrix.toPandas().set_index('variantIdLeft').to_numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": 190, - "id": "8ed590f1", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T13:50:01.812451Z", - "start_time": "2023-10-27T13:49:58.985895Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "(\n", - " processed_matrix\n", - " # Convert to numpy array ingested by SuSie:\n", - " .write.mode('overwrite').parquet(f'gs://ot-team/dsuveges/processed-ld-matrix-{study_locus_id}')\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 169, - "id": "5fc2f7ac", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T11:30:11.297300Z", - "start_time": "2023-10-27T11:30:09.400659Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "# selected_locus.write.mode('overwrite').parquet('gs://ot-team/dsuveges/selected_studyLocus_2224601896262245870')\n", - "(\n", - " processed_matrix\n", - " .select(\n", - " 'studyId',\n", - " f.col('variantIdRight').alias('variantId'),\n", - " f.col('pValueMantissaRight').alias('pValueMantissa'),\n", - " f.col('pValueExponentRight').alias('pValueExponent'),\n", - " f.col('betaRight').alias('beta'),\n", - " f.col('standardErrorRight').alias('standardError'),\n", - " )\n", - " .distinct()\n", - " .write.mode('overwrite').parquet(f'gs://ot-team/dsuveges/selected_studyLocus_{study_locus_id}')\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 187, - "id": "a4d47348", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T13:33:39.686914Z", - "start_time": "2023-10-27T13:33:39.682284Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1.00, 0.18, 0.18, 0.97, 0.97, 0.15, 0.18, 0.11, 0.13, 
0.12\n", - "0.18, 1.00, 0.99, 0.15, 0.15, 0.03, 0.03, 0.02, 0.01, 0.02\n", - "0.18, 0.99, 1.00, 0.15, 0.15, 0.03, 0.03, 0.02, 0.01, 0.02\n", - "0.97, 0.15, 0.15, 1.00, 1.00, 0.14, 0.17, 0.11, 0.12, 0.12\n", - "0.97, 0.15, 0.15, 1.00, 1.00, 0.14, 0.17, 0.11, 0.12, 0.12\n", - "0.15, 0.03, 0.03, 0.14, 0.14, 1.00, 0.32, 0.45, 0.50, 0.25\n", - "0.18, 0.03, 0.03, 0.17, 0.17, 0.32, 1.00, 0.29, 0.33, 0.52\n", - "0.11, 0.02, 0.02, 0.11, 0.11, 0.45, 0.29, 1.00, 0.91, 0.29\n", - "0.13, 0.01, 0.01, 0.12, 0.12, 0.50, 0.33, 0.91, 1.00, 0.33\n", - "0.12, 0.02, 0.02, 0.12, 0.12, 0.25, 0.52, 0.29, 0.33, 1.00\n" - ] - } - ], - "source": [ - "print(\n", - " '\\n'.join(\n", - " [\n", - " ', '.join([f'{value**2:.2f}' for value in row]) \n", - " for row in\n", - " (\n", - " pivoted_matrix\n", - " [[291, 293, 295, 322, 323, 508, 514, 529, 534, 515]]\n", - " [:,[291, 293, 295, 322, 323, 508, 514, 529, 534, 515]]\n", - " )\n", - " ]\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 188, - "id": "f3910534", - "metadata": { - "ExecuteTime": { - "end_time": "2023-10-27T13:45:22.792801Z", - "start_time": "2023-10-27T13:45:22.721043Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+--------------------+---------------+---------------+---------------+-------------------+-------------------+----------+------------------+--------------------+\n", - "| studyId| studyLocusId| variantIdLead| variantIdLeft| variantIdRight|pValueMantissaRight|pValueExponentRight| betaRight|standardErrorRight| r|\n", - "+--------------------+--------------------+---------------+---------------+---------------+-------------------+-------------------+----------+------------------+--------------------+\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27792793_C_T|15_27772290_T_C| 9.979| -6| 0.0730063| 0.0165261| 0.3188276023578284|\n", - 
"|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27950388_C_T|15_27778556_C_T| 1.142| -2| 0.119731| 0.0473309| -0.1845689041174883|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28104601_A_T|15_27809200_G_A| 1.397| -2|-0.0286999| 0.0116763| 0.01919714154020592|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27970325_G_A|15_27809200_G_A| 1.397| -2|-0.0286999| 0.0116763|-0.12413267238343029|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27899878_T_C|15_27851307_A_G| 5.606| -4|-0.0424098| 0.0122927|-0.45516734696600447|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27914364_C_T|15_27851307_A_G| 5.606| -4|-0.0424098| 0.0122927| -0.4029686644067302|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27828335_G_A|15_27858692_G_A| 3.599| -4| 0.0427448| 0.0119804| 0.09141477739652863|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27845815_A_G|15_27858692_G_A| 3.599| -4| 0.0427448| 0.0119804| 0.7803430284267874|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27820229_G_A|15_27858692_G_A| 3.599| -4| 0.0427448| 0.0119804|-0.08881336794530735|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27825631_G_C|15_27915426_C_A| 1.001| -6| 0.0654293| 0.0133765| 0.13879209804155437|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28211160_A_T|15_27915426_C_A| 1.001| -6| 0.0654293| 0.0133765| 0.05996001018362227|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28019021_G_T|15_27915426_C_A| 1.001| -6| 0.0654293| 0.0133765| 0.03843623265438152|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28081390_A_T|15_27938863_T_A| 2.109| -3| 0.157002| 0.0510665|-0.04189151880774...|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27907664_A_T|15_27938914_G_T| 2.725| -2| 0.0355862| 0.0161172|-0.16859090364294946|\n", - 
"|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27819940_C_T|15_27938914_G_T| 2.725| -2| 0.0355862| 0.0161172| 0.11697321459267764|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27853556_T_G|15_27938914_G_T| 2.725| -2| 0.0355862| 0.0161172| -0.1404576556679461|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_28052911_T_C|15_27951206_G_A| 1.437| -2| 0.0695541| 0.0284131| 0.04461220378655805|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27790520_G_A|15_27951206_G_A| 1.437| -2| 0.0695541| 0.0284131| 0.04420892575950266|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27879868_C_A|15_27989264_G_T| 2.454| -17| 0.147291| 0.0173896| 0.2404542405419156|\n", - "|FINNGEN_R9_C3_BAS...|-8027743839728879857|15_27983407_C_T|15_27822551_G_A|15_27989264_G_T| 2.454| -17| 0.147291| 0.0173896|-0.04613729472121665|\n", - "+--------------------+--------------------+---------------+---------------+---------------+-------------------+-------------------+----------+------------------+--------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], - "source": [ - "processed_matrix.show()" - ] - } - ], - "metadata": { - "_draft": { - "nbviewer_url": "https://gist.github.com/DSuveges/df6768b7d5637842e009aa945f3ea062" - }, - "gist": { - "data": { - "description": "Issue-3131-Productionizing_LD_matrix.ipynb", - "public": false - }, - "id": "df6768b7d5637842e009aa945f3ea062" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/Release_QC_metrics.ipynb b/notebooks/Release_QC_metrics.ipynb index 
0052a3cc8..aa0924711 100644 --- a/notebooks/Release_QC_metrics.ipynb +++ b/notebooks/Release_QC_metrics.ipynb @@ -68,9 +68,11 @@ "\"\"\"notebook for release qc metrics.\"\"\"\n", "\n", "import sys\n", - "from gentropy.common.session import Session\n", + "\n", "from pyspark.sql import functions as f\n", "\n", + "from gentropy.common.session import Session\n", + "\n", "sys.path.append(\"../../gentropy/src/\")\n", "release_path=\"../../otg_releases\"\n", "release_ver=\"2403\"\n", @@ -119,7 +121,6 @@ "variant_index=session.spark.read.parquet(variant_index_path, recursiveFileLookup=True)\n", "\n", "# How many variants?\n", - "print(\"Variant index contains \", variant_index.select(f.col(\"variantId\")).distinct().count(), \" unique variants.\")\n", "\n", "# How many variants with MAF>=0.01 for EUR population?\n", "#variant_index.filter(variant_index[\"alleleFrequencies.populationName\"] > 0.05).show(10, False)" @@ -175,9 +176,7 @@ "v2g=session.spark.read.parquet(v2g_path, recursiveFileLookup=True)\n", "\n", "#How many variants?\n", - "print(\"Unique variants in v2g release: \", v2g.select(f.col(\"variantId\")).distinct().count(), \", total variant to gene assignments: \", v2g.count(), \", number of v2g assignments where score > 0.8: \", v2g.filter(v2g[\"score\"] > 0.8).count(), \"(\", round( v2g.filter(v2g[\"score\"] > 0.8).count()/v2g.select(f.col(\"variantId\")).distinct().count(), 3), \"%)\")\n", "sample_size_quartiles = v2g.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of v2g_score: Mean: \", v2g.select(f.mean(v2g[\"score\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])\n", "#v2g.select().toPandas().plot.hist()\n", "#v2g.show()" ] @@ -262,22 +261,16 @@ "finngen_susie_path=f\"{release_path}/{release_ver}/credible_set/finngen_susie\"\n", "\n", "finngen_susie=session.spark.read.parquet(finngen_susie_path, recursiveFileLookup=True)\n", - 
"print(\"Number of unique finngen susie CSs: \", finngen_susie.select(\"studyId\", \"region\", \"credibleSetIndex\").distinct().count())\n", "\n", "# FinnGen:\n", "finngen_index=session.spark.read.parquet(finngen_index_path, recursiveFileLookup=True)\n", "# Number of CSs, studies.\n", - "print(\"Ingested \", finngen_susie.select(\"studyId\", \"region\", \"credibleSetIndex\").distinct().count(), \" Credible sets from\", finngen_index.select(f.col(\"studyId\")).distinct().count(), \"finngen studies\")\n", "sample_size_quartiles = finngen_index.stat.approxQuantile(\"nSamples\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of finngen sample sizes: Mean: \", finngen_index.select(f.mean(finngen_index[\"nSamples\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])\n", "# Number of unique studyids with at leas one CS.\n", - "print(\"Number of finngen studies with at least one CS: \", finngen_susie.select(\"studyId\").distinct().count())\n", "# Number of CSs with at leas one SNP with PIP>0.9\n", - "print(\"Number of CS with top SNP PP > 0.9: \", finngen_susie.select(\"studyId\", \"region\", \"credibleSetIndex\", \"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).filter(f.col(\"top_PP\") > 0.9).count())\n", "# The descriptive summary of 99% CS size and histogram/density plot\n", "\n", "credset_size_quartiles = finngen_susie.select(\"studyId\", \"region\", \"credibleSetIndex\", \"locus.posteriorProbability\").withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\"))).stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of finngen credible set sizes: L.quart: \", credset_size_quartiles[0], \"Median: \", credset_size_quartiles[1], \"U.quart: \", credset_size_quartiles[2])\n", "\n", "finngen_susie.select(\"studyId\", \"region\", \"credibleSetIndex\", 
\"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\"))).toPandas().plot.scatter(x=\"credset_size\", y=\"top_PP\", xlim=[0, 500], alpha=0.05, label=\"finngen susie CSs\", title=\"finngen susie credsets\")\n", "\n", @@ -334,17 +327,13 @@ "# Number of CSs.\n", "finngen_pics=session.spark.read.parquet(finngen_pics_path, recursiveFileLookup=True)\n", "#gwascat_sumstats.printSchema()\n", - "print(\"Number of unique finngen pics CSs: \", finngen_pics.select(\"studyLocusId\").distinct().count(), \" in \", finngen_pics.select(\"studyId\").distinct().count(), \" studies.\")\n", "# keep only credible sets snps\n", "\n", "finngen_pics_fm=finngen_pics.select(\"studyId\", \"studyLocusId\", \"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\")))\n", "finngen_pics_fm.select(\"credset_size\", \"top_PP\").toPandas().plot.scatter(x=\"credset_size\", y=\"top_PP\", alpha=0.05, xlim=[0, 500], label=\"finngen PICS CS\", title=\"finngen_pics CS\")\n", - "print(\"Number of finngen_pics CS with top SNP PP > 0.9: \", finngen_pics_fm.filter(f.col(\"top_PP\") > 0.9).distinct().count())\n", "sample_size_quartiles = finngen_index.stat.approxQuantile(\"nSamples\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of finngen_pics sample sizes: L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])\n", "\n", - "sample_size_quartiles = finngen_pics_fm.stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of finngen_pics credset sizes: Mean: \", finngen_pics_fm.select(f.mean(finngen_pics_fm[\"credset_size\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])" + "sample_size_quartiles = 
finngen_pics_fm.stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)" ] }, { @@ -360,9 +349,7 @@ ] } ], - "source": [ - "print(\"There are \", finngen_susie.join(finngen_pics, on=[\"studyId\", \"studyLocusId\"], how=\"inner\").count(), \" common loci between finngen susie and finngen pics\")" - ] + "source": [] }, { "cell_type": "code", @@ -389,8 +376,7 @@ ], "source": [ "finngen_matching=finngen_pics_fm.withColumnRenamed(\"top_PP\", \"pics_PP\").join(finngen_susie_fm.withColumnRenamed(\"top_PP\", \"susie_PP\"), on=[\"studyId\", \"studyLocusId\"], how=\"inner\")\n", - "finngen_matching.select(\"pics_PP\", \"susie_PP\").toPandas().plot.scatter(x=\"susie_PP\", y=\"pics_PP\", alpha=0.05, title=\"finngen_pics vs finngen_susie CS\")\n", - "print(\"correlation coef: \", finngen_matching.stat.corr(\"pics_PP\", \"susie_PP\"))" + "finngen_matching.select(\"pics_PP\", \"susie_PP\").toPandas().plot.scatter(x=\"susie_PP\", y=\"pics_PP\", alpha=0.05, title=\"finngen_pics vs finngen_susie CS\")" ] }, { @@ -509,9 +495,7 @@ "# eQTLcat:\n", "# Number of studies\n", "eqtl_index=session.spark.read.parquet(eqtl_index_path, recursiveFileLookup=True)\n", - "print(\"Number of unique eQTLcat studies: \", eqtl_index.select(f.col(\"studyId\")).distinct().count())\n", "# Number of tissues, list of tissues\n", - "print(\"Number of unqiue eQTLcat tissues: \", eqtl_index.select(f.col(\"tissueFromSourceId\")).distinct().count())\n", "#eqtl_index.select(f.col(\"tissueFromSourceId\")).distinct().show(truncate=False)\n", "\n", "# Credible_set. Please use Daniels’ notebook as a reference. 
For each subfolder:\n", @@ -522,17 +506,12 @@ "\n", "\n", "# Number of CSs, studies.\n", - "print(\"Ingested \", eqtlcat_susie.select(\"studyId\", \"region\", \"credibleSetIndex\").distinct().count(), \" Credible sets from\", eqtl_index.select(f.col(\"studyId\")).distinct().count(), \"eQTL catalog studies\")\n", "sample_size_quartiles = eqtl_index.stat.approxQuantile(\"nSamples\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of eQTL catalog sample sizes: Mean: \", eqtl_index.select(f.mean(eqtl_index[\"nSamples\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])\n", "# Number of unique studyids with at leas one CS.\n", - "print(\"Number of eQTL catalog studies with at least one CS: \", eqtlcat_susie.select(\"studyId\").distinct().count())\n", "# Number of CSs with at leas one SNP with PIP>0.9\n", - "print(\"Number of CS with top SNP PP > 0.9: \", eqtlcat_susie.select(\"studyId\", \"region\", \"credibleSetIndex\", \"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).filter(f.col(\"top_PP\") > 0.9).count())\n", "# The descriptive summary of 99% CS size and histogram/density plot\n", "\n", "credset_size_quartiles = eqtlcat_susie.select(\"studyId\", \"region\", \"credibleSetIndex\", \"locus.posteriorProbability\").withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\"))).stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of eQTL credible set sizes: L.quart: \", credset_size_quartiles[0], \"Median: \", credset_size_quartiles[1], \"U.quart: \", credset_size_quartiles[2])\n", "\n", "# Out of mem error:\n", "#eqtlcat_susie.select(\"studyId\", \"region\", \"credibleSetIndex\", \"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\"))).toPandas().plot.scatter(x=\"credset_size\", 
y=\"top_PP\", xlim=[0, 500], alpha=0.05, label=\"finngen susie CSs\", title=\"finngen susie credsets\")\n" @@ -663,9 +642,7 @@ "# Gwas Catalog:\n", "gwascat_index=session.spark.read.parquet(gwascat_path, recursiveFileLookup=True)\n", "# Number of GWAS curated studies\n", - "print(\"Number of unique gwascat studies: \", gwascat_index.select(f.col(\"studyId\")).distinct().count())\n", "# Number of studies with full GWAS sumstats\n", - "print(\"Number of unique SUMSTATS gwascat studies: \", gwascat_index.filter(f.col(\"hasSumstats\") == True).select(f.col(\"studyId\")).distinct().count())\n", "#gwascat_index\n", "# The histogram/density plot for total sample size separately for curated studies and full GWAS\n", "#gwascat_index.filter(f.col(\"hasSumstats\") == True).select(f.col(\"nSamples\")).toPandas().plot.hist(bins=25, alpha=0.5, label=\"Sumstats GWAScat sample size\", title=\"Sumstats GWAScat sample size\")\n", @@ -677,20 +654,15 @@ "# Number of CSs.\n", "gwascat_sumstats=session.spark.read.parquet(gwascat_sumstats_path, recursiveFileLookup=True)\n", "\n", - "print(\"Number of unique gwas catalog sumstats CSs: \", gwascat_sumstats.select(\"studyLocusId\").distinct().count(), \" in \", gwascat_sumstats.select(\"studyId\").distinct().count(), \" studies.\")\n", "\n", "\n", "sample_size_quartiles = gwascat_index.join(gwascat_sumstats, how=\"inner\", on=\"studyId\").stat.approxQuantile(\"nSamples\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of SUMSTATS gwas sample sizes: L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])\n", - "#\n", "\n", "\n", "gwascat_sumstats_fm=gwascat_sumstats.select(\"studyId\", \"studyLocusId\", \"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\")))\n", "gwascat_sumstats_fm.select(\"credset_size\", \"top_PP\").toPandas().plot.scatter(x=\"credset_size\", 
y=\"top_PP\", alpha=0.05, label=\"gwascat sumstats PICS CS\", title=\"gwascat sumstats PICS CS\")\n", - "print(\"Number of SUMSTATS CS with top SNP PP > 0.9: \", gwascat_sumstats_fm.filter(f.col(\"top_PP\") > 0.9).distinct().count())\n", "\n", "sample_size_quartiles = gwascat_sumstats_fm.stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of SUMSTATS gwascat pics credset sizes: Mean: \", gwascat_sumstats_fm.select(f.mean(gwascat_sumstats_fm[\"credset_size\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])\n", "\n", "\n", "# gwas catalog curated (PICs):\n", @@ -698,17 +670,13 @@ "# Number of CSs.\n", "gwascat_curated=session.spark.read.parquet(gwascat_curated_path, recursiveFileLookup=True)\n", "#gwascat_sumstats.printSchema()\n", - "print(\"Number of unique gwas catalog curated CSs: \", gwascat_curated.select(\"studyLocusId\").distinct().count(), \" in \", gwascat_curated.select(\"studyId\").distinct().count(), \" studies.\")\n", "# keep only credible sets snps\n", "\n", "gwascat_curated_fm=gwascat_curated.select(\"studyId\", \"studyLocusId\", \"locus.posteriorProbability\").withColumn(\"top_PP\", f.col(\"posteriorProbability\").getItem(0)).withColumn(\"credset_size\", f.size(f.col(\"posteriorProbability\")))\n", "gwascat_curated_fm.select(\"credset_size\", \"top_PP\").toPandas().plot.scatter(x=\"credset_size\", y=\"top_PP\", alpha=0.05, label=\"gwascat curated PICS CS\", title=\"gwascat curated PICS CS\")\n", - "print(\"Number of CURATED CS with top SNP PP > 0.9: \", gwascat_curated_fm.filter(f.col(\"top_PP\") > 0.9).distinct().count())\n", "sample_size_quartiles = gwascat_index.join(gwascat_sumstats, how=\"anti\", on=\"studyId\").stat.approxQuantile(\"nSamples\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of CURATED gwas sample sizes: L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", 
sample_size_quartiles[2])\n", "\n", - "sample_size_quartiles = gwascat_curated_fm.stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of CURATED gwascat pics credset sizes: Mean: \", gwascat_curated_fm.select(f.mean(gwascat_curated_fm[\"credset_size\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])" + "sample_size_quartiles = gwascat_curated_fm.stat.approxQuantile(\"credset_size\", [0.25, 0.5, 0.75], 0.01)" ] }, { @@ -758,9 +726,7 @@ "coloc_path=f\"{release_path}/{release_ver}/colocalisation\"\n", "coloc=session.spark.read.parquet(coloc_path, recursiveFileLookup=True)\n", "\n", - "print(\"Number of colocalisations: \", coloc.count(), \" , of which, \", coloc.filter(f.col(\"clpp\") > 0.8).count(), \" > 0.8 clpp (\", round((coloc.filter(f.col(\"clpp\") > 0.8).count()/coloc.count()), 3)*100, \"%)\")\n", "Avg_overlaps=coloc.groupBy(\"leftStudyLocusId\").count().agg(f.avg(\"count\")).collect()[0][0]\n", - "print(\"Average number of overlaps per CS: \", Avg_overlaps)\n", "\n" ] }, @@ -850,10 +816,6 @@ "l2g_path=f\"{release_path}/{release_ver}/locus_to_gene_predictions\"\n", "l2g=session.spark.read.parquet(l2g_path, recursiveFileLookup=True)\n", "l2g.select(\"score\").toPandas().plot.hist(bins=10, alpha=0.5, title=\"l2g scores\")\n", - "print(\"A total of \", l2g.select(\"studyLocusId\", \"geneId\").count(), \"l2g predictions were computed.\")\n", - "print(\"There are\", l2g.select(\"studyLocusId\", \"geneId\").distinct().count(), \" UNIQUE locus to gene predictions for\", l2g.select(\"studyLocusId\").distinct().count(), \" unique studyloci\")\n", - "print(\"Where \", l2g.filter(f.col(\"score\") > 0.5).select(\"studyLocusId\").distinct().count(), \" studyloci contains at least one gene with score > 0.5\")\n", - "print(\"Of these, \", l2g.filter(f.col(\"score\") > 0.5).groupBy(\"studyLocusId\").count().filter(f.col(\"count\") > 1).count(), \" 
studyloci contains more than one gene with score > 0.5\")\n", "\n", "# There are duplicated l2g predictions studyLocusId with finngen pics and susie" ] @@ -911,9 +873,9 @@ "source": [ "from pyspark.sql import Window\n", "\n", - "window = Window.partitionBy(l2g['studyLocusId']).orderBy(l2g['score'].desc())\n", - "l2g = l2g.withColumn('rn', f.row_number().over(window))\n", - "l2g_max_scores = l2g.filter(l2g['rn'] == 1).drop('rn')\n", + "window = Window.partitionBy(l2g[\"studyLocusId\"]).orderBy(l2g[\"score\"].desc())\n", + "l2g = l2g.withColumn(\"rn\", f.row_number().over(window))\n", + "l2g_max_scores = l2g.filter(l2g[\"rn\"] == 1).drop(\"rn\")\n", "l2g_max_scores.select(\"score\").toPandas().plot.hist(bins=10, alpha=0.5, title=\"l2g scores (top gene assignment)\")" ] }, @@ -960,8 +922,7 @@ "l2g_finngen_pics=l2g_max_scores.join(finngen_pics.select(\"studyLocusId\", \"studyId\"), on=\"studyLocusId\", how=\"inner\")\n", "l2g_finngen_pics.select(\"score\").toPandas().plot.hist(bins=10, alpha=0.5, title=\"l2g scores (top gene assignment), finngen_pics\")\n", "\n", - "sample_size_quartiles = l2g_finngen_pics.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of finngen PICS l2g scores: mean:\", l2g_finngen_pics.select(f.mean(l2g_finngen_pics[\"score\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])" + "sample_size_quartiles = l2g_finngen_pics.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)" ] }, { @@ -998,8 +959,7 @@ "l2g_finngen_susie=l2g_max_scores.join(finngen_susie.select(\"studyLocusId\", \"studyId\"), on=\"studyLocusId\", how=\"inner\")\n", "l2g_finngen_susie.select(\"score\").toPandas().plot.hist(bins=10, alpha=0.5, title=\"(top gene assignment), l2g_finngen_susie\")\n", "\n", - "sample_size_quartiles = l2g_finngen_susie.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of l2g_finngen_susie: mean:\", 
l2g_finngen_susie.select(f.mean(l2g_finngen_susie[\"score\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])" + "sample_size_quartiles = l2g_finngen_susie.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)" ] }, { @@ -1036,8 +996,7 @@ "l2g_gwas_curated=l2g_max_scores.join(gwascat_curated.select(\"studyLocusId\", \"studyId\"), on=\"studyLocusId\", how=\"inner\")\n", "l2g_gwas_curated.select(\"score\").toPandas().plot.hist(bins=10, alpha=0.5, title=\"(top gene assignment), l2g_gwas_curated_pics\")\n", "\n", - "sample_size_quartiles = l2g_gwas_curated.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of l2g_gwas_curated PICS l2g scores: mean:\", l2g_gwas_curated.select(f.mean(l2g_gwas_curated[\"score\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])" + "sample_size_quartiles = l2g_gwas_curated.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)" ] }, { @@ -1074,8 +1033,7 @@ "l2g_gwas_sumstats=l2g_max_scores.join(gwascat_sumstats.select(\"studyLocusId\", \"studyId\"), on=\"studyLocusId\", how=\"inner\")\n", "l2g_gwas_sumstats.select(\"score\").toPandas().plot.hist(bins=10, alpha=0.5, title=\"(top gene assignment), l2g_gwas_sumstats\")\n", "\n", - "sample_size_quartiles = l2g_gwas_sumstats.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)\n", - "print(\"Summary of l2g_gwas_sumstats PICS l2g scores: mean:\", l2g_gwas_sumstats.select(f.mean(l2g_gwas_sumstats[\"score\"])).collect()[0][0], \"L.quart: \", sample_size_quartiles[0], \"Median: \", sample_size_quartiles[1], \"U.quart: \", sample_size_quartiles[2])" + "sample_size_quartiles = l2g_gwas_sumstats.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)" ] } ], diff --git a/notebooks/gwas_cat_benchmark.ipynb b/notebooks/gwas_cat_benchmark.ipynb index cce01a050..a2ecf5455 100644 --- 
a/notebooks/gwas_cat_benchmark.ipynb +++ b/notebooks/gwas_cat_benchmark.ipynb @@ -37,6 +37,7 @@ "source": [ "# import matplotlib.pyplot as plt\n", "import pyspark.sql.functions as f\n", + "\n", "from gentropy.common.session import Session\n", "from gentropy.common.spark_helpers import order_array_of_structs_by_field\n", "from gentropy.dataset.ld_index import LDIndex\n", @@ -319,24 +320,24 @@ "source": [ "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", "\n", - "plt.figure(figsize=(12, 6))\n", + "# plt.figure(figsize=(12, 6))\n", "\n", - "# Histogram for locusLength\n", - "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", - "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Length\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Length\")\n", + "# # Histogram for locusLength\n", + "# plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "# plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Length\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Length\")\n", "\n", - "# Histogram for locusSize\n", - "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", - "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Size\")\n", + "# # Histogram for locusSize\n", + "# plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "# plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Size\")\n", "\n", - "plt.tight_layout()\n", - "plt.show()" + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -356,13 +357,13 @@ } ], "source": [ - "plt.figure(figsize=(10, 6))\n", - "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", - "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", - 
"plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Locus Length\")\n", - "plt.grid(True)\n", - "plt.show()" + "# plt.figure(figsize=(10, 6))\n", + "# plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "# plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Locus Length\")\n", + "# plt.grid(True)\n", + "# plt.show()" ] }, { @@ -458,9 +459,7 @@ ], "source": [ "nan = susie_fm.df.filter(f.isnan(\"credibleSetlog10BF\"))\n", - "null = susie_fm.df.filter(f.isnull(\"credibleSetlog10BF\"))\n", - "print(\"Number of credible sets with 'not a number' as the logBF: \", nan.count())\n", - "print(\"Number of credible sets with 'null' as the logBF: \", null.count())" + "null = susie_fm.df.filter(f.isnull(\"credibleSetlog10BF\"))" ] }, { @@ -639,39 +638,39 @@ ], "source": [ "pdf = susie_results.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", - "plt.figure(figsize=(12, 12))\n", + "# plt.figure(figsize=(12, 12))\n", "\n", - "# Histogram for purityMinR2\n", - "plt.subplot(2, 2, 1)\n", - "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMinR2\")\n", - "plt.xlabel(\"purityMinR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMinR2\n", + "# plt.subplot(2, 2, 1)\n", + "# plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMinR2\")\n", + "# plt.xlabel(\"purityMinR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for purityMeanR2\n", - "plt.subplot(2, 2, 2)\n", - "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMeanR2\")\n", - "plt.xlabel(\"purityMeanR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMeanR2\n", + "# plt.subplot(2, 2, 2)\n", + "# plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMeanR2\")\n", + "# plt.xlabel(\"purityMeanR2\")\n", + "# 
plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for topPP\n", - "plt.subplot(2, 2, 3)\n", - "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of topPP\")\n", - "plt.xlabel(\"topPP\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for topPP\n", + "# plt.subplot(2, 2, 3)\n", + "# plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of topPP\")\n", + "# plt.xlabel(\"topPP\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for credSetSize\n", - "plt.subplot(2, 2, 4)\n", - "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of credSetSize\")\n", - "plt.xlabel(\"credSetSize\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for credSetSize\n", + "# plt.subplot(2, 2, 4)\n", + "# plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of credSetSize\")\n", + "# plt.xlabel(\"credSetSize\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Adjust layout to prevent overlap\n", - "plt.tight_layout()\n", - "plt.show()" + "# # Adjust layout to prevent overlap\n", + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -730,9 +729,7 @@ } ], "source": [ - "first_credset = susie_results.filter(f.col(\"credibleSetIndex\") == 1)\n", - "print(\"Number of primary credible sets: \", first_credset.count())\n", - "print(\"Number of unique studyIds in primary credible sets: \", first_credset.select(\"studyId\").distinct().count())" + "first_credset = susie_results.filter(f.col(\"credibleSetIndex\") == 1)" ] }, { @@ -900,39 +897,39 @@ ], "source": [ "pdf = first_credset.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", - "plt.figure(figsize=(12, 12))\n", + "# plt.figure(figsize=(12, 12))\n", "\n", - "# Histogram for purityMinR2\n", - "plt.subplot(2, 2, 1)\n", - "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMinR2\")\n", - "plt.xlabel(\"purityMinR2\")\n", - "plt.ylabel(\"Frequency\")\n", 
+ "# # Histogram for purityMinR2\n", + "# plt.subplot(2, 2, 1)\n", + "# plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMinR2\")\n", + "# plt.xlabel(\"purityMinR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for purityMeanR2\n", - "plt.subplot(2, 2, 2)\n", - "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMeanR2\")\n", - "plt.xlabel(\"purityMeanR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMeanR2\n", + "# plt.subplot(2, 2, 2)\n", + "# plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMeanR2\")\n", + "# plt.xlabel(\"purityMeanR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for topPP\n", - "plt.subplot(2, 2, 3)\n", - "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of topPP\")\n", - "plt.xlabel(\"topPP\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for topPP\n", + "# plt.subplot(2, 2, 3)\n", + "# plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of topPP\")\n", + "# plt.xlabel(\"topPP\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for credSetSize\n", - "plt.subplot(2, 2, 4)\n", - "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of credSetSize\")\n", - "plt.xlabel(\"credSetSize\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for credSetSize\n", + "# plt.subplot(2, 2, 4)\n", + "# plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of credSetSize\")\n", + "# plt.xlabel(\"credSetSize\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Adjust layout to prevent overlap\n", - "plt.tight_layout()\n", - "plt.show()" + "# # Adjust layout to prevent overlap\n", + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -985,13 +982,7 @@ " )\n", " .withColumn(\"topPP\", f.col(\"locus\").getField(\"posteriorProbability\"))\n", " 
.filter(~f.isnan(\"topPP\"))\n", - ")\n", - "\n", - "print(\"Number of high quality credible sets: \", qc_credsets.count())\n", - "print(\n", - " \"Number of unique studyIds in high quality credible sets: \",\n", - " qc_credsets.select(\"studyId\").distinct().count(),\n", - ")" + ")\n" ] }, { @@ -1139,39 +1130,39 @@ ], "source": [ "pdf = qc_credsets.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", - "plt.figure(figsize=(12, 12))\n", + "# plt.figure(figsize=(12, 12))\n", "\n", - "# Histogram for purityMinR2\n", - "plt.subplot(2, 2, 1)\n", - "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMinR2\")\n", - "plt.xlabel(\"purityMinR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMinR2\n", + "# plt.subplot(2, 2, 1)\n", + "# plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMinR2\")\n", + "# plt.xlabel(\"purityMinR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for purityMeanR2\n", - "plt.subplot(2, 2, 2)\n", - "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMeanR2\")\n", - "plt.xlabel(\"purityMeanR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMeanR2\n", + "# plt.subplot(2, 2, 2)\n", + "# plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMeanR2\")\n", + "# plt.xlabel(\"purityMeanR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for topPP\n", - "plt.subplot(2, 2, 3)\n", - "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of topPP\")\n", - "plt.xlabel(\"topPP\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for topPP\n", + "# plt.subplot(2, 2, 3)\n", + "# plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of topPP\")\n", + "# plt.xlabel(\"topPP\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for credSetSize\n", - "plt.subplot(2, 
2, 4)\n", - "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of credSetSize\")\n", - "plt.xlabel(\"credSetSize\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for credSetSize\n", + "# plt.subplot(2, 2, 4)\n", + "# plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of credSetSize\")\n", + "# plt.xlabel(\"credSetSize\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Adjust layout to prevent overlap\n", - "plt.tight_layout()\n", - "plt.show()" + "# # Adjust layout to prevent overlap\n", + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -1436,24 +1427,24 @@ "source": [ "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", "\n", - "plt.figure(figsize=(12, 6))\n", + "# plt.figure(figsize=(12, 6))\n", "\n", - "# Histogram for locusLength\n", - "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", - "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Length\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Length\")\n", + "# # Histogram for locusLength\n", + "# plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "# plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Length\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Length\")\n", "\n", - "# Histogram for locusSize\n", - "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", - "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Size\")\n", + "# # Histogram for locusSize\n", + "# plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "# plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Size\")\n", "\n", - "plt.tight_layout()\n", - "plt.show()" + "# plt.tight_layout()\n", + "# 
plt.show()" ] }, { @@ -1473,13 +1464,13 @@ } ], "source": [ - "plt.figure(figsize=(10, 6))\n", - "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", - "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", - "plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Locus Length\")\n", - "plt.grid(True)\n", - "plt.show()" + "# plt.figure(figsize=(10, 6))\n", + "# plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "# plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Locus Length\")\n", + "# plt.grid(True)\n", + "# plt.show()" ] } ], diff --git a/notebooks/l2g_benchmark.ipynb b/notebooks/l2g_benchmark.ipynb deleted file mode 100644 index 857d25088..000000000 --- a/notebooks/l2g_benchmark.ipynb +++ /dev/null @@ -1,603 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Benchmarking L2G predictions\n", - "\n", - "The objective of this notebook is to compare the new implementation of L2G with the last results we display in production (22.09.1).\n", - "\n", - "The notebook is divided in 3 parts:\n", - "1. Data preparation\n", - "2. Describe the data\n", - "3. Compare the results\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Setting default log level to \"WARN\".\n", - "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "23/12/11 16:18:49 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable\n" - ] - } - ], - "source": [ - "from datetime import datetime\n", - "from gentropy.dataset.study_locus import StudyLocus\n", - "from gentropy.dataset.l2g_prediction import L2GPrediction\n", - "from gentropy.common.session import Session\n", - "\n", - "import pyspark.sql.functions as f\n", - "from pyspark.sql import DataFrame\n", - "\n", - "import wandb\n", - "\n", - "%matplotlib inline\n", - "\n", - "session = Session(spark_uri=\"local[*]\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data preparation" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def prepare_predictions(credible_set: StudyLocus, predictions: L2GPrediction) -> DataFrame:\n", - " \"\"\"Prepares predictions dataframe for testing and comparison.\"\"\"\n", - " return (\n", - " credible_set.df\n", - " .select(\"studyLocusId\", \"variantId\", \"studyId\").distinct()\n", - " .join(\n", - " predictions.df, on=\"studyLocusId\"\n", - " )\n", - " .select(\"studyLocusId\", \"variantId\", \"studyId\", \"geneId\", \"score\")\n", - " .distinct()\n", - " )\n", - "\n", - "def prepare_production_predictions(old_predictions: DataFrame) -> DataFrame:\n", - " \"\"\"Prepares L2G predictions for testing and comparison.\"\"\"\n", - " return (\n", - " old_predictions\n", - " .select(\n", - " f.col(\"study_id\").alias(\"studyId\"),\n", - " f.concat_ws(\"_\", f.col(\"chrom\"), f.col(\"pos\"), f.col(\"ref\"), f.col(\"alt\")).alias(\"variantId\"),\n", - " f.col(\"gene_id\").alias(\"geneId\"),\n", - " f.col(\"y_proba_full_model\").alias(\"score\"),\n", - " )\n", - " .withColumn(\"studyLocusId\", StudyLocus.assign_study_locus_id(f.col(\"studyId\"), f.col(\"variantId\")))\n", - " .distinct()\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - 
"source": [ - "# RAW DATA\n", - "credible_set_path = \"gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/credible_set\"\n", - "predictions_path = \"gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/l2g_predictions\"\n", - "old_predictions_path = \"gs://genetics-portal-dev-data/22.09.1/outputs/l2g\"\n", - "\n", - "predictions = L2GPrediction.from_parquet(session, predictions_path)\n", - "old_predictions = session.spark.read.parquet(old_predictions_path)\n", - "credible_set = StudyLocus.from_parquet(session, credible_set_path, recursiveFileLookup=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "predictions_prepared = prepare_predictions(credible_set, predictions).persist()\n", - "old_predictions_prepared = prepare_production_predictions(old_predictions).persist()\n", - "\n", - "joining_cols = [\"studyLocusId\", \"geneId\", \"variantId\", \"studyId\"]\n", - "comparison_df = (\n", - " predictions_prepared.selectExpr(*joining_cols, \"score as new_score\")\n", - " .join(\n", - " old_predictions_prepared.selectExpr(*joining_cols, \"score as old_score\"), on=joining_cols, how=\"inner\"\n", - " )\n", - " .distinct()\n", - " .persist()\n", - " )\n", - "\n", - "comparison_output_path = f\"gs://ot-team/irene/l2g_results_comparison-{datetime.today().strftime('%Y-%m-%d')}\"\n", - "# comparison_df.write.parquet(\"gs://ot-team/irene/l2g_results_comparison\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data description\n", - "\n", - "Note: Comparison_df is the result of intersecting the L2G datasets with both the production and the new implementation." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "COUNT PER DATASET\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "predictions_prepared: 9061746\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "old_predictions_prepared: 4083797\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 23:====================================================> (196 + 4) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "comparison_df: 2194371\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "## 1. L2G predicted credible sets count\n", - "\n", - "datasets = {\n", - " \"predictions_prepared\": predictions_prepared,\n", - " \"old_predictions_prepared\": old_predictions_prepared,\n", - " \"comparison_df\": comparison_df\n", - "}\n", - "\n", - "print(\"COUNT PER DATASET\")\n", - "for dataset in datasets.items():\n", - " print(f\"{dataset[0]}: {dataset[1].count()}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "AVERAGE SCORES PER DATASET\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "predictions_prepared: 0.20072320074989783\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "old_predictions_prepared: 0.0720378814046517\n", - "MEDIAN SCORE PER DATASET\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "predictions_prepared: 0.023215007036924362\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 74:==============================================> (171 + 8) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "old_predictions_prepared: 0.012324056588113308\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "## 2. Descriptive stats for L2G scores\n", - "\n", - "print(\"AVERAGE SCORES PER DATASET\")\n", - "for dataset in datasets.items():\n", - " if dataset[0] != \"comparison_df\":\n", - " print(f\"{dataset[0]}: {dataset[1].agg(f.avg('score')).collect()[0][0]}\")\n", - "\n", - "print(\"MEDIAN SCORE PER DATASET\")\n", - "for dataset in datasets.items():\n", - " if dataset[0] != \"comparison_df\":\n", - " print(f\"{dataset[0]}: {dataset[1].approxQuantile('score',[0.5],0.1)[0]}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "# OF ASSOCIATIONS WITH SCORE > 0.9\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "predictions_prepared: 5275 (0.06 from total)\n", - "old_predictions_prepared: 3942 (0.1 from total)\n" - ] - } - ], - "source": [ - "## How many associations with a score > 0.9\n", - "\n", - "print(\"# OF ASSOCIATIONS WITH SCORE > 0.9\")\n", - "for dataset in datasets.items():\n", - " if dataset[0] != \"comparison_df\":\n", - " print(f\"{dataset[0]}: {dataset[1].filter(f.col('score') >= 0.9).count()} ({round(dataset[1].filter(f.col('score') >= 0.9).count()/dataset[1].count() * 100, 2)} from total)\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Results comparison\n", - "\n", - "The comparisons 
dataset represents the intersection of the predictions of the production and the new implementation. The objective is to compare the L2G scores of both implementations for the same studyLoci." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
studyLocusIdgeneIdvariantIdstudyIdnew_scoreold_score
0-9221872607204368224ENSG0000006571719_3414090_G_AGCST0102410.0018870.008250
1-9219397145747036852ENSG0000016805611_65619907_T_AGCST0072340.0381770.014908
2-9219397145747036852ENSG0000017537611_65619907_T_AGCST0072340.0158460.011380
3-9217975156633736203ENSG000001874756_26325235_T_CGCST0094560.0396680.010985
4-9216978755013122322ENSG0000015761721_41816125_G_AGCST0059450.2710090.024520
\n", - "
" - ], - "text/plain": [ - " studyLocusId geneId variantId studyId \\\n", - "0 -9221872607204368224 ENSG00000065717 19_3414090_G_A GCST010241 \n", - "1 -9219397145747036852 ENSG00000168056 11_65619907_T_A GCST007234 \n", - "2 -9219397145747036852 ENSG00000175376 11_65619907_T_A GCST007234 \n", - "3 -9217975156633736203 ENSG00000187475 6_26325235_T_C GCST009456 \n", - "4 -9216978755013122322 ENSG00000157617 21_41816125_G_A GCST005945 \n", - "\n", - " new_score old_score \n", - "0 0.001887 0.008250 \n", - "1 0.038177 0.014908 \n", - "2 0.015846 0.011380 \n", - "3 0.039668 0.010985 \n", - "4 0.271009 0.024520 " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "comparison_pdf = comparison_df.toPandas()\n", - "comparison_pdf.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SCORES CORRELATION 0.3968047070256464\n", - "Split by chromosome...\n", - "Chromosome 1: 0.4283002148537705\n", - "Chromosome 10: 0.4548083324805242\n", - "Chromosome 11: 0.3952137911268187\n", - "Chromosome 12: 0.4185634370780096\n", - "Chromosome 13: 0.42117917239984404\n", - "Chromosome 14: 0.39845735792304493\n", - "Chromosome 15: 0.40346861517903765\n", - "Chromosome 16: 0.39846240000598415\n", - "Chromosome 17: 0.395304979654295\n", - "Chromosome 18: 0.41437506238370103\n", - "Chromosome 19: 0.4425118135710343\n", - "Chromosome 2: 0.3889598461816933\n", - "Chromosome 20: 0.43686865328167446\n", - "Chromosome 21: 0.3849138600053731\n", - "Chromosome 22: 0.43046292359639526\n", - "Chromosome 3: 0.4111657046267522\n", - "Chromosome 4: 0.4291713768129795\n", - "Chromosome 5: 0.39581618467547613\n", - "Chromosome 6: 0.37446130946623823\n", - "Chromosome 7: 0.4059565003311735\n", - "Chromosome 8: 0.44203370064434944\n", - "Chromosome 9: 0.40815294157407295\n", - "Chromosome X: 0.12625008528673395\n" - ] - } - ], - "source": 
[ - "## Correlation between old and new scores\n", - "\n", - "overall_corr = comparison_pdf[\"old_score\"].corr(comparison_pdf[\"new_score\"])\n", - "print(\"SCORES CORRELATION\", overall_corr)\n", - "\n", - "print(\"Split by chromosome...\")\n", - "\n", - "comparison_pdf[\"chromosome\"] = comparison_pdf[\"variantId\"].str.split(\"_\", expand=True)[0]\n", - "\n", - "for group in comparison_pdf.sort_values(\"chromosome\").groupby(\"chromosome\"):\n", - " corr = group[1][\"old_score\"].corr(group[1][\"new_score\"])\n", - " print(f\"Chromosome {group[0]}: {corr}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## Add correlation coefficient to finished run in W&B\n", - "# See dashboard here to find run ID: https://wandb.ai/open-targets/otg_l2g/table?workspace=user-opentargets\n", - "my_run = \"4cyi1qvz\"\n", - "\n", - "wandb.init(id=my_run, project=\"otg_l2g\", resume=True)\n", - "wandb.log({\"correlationProduction\": overall_corr})\n", - "wandb.finish()" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "comparison_pdf.plot.scatter(\n", - " x=\"new_score\",\n", - " y=\"old_score\",\n", - " title=\"Distribution of L2G scores\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/pics_benchmark.ipynb b/notebooks/pics_benchmark.ipynb index d8a2f6e83..0dd05b18b 100644 --- a/notebooks/pics_benchmark.ipynb +++ b/notebooks/pics_benchmark.ipynb @@ -1,904 +1,878 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Benchmarking new PICS implementation\n", - "\n", - "The objective of this notebook is to compare the new implementation of PICS estimated on GWAS Catalog associations using gnomAD LD reference, against the previous implementation using 1000 genomes phase III LD reference. \n", - "\n", - "1. Describe the new dataset\n", - " - Number of signals covered.\n", - " - Number of signals dropped.\n", - "2. 
Copare with old PICS Dataset.\n", - " - Δ number of covered study (not particularly relevant given updates in GWAS Catalog)\n", - " - Δ number of covered peaks from studies found in the old release - might see increased coverage.\n", - " - Δ in the recovered credible set: number of variants, change in posterior probability.\n", - " - Δ in the average number of credible sets.\n", - "\n", - " " - ] - }, + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Benchmarking new PICS implementation\n", + "\n", + "The objective of this notebook is to compare the new implementation of PICS estimated on GWAS Catalog associations using gnomAD LD reference, against the previous implementation using 1000 genomes phase III LD reference. \n", + "\n", + "1. Describe the new dataset\n", + " - Number of signals covered.\n", + " - Number of signals dropped.\n", + "2. Copare with old PICS Dataset.\n", + " - Δ number of covered study (not particularly relevant given updates in GWAS Catalog)\n", + " - Δ number of covered peaks from studies found in the old release - might see increased coverage.\n", + " - Δ in the recovered credible set: number of variants, change in posterior probability.\n", + " - Δ in the average number of credible sets.\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import pyspark.sql.functions as f\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.window import Window\n", + "\n", + "spark = SparkSession.builder.getOrCreate()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1 Describing the new dataset\n", + "\n", + "1. Study count.\n", + "2. Association count.\n", + "3. Studies split.\n", + "4. Associations not resolved in LD set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import pyspark.sql.functions as f\n", - "import pyspark.sql.types as t\n", - "from pyspark.sql import SparkSession, DataFrame\n", - "from pyspark.sql.window import Window\n", - "\n", - "spark = SparkSession.builder.getOrCreate()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- chromosome: string (nullable = true)\n", + " |-- variantId: string (nullable = true)\n", + " |-- studyId: string (nullable = true)\n", + " |-- position: string (nullable = true)\n", + " |-- referenceAllele: string (nullable = true)\n", + " |-- alternateAllele: string (nullable = true)\n", + " |-- pValueMantissa: float (nullable = true)\n", + " |-- pValueExponent: integer (nullable = true)\n", + " |-- beta: string (nullable = true)\n", + " |-- beta_ci_lower: double (nullable = true)\n", + " |-- beta_ci_upper: double (nullable = true)\n", + " |-- odds_ratio: string (nullable = true)\n", + " |-- odds_ratio_ci_lower: double (nullable = true)\n", + " |-- odds_ratio_ci_upper: double (nullable = true)\n", + " |-- qualityControl: array (nullable = true)\n", + " | |-- element: string (containsNull = true)\n", + " |-- sampleSize: double (nullable = true)\n", + " |-- tagVariantId: string (nullable = true)\n", + " |-- R_overall: double (nullable = true)\n", + " |-- pics_mu: double (nullable = true)\n", + " |-- pics_std: double (nullable = true)\n", + " |-- pics_postprob: double (nullable = true)\n", + " |-- pics_95_perc_credset: boolean (nullable = true)\n", + " |-- pics_99_perc_credset: boolean (nullable = true)\n", + " |-- hasResolvedCredibleSet: boolean (nullable = false)\n", + "\n" + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1 Describing the new dataset\n", - "\n", - "1. Study count.\n", - "2. Association count.\n", - "3. 
Studies split.\n", - "4. Associations not resolved in LD set." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 82:> (0 + 1) / 1]\r" + ] }, { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root\n", - " |-- chromosome: string (nullable = true)\n", - " |-- variantId: string (nullable = true)\n", - " |-- studyId: string (nullable = true)\n", - " |-- position: string (nullable = true)\n", - " |-- referenceAllele: string (nullable = true)\n", - " |-- alternateAllele: string (nullable = true)\n", - " |-- pValueMantissa: float (nullable = true)\n", - " |-- pValueExponent: integer (nullable = true)\n", - " |-- beta: string (nullable = true)\n", - " |-- beta_ci_lower: double (nullable = true)\n", - " |-- beta_ci_upper: double (nullable = true)\n", - " |-- odds_ratio: string (nullable = true)\n", - " |-- odds_ratio_ci_lower: double (nullable = true)\n", - " |-- odds_ratio_ci_upper: double (nullable = true)\n", - " |-- qualityControl: array (nullable = true)\n", - " | |-- element: string (containsNull = true)\n", - " |-- sampleSize: double (nullable = true)\n", - " |-- tagVariantId: string (nullable = true)\n", - " |-- R_overall: double (nullable = true)\n", - " |-- pics_mu: double (nullable = true)\n", - " |-- pics_std: double (nullable = true)\n", - " |-- pics_postprob: double (nullable = true)\n", - " |-- pics_95_perc_credset: boolean (nullable = true)\n", - " |-- pics_99_perc_credset: boolean (nullable = true)\n", - " |-- hasResolvedCredibleSet: boolean (nullable = false)\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 82:> (0 + 1) / 1]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-RECORD 0------------------------------------------\n", - " chromosome | 6 \n", - " variantId | 6_13215826_A_G \n", - " studyId | GCST000101_1 \n", - " position | 13215826 \n", - " referenceAllele 
| A \n", - " alternateAllele | G \n", - " pValueMantissa | 3.0 \n", - " pValueExponent | -6 \n", - " beta | null \n", - " beta_ci_lower | null \n", - " beta_ci_upper | null \n", - " odds_ratio | null \n", - " odds_ratio_ci_lower | null \n", - " odds_ratio_ci_upper | null \n", - " qualityControl | [Subsignificant p-value] \n", - " sampleSize | 1094.0 \n", - " tagVariantId | 6_13215826_A_G \n", - " R_overall | 1.0 \n", - " pics_mu | 5.522878745280337 \n", - " pics_std | 0.0 \n", - " pics_postprob | 0.12718888994626093 \n", - " pics_95_perc_credset | true \n", - " pics_99_perc_credset | true \n", - " hasResolvedCredibleSet | true \n", - "only showing top 1 row\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "new_study_locus = (\n", - " spark.read.parquet(\"gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/pics_credible_set/\")\n", - " .withColumn('pics_99_perc_credset', f.when(f.col('tagVariantId').isNull(), False).otherwise(f.col('pics_99_perc_credset')))\n", - " .withColumn(\n", - " 'hasResolvedCredibleSet', \n", - " f.when(\n", - " f.array_contains(\n", - " f.collect_set(f.col('pics_99_perc_credset')).over(Window.partitionBy('studyId', 'variantId')), \n", - " True\n", - " ),\n", - " True\n", - " ).otherwise(False)\n", - " )\n", - " .persist()\n", - ")\n", - "\n", - "\n", - "new_study_locus.printSchema()\n", - "new_study_locus.show(1, False, True)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "-RECORD 0------------------------------------------\n", + " chromosome | 6 \n", + " variantId | 6_13215826_A_G \n", + " studyId | GCST000101_1 \n", + " position | 13215826 \n", + " referenceAllele | A \n", + " alternateAllele | G \n", + " pValueMantissa | 3.0 \n", + " pValueExponent | -6 \n", + " beta | null \n", + " beta_ci_lower | null \n", + " beta_ci_upper | null \n", + " odds_ratio | null \n", + " odds_ratio_ci_lower | null \n", + " odds_ratio_ci_upper | null \n", 
+ " qualityControl | [Subsignificant p-value] \n", + " sampleSize | 1094.0 \n", + " tagVariantId | 6_13215826_A_G \n", + " R_overall | 1.0 \n", + " pics_mu | 5.522878745280337 \n", + " pics_std | 0.0 \n", + " pics_postprob | 0.12718888994626093 \n", + " pics_95_perc_credset | true \n", + " pics_99_perc_credset | true \n", + " hasResolvedCredibleSet | true \n", + "only showing top 1 row\n", + "\n" + ] }, { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "22/12/19 11:45:49 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", - "[Stage 224:============================================> (173 + 16) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Study count: 35956\n", - "Association (unique study/variant pairs) count: 433108\n", - "Associations with resolved credible set: 381056 (88.0%)\n", - "Number of good (non-flagged) associations without resolved credible set: 39763 (9.2%)\n", - "Number of good (non-flagged) associations with resolved credible set: 260736 (60.2%)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "study_count = new_study_locus.select('studyId').distinct().count()\n", - "association_count = new_study_locus.select('studyId', 'variantId').distinct().count()\n", - "association_w_credible_set = new_study_locus.filter(f.col('hasResolvedCredibleSet')).persist()\n", - "credible_set_count = association_w_credible_set.select('studyId', 'variantId').distinct().count()\n", - "failed_w_ld = (\n", - " new_study_locus\n", - " # Selecting good associations without credible sets:\n", - " .filter(\n", - " (~f.col('hasResolvedCredibleSet')) & \n", - " (f.size(f.col('qualityControl'))>0)\n", - " )\n", - " # Get associations:\n", - " .select('studyId', 'variantId')\n", - " .distinct()\n", - " .count()\n", - ")\n", - "good_association_count = 
(\n", - " association_w_credible_set\n", - " # Drop failed associations:\n", - " .filter(f.size(f.col('qualityControl')) == 0)\n", - " .select('studyId', 'variantId')\n", - " .distinct()\n", - " .count()\n", - ")\n", - "\n", - "print(f'Study count: {study_count}')\n", - "print(f'Association (unique study/variant pairs) count: {association_count}')\n", - "print(f'Associations with resolved credible set: {credible_set_count} ({round(credible_set_count/association_count*100, 1)}%)')\n", - "print(f'Number of good (non-flagged) associations without resolved credible set: {failed_w_ld} ({round(failed_w_ld/association_count*100, 1)}%)')\n", - "print(f'Number of good (non-flagged) associations with resolved credible set: {good_association_count} ({round(good_association_count/association_count*100, 1)}%)')\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "new_study_locus = (\n", + " spark.read.parquet(\"gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/pics_credible_set/\")\n", + " .withColumn(\"pics_99_perc_credset\", f.when(f.col(\"tagVariantId\").isNull(), False).otherwise(f.col(\"pics_99_perc_credset\")))\n", + " .withColumn(\n", + " \"hasResolvedCredibleSet\",\n", + " f.when(\n", + " f.array_contains(\n", + " f.collect_set(f.col(\"pics_99_perc_credset\")).over(Window.partitionBy(\"studyId\", \"variantId\")),\n", + " True\n", + " ),\n", + " True\n", + " ).otherwise(False)\n", + " )\n", + " .persist()\n", + ")\n", + "\n", + "\n", + "new_study_locus.printSchema()\n", + "new_study_locus.show(1, False, True)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/19 11:45:49 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", + "[Stage 224:============================================> (173 + 16) / 200]\r" + ] }, { - "attachments": {}, - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "Focusing only on the actual credible sets." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Study count: 35956\n", + "Association (unique study/variant pairs) count: 433108\n", + "Associations with resolved credible set: 381056 (88.0%)\n", + "Number of good (non-flagged) associations without resolved credible set: 39763 (9.2%)\n", + "Number of good (non-flagged) associations with resolved credible set: 260736 (60.2%)\n" + ] }, { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "22/12/19 11:57:56 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of resolved credible sets: 381056\n", - "Studies with resolved credible sets: 33723\n", - "Number of lead/tag pairs: 18722043\n" - ] - } - ], - "source": [ - "# Thu\n", - "credible_sets = new_study_locus.filter(f.col('pics_99_perc_credset')).persist()\n", - "resolved_assoc_count = credible_sets.select('studyId', 'variantId').distinct().count()\n", - "resolved_study_count = credible_sets.select('studyId').distinct().count()\n", - "lead_tag_pair_count = credible_sets.select('studyId', 'variantId', 'tagVariantId').distinct().count()\n", - "\n", - "grouped_credset_pdf = credible_sets.groupBy('studyId', 'variantId').count().toPandas()\n", - "\n", - "print(f'Number of resolved credible sets: {resolved_assoc_count}')\n", - "print(f'Studies with resolved credible sets: {resolved_study_count}')\n", - "print(f'Number of lead/tag pairs: {lead_tag_pair_count}')\n", - "\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "study_count = new_study_locus.select(\"studyId\").distinct().count()\n", + "association_count = new_study_locus.select(\"studyId\", \"variantId\").distinct().count()\n", + 
"association_w_credible_set = new_study_locus.filter(f.col(\"hasResolvedCredibleSet\")).persist()\n", + "credible_set_count = association_w_credible_set.select(\"studyId\", \"variantId\").distinct().count()\n", + "failed_w_ld = (\n", + " new_study_locus\n", + " # Selecting good associations without credible sets:\n", + " .filter(\n", + " (~f.col(\"hasResolvedCredibleSet\")) &\n", + " (f.size(f.col(\"qualityControl\"))>0)\n", + " )\n", + " # Get associations:\n", + " .select(\"studyId\", \"variantId\")\n", + " .distinct()\n", + " .count()\n", + ")\n", + "good_association_count = (\n", + " association_w_credible_set\n", + " # Drop failed associations:\n", + " .filter(f.size(f.col(\"qualityControl\")) == 0)\n", + " .select(\"studyId\", \"variantId\")\n", + " .distinct()\n", + " .count()\n", + ")\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Focusing only on the actual credible sets." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/19 11:57:56 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", + " \r" + ] }, { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAARlUlEQVR4nO3dbYxc5XnG8f9VmxAHAuElrBBGXSKstLw0TbAoLVW0qtPihijmA0iWkuBWriwhkpIWKTKN1KgfLEFVQgIqSFZIMZQGKElkK4g2yGRVVQITE0iNcVycQMHBxaEQglEhmN79MM+S8bJej9dr7+7M/yeN5sw95zl77pHg2uc5Z8epKiRJ+rWZPgFJ0uxgIEiSAANBktQYCJIkwECQJDXzZ/oEpurkk0+u4eHhKY197bXXOOaYY6b3hGY5ex4M9jwYDqXnRx999MWqev9E783ZQBgeHmbz5s1TGjs6OsrIyMj0ntAsZ8+DwZ4Hw6H0nOS/9veeS0aSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkYEADYctPX2F49X0Mr75vpk9FkmaNgQwESdI7GQiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNT0FQpK/SLI1yRNJvpHk3UlOTPJAkqfa8wld+1+TZEeS7Uku6qqfl2RLe+/GJGn1o5Pc3eqbkgxPe6eSpEkdMBCSnAb8ObC4qs4B5gHLgdXAxqpaBGxsr0lyVnv/bGApcHOSee1wtwCrgEXtsbTVVwIvV9WZwA3AddPSnSSpZ70uGc0HFiSZD7wHeB5YBqxr768DLmnby4C7quqNqnoa2AGcn+RU4LiqeqiqCrh93JixY90LLBmbPUiSjoz5B9qhqn6a5O+AZ4H/Bb5bVd9NMlRVu9o+u5Kc0oacBjzcdYidrfZm2x5fHxvzXDvW3iSvACcBL3afS5JVdGYYDA0NMTo6ehCt/srQArj63L0AUz7GXLNnz56B6XWMPQ8Ge54+BwyEdm1gGXAG8HPgn5N8erIhE9RqkvpkY/YtVK0F1gIsXry4RkZGJjmN/bvpzvVcv6XT+jOfmtox5prR0VGm+nnNVfY8GOx5+vSyZPQx4Omq+llVvQl8C/g94IW2DER73t323wmc3jV+IZ0lpp1te3x9nzFtWep44KWpNCRJmppeAuFZ4IIk72nr+kuAbcAGYEXbZwWwvm1vAJa3O4fOoHPx+JG2vPRqkgvacS4fN2bsWJcCD7brDJKkI6SXawibktwL/ADYCzxGZ9nmWOCeJCvphMZlbf+tSe4Bnmz7X1lVb7XDXQHcBiwA7m8PgFuBO5LsoDMzWD4t3UmSenbAQACoqi8BXxpXfoPObGGi/dcAayaobwbOmaD+Oi1QJEkzw79UliQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEtBjICR5X5J7k/woybYkv5vkxCQPJHmqPZ/Qtf81SXYk2Z7koq76eUm2tPduTJJWPzrJ3a2+KcnwtHcqSZpUrzOErwL/UlW/AXwI2AasBjZW1SJgY3tNkrOA5cDZwFLg5iTz2nFuAVYBi9pjaauvBF6uqjOBG4DrDrEvSdJBOmAgJDkO+ChwK0BV/bKqfg4sA9a13dYBl7TtZcBdVfVGVT0N7ADOT3IqcFxVPVRVBdw+bszYse4FlozNHiRJR8b8Hvb5APAz4B+SfAh4FLgKGKqqXQBVtSvJKW3/04CHu8bvbLU32/b4+tiY59qx9iZ5BTg
JeLH7RJKsojPDYGhoiNHR0d66HGdoAVx97l6AKR9jrtmzZ8/A9DrGngeDPU+fXgJhPvAR4HNVtSnJV2nLQ/sx0W/2NUl9sjH7FqrWAmsBFi9eXCMjI5Ocxv7ddOd6rt/Saf2ZT03tGHPN6OgoU/285ip7Hgz2PH16uYawE9hZVZva63vpBMQLbRmI9ry7a//Tu8YvBJ5v9YUT1PcZk2Q+cDzw0sE2I0maugMGQlX9N/Bckg+20hLgSWADsKLVVgDr2/YGYHm7c+gMOhePH2nLS68muaBdH7h83JixY10KPNiuM0iSjpBelowAPgfcmeRdwE+AP6UTJvckWQk8C1wGUFVbk9xDJzT2AldW1VvtOFcAtwELgPvbAzoXrO9IsoPOzGD5IfYlSTpIPQVCVT0OLJ7grSX72X8NsGaC+mbgnAnqr9MCRZI0M/xLZUkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSmvkzfQIzbXj1fW9vP3PtxTN4JpI0s5whSJIAA0GS1PQcCEnmJXksyXfa6xOTPJDkqfZ8Qte+1yTZkWR7kou66ucl2dLeuzFJWv3oJHe3+qYkw9PYoySpBwczQ7gK2Nb1ejWwsaoWARvba5KcBSwHzgaWAjcnmdfG3AKsAha1x9JWXwm8XFVnAjcA102pG0nSlPUUCEkWAhcDX+sqLwPWte11wCVd9buq6o2qehrYAZyf5FTguKp6qKoKuH3cmLFj3QssGZs9SJKOjF7vMvoK8AXgvV21oaraBVBVu5Kc0uqnAQ937bez1d5s2+PrY2Oea8fam+QV4CTgxe6TSLKKzgyDoaEhRkdHezz9fQ0tgKvP3fuO+lSPNxfs2bOnr/ubiD0PBnuePgcMhCSfAHZX1aNJRno45kS/2dck9cnG7FuoWgusBVi8eHGNjPRyOu90053ruX7LO1t/5lNTO95cMDo6ylQ/r7nKngeDPU+fXmYIFwKfTPJx4N3AcUn+EXghyaltdnAqsLvtvxM4vWv8QuD5Vl84Qb17zM4k84HjgZem2JMkaQoOeA2hqq6pqoVVNUznYvGDVfVpYAOwou22AljftjcAy9udQ2fQuXj8SFteejXJBe36wOXjxowd69L2M94xQ5AkHT6H8pfK1wL3JFkJPAtcBlBVW5PcAzwJ7AWurKq32pgrgNuABcD97QFwK3BHkh10ZgbLD+G8JElTcFCBUFWjwGjb/h9gyX72WwOsmaC+GThngvrrtECRJM0M/1JZkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpOZQ/k3lvjO8+r63t5+59uIZPBNJOvKcIUiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUuMfpu2Hf6QmadA4Q5AkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBPQRCktOTfC/JtiRbk1zV6icmeSDJU+35hK4x1yTZkWR7kou66ucl2dLeuzFJWv3oJHe3+qYkw4ehV0nSJHqZIewFrq6q3wQuAK5MchawGthYVYuAje017b3lwNnAUuDmJPPasW4BVgGL2mNpq68EXq6qM4EbgOumoTdJ0kE4YCBU1a6q+kHbfhXYBpwGLAPWtd3WAZe07WXAXVX1RlU9DewAzk9yKnBcVT1UVQXcPm7M2LHuBZaMzR4kSUfGQX25XVvK+TCwCRiqql3QCY0kp7TdTgMe7hq2s9XebNvj62NjnmvH2pvkFeAk4MVxP38VnRkGQ0NDjI6OHszpv21oAVx97t6
e95/qz5lN9uzZ0xd9HAx7Hgz2PH16DoQkxwLfBD5fVb+Y5Bf4id6oSeqTjdm3ULUWWAuwePHiGhkZOcBZT+ymO9dz/ZaDyMItr+3zci5+++no6ChT/bzmKnseDPY8fXq6yyjJUXTC4M6q+lYrv9CWgWjPu1t9J3B61/CFwPOtvnCC+j5jkswHjgdeOthmJElT18tdRgFuBbZV1Ze73toArGjbK4D1XfXl7c6hM+hcPH6kLS+9muSCdszLx40ZO9alwIPtOoMk6QjpZd3kQuAzwJYkj7faXwHXAvckWQk8C1wGUFVbk9wDPEnnDqUrq+qtNu4K4DZgAXB/e0AncO5IsoPOzGD5obUlSTpYBwyEqvp3Jl7jB1iynzFrgDUT1DcD50xQf50WKJKkmeE/oTkF/vOakvqRX10hSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ13nZ6iLwFVVK/cIYgSQIMBElSYyBIkgADQZLUeFF5GnmBWdJc5gxBkgQYCJKkxkCQJAFeQzhsvJ4gaa4xEI4Aw0HSXOCSkSQJMBAkSY1LRkeYy0eSZitnCJIkwBnCrOHMQdJMMxBmUHcISNJMc8lIkgQ4Q5iVXD6SNBOcIUiSAANBktS4ZDTLuXwk6UgxEOaQ/d2VZFBImg4GQh9wFiFpOhgIfcZwkDRVBkIf6w6H25YeM4NnImkuMBAGxJafvsKfTHANwlmEpDEGwoDzQrWkMQaCJtTL9ywZGlJ/MRA0Zb1+OZ/BIc0NsyYQkiwFvgrMA75WVdfO8ClpmhyOb3U1ZKTpNysCIck84O+BPwR2At9PsqGqnpzZM9Ns1UvIXH3u3gkvpB8Kg0j9bFYEAnA+sKOqfgKQ5C5gGWAgaFaZ7f+GxeEIwdmoO5h7uYPOmyd6k6qa6XMgyaXA0qr6s/b6M8DvVNVnx+23CljVXn4Q2D7FH3ky8OIUx85V9jwY7HkwHErPv15V75/ojdkyQ8gEtXckVVWtBdYe8g9LNlfV4kM9zlxiz4PBngfD4ep5tnz99U7g9K7XC4HnZ+hcJGkgzZZA+D6wKMkZSd4FLAc2zPA5SdJAmRVLRlW1N8lngX+lc9vp16tq62H8kYe87DQH2fNgsOfBcFh6nhUXlSVJM2+2LBlJkmaYgSBJAgYsEJIsTbI9yY4kq2f6fKZLkq8n2Z3kia7aiUkeSPJUez6h671r2mewPclFM3PWhybJ6Um+l2Rbkq1Jrmr1vu07ybuTPJLkh63nv2n1vu15TJJ5SR5L8p32ehB6fibJliSPJ9ncaoe376oaiAedi9U/Bj4AvAv4IXDWTJ/XNPX2UeAjwBNdtb8FVrft1cB1bfus1vvRwBntM5k30z1MoedTgY+07fcC/9l669u+6fy9zrFt+yhgE3BBP/fc1ftfAv8EfKe9HoSenwFOHlc7rH0P0gzh7a/HqKpfAmNfjzHnVdW/AS+NKy8D1rXtdcAlXfW7quqNqnoa2EHns5lTqmpXVf2gbb8KbANOo4/7ro497eVR7VH0cc8ASRYCFwNf6yr3dc+TOKx9D1IgnAY81/V6Z6v1q6Gq2gWd/3kCp7R6330OSYaBD9P5jbmv+25LJ48Du4EHqqrvewa+AnwB+L+uWr/3DJ2w/26SR9vX9sBh7ntW/B3CEdLT12MMgL76HJIcC3wT+HxV/SKZqL3OrhPU5lzfVfUW8NtJ3gd8O8k5k+w+53tO8glgd1U9mmSklyET1OZUz10urKrnk5wCPJDkR5PsOy19D9IMYdC+HuOFJKcCtOfdrd43n0OSo+iEwZ1V9a1W7vu+Aarq58AosJT+7vlC4JNJnqGzzPsHSf6R/u4ZgKp6vj3vBr5NZwnosPY9SIEwaF+PsQFY0bZXAOu76suTHJ3kDGAR8MgMnN8hSWcqcCuwraq+3PVW3/ad5P1tZkCSBcDHgB/Rxz1X1TVVtbCqhun8N/tgVX2aPu4ZIMkxSd47tg38EfAEh7vvmb6SfoSv2n+czt0oPwa+ONP
nM419fQPYBbxJ5zeFlcBJwEbgqfZ8Ytf+X2yfwXbgj2f6/KfY8+/TmRL/B/B4e3y8n/sGfgt4rPX8BPDXrd63PY/rf4Rf3WXU1z3TuRvyh+2xdez/V4e7b7+6QpIEDNaSkSRpEgaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLU/D99c2F/4EPbtQAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "grouped_credset_pdf.query('count < 500')['count'].hist(bins=100)" + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of resolved credible sets: 381056\n", + "Studies with resolved credible sets: 33723\n", + "Number of lead/tag pairs: 18722043\n" + ] + } + ], + "source": [ + "# Thu\n", + "credible_sets = new_study_locus.filter(f.col(\"pics_99_perc_credset\")).persist()\n", + "resolved_assoc_count = credible_sets.select(\"studyId\", \"variantId\").distinct().count()\n", + "resolved_study_count = credible_sets.select(\"studyId\").distinct().count()\n", + "lead_tag_pair_count = credible_sets.select(\"studyId\", \"variantId\", \"tagVariantId\").distinct().count()\n", + "\n", + "grouped_credset_pdf = credible_sets.groupBy(\"studyId\", \"variantId\").count().toPandas()\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Credible sets with only one variant: 29536 (7.8)%\n", - "Median size of credible sets: 21.0\n" - ] - } - ], - "source": [ - "median_credset_size = grouped_credset_pdf['count'].median()\n", - "credsets_with_single = len(grouped_credset_pdf.query('count == 1'))\n", - "\n", - "print(f'Credible sets with only one variant: {credsets_with_single} ({round(credsets_with_single/len(grouped_credset_pdf)*100, 1)})%')\n", - "print(f'Median size of credible sets: {median_credset_size}')" + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAARlUlEQVR4nO3dbYxc5XnG8f9VmxAHAuElrBBGXSKstLw0TbAoLVW0qtPihijmA0iWkuBWriwhkpIWKTKN1KgfLEFVQgIqSFZIMZQGKElkK4g2yGRVVQITE0iNcVycQMHBxaEQglEhmN79MM+S8bJej9dr7+7M/yeN5sw95zl77pHg2uc5Z8epKiRJ+rWZPgFJ0uxgIEiSAANBktQYCJIkwECQJDXzZ/oEpurkk0+u4eHhKY197bXXOOaYY6b3hGY5ex4M9jwYDqXnRx999MWqev9E783ZQBgeHmbz5s1TGjs6OsrIyMj0ntAsZ8+DwZ4Hw6H0nOS/9veeS0aSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkYEADYctPX2F49X0Mr75vpk9FkmaNgQwESdI7GQiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNT0FQpK/SLI1yRNJvpHk3UlOTPJAkqfa8wld+1+TZEeS7Uku6qqfl2RLe+/GJGn1o5Pc3eqbkgxPe6eSpEkdMBCSnAb8ObC4qs4B5gHLgdXAxqpaBGxsr0lyVnv/bGApcHOSee1wtwCrgEXtsbTVVwIvV9WZwA3AddPSnSSpZ70uGc0HFiSZD7wHeB5YBqxr768DLmnby4C7quqNqnoa2AGcn+RU4LiqeqiqCrh93JixY90LLBmbPUiSjoz5B9qhqn6a5O+AZ4H/Bb5bVd9NMlRVu9o+u5Kc0oacBjzcdYidrfZm2x5fHxvzXDvW3iSvACcBL3afS5JVdGYYDA0NMTo6ehCt/srQArj63L0AUz7GXLNnz56B6XWMPQ8Ge54+BwyEdm1gGXAG8HPgn5N8erIhE9RqkvpkY/YtVK0F1gIsXry4RkZGJjmN/bvpzvVcv6XT+jOfmtox5prR0VGm+nnNVfY8GOx5+vSyZPQx4Omq+llVvQl8C/g94IW2DER73t323wmc3jV+IZ0lpp1te3x9nzFtWep44KWpNCRJmppeAuFZ4IIk72nr+kuAbcAGYEXbZwWwvm1vAJa3O4fOoHPx+JG2vPRqkgvacS4fN2bsWJcCD7brDJKkI6SXawibktwL/ADYCzxGZ9nmWOCeJCvphMZlbf+tSe4Bnmz7X1lVb7XDXQHcBiwA7m8PgFuBO5LsoDMzWD4t3UmSenbAQACoqi8BXxpXfoPObGGi/dcAayaobwbOmaD+Oi1QJEkzw79UliQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEtBjICR5X5J7k/woybYkv5vkxCQPJHmqPZ/Qtf81SXYk2Z7koq76eUm2tPduTJJWPzrJ3a2+KcnwtHcqSZpUrzOErwL/UlW/AXwI2AasBjZW1SJgY3tNkrOA5cDZwFLg5iTz2nFuAVYBi9pjaauvBF6uqjOBG4DrDrEvSdJBOmAgJDkO+ChwK0BV/bKqfg4sA9a13dYBl7TtZcBdVfVGVT0N7ADOT3IqcFxVPVRVBdw+bszYse4FlozNHiRJR8b8Hvb5APAz4B+SfAh4FLgKGKqqXQBVtSvJKW3/04CHu8bvbLU32/b4+tiY59qx9iZ5BTg
JeLH7RJKsojPDYGhoiNHR0d66HGdoAVx97l6AKR9jrtmzZ8/A9DrGngeDPU+fXgJhPvAR4HNVtSnJV2nLQ/sx0W/2NUl9sjH7FqrWAmsBFi9eXCMjI5Ocxv7ddOd6rt/Saf2ZT03tGHPN6OgoU/285ip7Hgz2PH16uYawE9hZVZva63vpBMQLbRmI9ry7a//Tu8YvBJ5v9YUT1PcZk2Q+cDzw0sE2I0maugMGQlX9N/Bckg+20hLgSWADsKLVVgDr2/YGYHm7c+gMOhePH2nLS68muaBdH7h83JixY10KPNiuM0iSjpBelowAPgfcmeRdwE+AP6UTJvckWQk8C1wGUFVbk9xDJzT2AldW1VvtOFcAtwELgPvbAzoXrO9IsoPOzGD5IfYlSTpIPQVCVT0OLJ7grSX72X8NsGaC+mbgnAnqr9MCRZI0M/xLZUkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSmvkzfQIzbXj1fW9vP3PtxTN4JpI0s5whSJIAA0GS1PQcCEnmJXksyXfa6xOTPJDkqfZ8Qte+1yTZkWR7kou66ucl2dLeuzFJWv3oJHe3+qYkw9PYoySpBwczQ7gK2Nb1ejWwsaoWARvba5KcBSwHzgaWAjcnmdfG3AKsAha1x9JWXwm8XFVnAjcA102pG0nSlPUUCEkWAhcDX+sqLwPWte11wCVd9buq6o2qehrYAZyf5FTguKp6qKoKuH3cmLFj3QssGZs9SJKOjF7vMvoK8AXgvV21oaraBVBVu5Kc0uqnAQ937bez1d5s2+PrY2Oea8fam+QV4CTgxe6TSLKKzgyDoaEhRkdHezz9fQ0tgKvP3fuO+lSPNxfs2bOnr/ubiD0PBnuePgcMhCSfAHZX1aNJRno45kS/2dck9cnG7FuoWgusBVi8eHGNjPRyOu90053ruX7LO1t/5lNTO95cMDo6ylQ/r7nKngeDPU+fXmYIFwKfTPJx4N3AcUn+EXghyaltdnAqsLvtvxM4vWv8QuD5Vl84Qb17zM4k84HjgZem2JMkaQoOeA2hqq6pqoVVNUznYvGDVfVpYAOwou22AljftjcAy9udQ2fQuXj8SFteejXJBe36wOXjxowd69L2M94xQ5AkHT6H8pfK1wL3JFkJPAtcBlBVW5PcAzwJ7AWurKq32pgrgNuABcD97QFwK3BHkh10ZgbLD+G8JElTcFCBUFWjwGjb/h9gyX72WwOsmaC+GThngvrrtECRJM0M/1JZkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpOZQ/k3lvjO8+r63t5+59uIZPBNJOvKcIUiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUuMfpu2Hf6QmadA4Q5AkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBPQRCktOTfC/JtiRbk1zV6icmeSDJU+35hK4x1yTZkWR7kou66ucl2dLeuzFJWv3oJHe3+qYkw4ehV0nSJHqZIewFrq6q3wQuAK5MchawGthYVYuAje017b3lwNnAUuDmJPPasW4BVgGL2mNpq68EXq6qM4EbgOumoTdJ0kE4YCBU1a6q+kHbfhXYBpwGLAPWtd3WAZe07WXAXVX1RlU9DewAzk9yKnBcVT1UVQXcPm7M2LHuBZaMzR4kSUfGQX25XVvK+TCwCRiqql3QCY0kp7TdTgMe7hq2s9XebNvj62NjnmvH2pvkFeAk4MVxP38VnRkGQ0NDjI6OHszpv21oAVx97t6
e95/qz5lN9uzZ0xd9HAx7Hgz2PH16DoQkxwLfBD5fVb+Y5Bf4id6oSeqTjdm3ULUWWAuwePHiGhkZOcBZT+ymO9dz/ZaDyMItr+3zci5+++no6ChT/bzmKnseDPY8fXq6yyjJUXTC4M6q+lYrv9CWgWjPu1t9J3B61/CFwPOtvnCC+j5jkswHjgdeOthmJElT18tdRgFuBbZV1Ze73toArGjbK4D1XfXl7c6hM+hcPH6kLS+9muSCdszLx40ZO9alwIPtOoMk6QjpZd3kQuAzwJYkj7faXwHXAvckWQk8C1wGUFVbk9wDPEnnDqUrq+qtNu4K4DZgAXB/e0AncO5IsoPOzGD5obUlSTpYBwyEqvp3Jl7jB1iynzFrgDUT1DcD50xQf50WKJKkmeE/oTkF/vOakvqRX10hSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ13nZ6iLwFVVK/cIYgSQIMBElSYyBIkgADQZLUeFF5GnmBWdJc5gxBkgQYCJKkxkCQJAFeQzhsvJ4gaa4xEI4Aw0HSXOCSkSQJMBAkSY1LRkeYy0eSZitnCJIkwBnCrOHMQdJMMxBmUHcISNJMc8lIkgQ4Q5iVXD6SNBOcIUiSAANBktS4ZDTLuXwk6UgxEOaQ/d2VZFBImg4GQh9wFiFpOhgIfcZwkDRVBkIf6w6H25YeM4NnImkuMBAGxJafvsKfTHANwlmEpDEGwoDzQrWkMQaCJtTL9ywZGlJ/MRA0Zb1+OZ/BIc0NsyYQkiwFvgrMA75WVdfO8ClpmhyOb3U1ZKTpNysCIck84O+BPwR2At9PsqGqnpzZM9Ns1UvIXH3u3gkvpB8Kg0j9bFYEAnA+sKOqfgKQ5C5gGWAgaFaZ7f+GxeEIwdmoO5h7uYPOmyd6k6qa6XMgyaXA0qr6s/b6M8DvVNVnx+23CljVXn4Q2D7FH3ky8OIUx85V9jwY7HkwHErPv15V75/ojdkyQ8gEtXckVVWtBdYe8g9LNlfV4kM9zlxiz4PBngfD4ep5tnz99U7g9K7XC4HnZ+hcJGkgzZZA+D6wKMkZSd4FLAc2zPA5SdJAmRVLRlW1N8lngX+lc9vp16tq62H8kYe87DQH2fNgsOfBcFh6nhUXlSVJM2+2LBlJkmaYgSBJAgYsEJIsTbI9yY4kq2f6fKZLkq8n2Z3kia7aiUkeSPJUez6h671r2mewPclFM3PWhybJ6Um+l2Rbkq1Jrmr1vu07ybuTPJLkh63nv2n1vu15TJJ5SR5L8p32ehB6fibJliSPJ9ncaoe376oaiAedi9U/Bj4AvAv4IXDWTJ/XNPX2UeAjwBNdtb8FVrft1cB1bfus1vvRwBntM5k30z1MoedTgY+07fcC/9l669u+6fy9zrFt+yhgE3BBP/fc1ftfAv8EfKe9HoSenwFOHlc7rH0P0gzh7a/HqKpfAmNfjzHnVdW/AS+NKy8D1rXtdcAlXfW7quqNqnoa2EHns5lTqmpXVf2gbb8KbANOo4/7ro497eVR7VH0cc8ASRYCFwNf6yr3dc+TOKx9D1IgnAY81/V6Z6v1q6Gq2gWd/3kCp7R6330OSYaBD9P5jbmv+25LJ48Du4EHqqrvewa+AnwB+L+uWr/3DJ2w/26SR9vX9sBh7ntW/B3CEdLT12MMgL76HJIcC3wT+HxV/SKZqL3OrhPU5lzfVfUW8NtJ3gd8O8k5k+w+53tO8glgd1U9mmSklyET1OZUz10urKrnk5wCPJDkR5PsOy19D9IMYdC+HuOFJKcCtOfdrd43n0OSo+iEwZ1V9a1W7vu+Aarq58AosJT+7vlC4JNJnqGzzPsHSf6R/u4ZgKp6vj3vBr5NZwnosPY9SIEwaF+PsQFY0bZXAOu76suTHJ3kDGAR8MgMnN8hSWcqcCuwraq+3PVW3/ad5P1tZkCSBcDHgB/Rxz1X1TVVtbCqhun8N/tgVX2aPu4ZIMkxSd47tg38EfAEh7vvmb6SfoSv2n+czt0oPwa+ONP
nM419fQPYBbxJ5zeFlcBJwEbgqfZ8Ytf+X2yfwXbgj2f6/KfY8+/TmRL/B/B4e3y8n/sGfgt4rPX8BPDXrd63PY/rf4Rf3WXU1z3TuRvyh+2xdez/V4e7b7+6QpIEDNaSkSRpEgaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLU/D99c2F/4EPbtQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "grouped_credset_pdf.query(\"count < 500\")[\"count\"].hist(bins=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Credible sets with only one variant: 29536 (7.8)%\n", + "Median size of credible sets: 21.0\n" + ] + } + ], + "source": [ + "median_credset_size = grouped_credset_pdf[\"count\"].median()\n", + "credsets_with_single = len(grouped_credset_pdf.query(\"count == 1\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 285:====================================> (139 + 8) / 200]\r" + ] }, { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 285:====================================> (139 + 8) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+------------+---------------+-----+\n", - "| studyId| variantId|count|\n", - "+------------+---------------+-----+\n", - "|GCST90095125|17_46142465_T_A|11930|\n", - "|GCST90095124|17_46142465_T_A|11221|\n", - "|GCST006483_1|17_45608332_A_G| 9666|\n", - "| GCST011766|17_45846834_C_G| 9566|\n", - "|GCST006481_2|17_45608332_A_G| 9445|\n", - "|GCST006483_1|17_45605039_C_G| 8912|\n", - "|GCST006483_3|17_45605039_C_G| 8602|\n", - "|GCST006481_4|17_45605039_C_G| 8545|\n", - "|GCST006483_1|17_46770468_T_G| 7465|\n", - "|GCST006481_2|17_46770468_T_G| 7396|\n", - "|GCST006483_3|17_46770468_T_G| 7374|\n", - "|GCST006481_4|17_46770468_T_G| 7327|\n", - "|GCST001651_9|17_46257341_G_A| 6926|\n", - "|GCST90134596|17_45707983_T_C| 6748|\n", - "| GCST012099|17_45610951_A_G| 6603|\n", - "|GCST90104034|17_46152620_T_C| 6545|\n", - "| GCST012101|17_45610951_A_G| 
6374|\n", - "|GCST90134597|17_45707983_T_C| 6372|\n", - "| GCST007692|17_45846834_C_G| 6331|\n", - "|GCST90013445|17_45996523_A_G| 5668|\n", - "|GCST008675_1|17_45733530_C_T| 5196|\n", - "|GCST004008_1|17_45749271_G_A| 5101|\n", - "|GCST006483_1|17_46785767_T_C| 4913|\n", - "|GCST006481_2|17_46785767_T_C| 4880|\n", - "| GCST007065|11_55736589_G_A| 4071|\n", - "|GCST90100220|10_73256607_T_A| 3897|\n", - "|GCST90095190|17_45913906_A_G| 3858|\n", - "|GCST90095190|17_46055092_G_A| 3855|\n", - "|GCST90095190|17_45609706_G_A| 3764|\n", - "|GCST000996_1|11_55368743_C_T| 3727|\n", - "+------------+---------------+-----+\n", - "only showing top 30 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "credible_sets.groupBy('studyId', 'variantId').count().filter(f.col('count') > 1000).orderBy('count', ascending=False).show(30)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------+---------------+-----+\n", + "| studyId| variantId|count|\n", + "+------------+---------------+-----+\n", + "|GCST90095125|17_46142465_T_A|11930|\n", + "|GCST90095124|17_46142465_T_A|11221|\n", + "|GCST006483_1|17_45608332_A_G| 9666|\n", + "| GCST011766|17_45846834_C_G| 9566|\n", + "|GCST006481_2|17_45608332_A_G| 9445|\n", + "|GCST006483_1|17_45605039_C_G| 8912|\n", + "|GCST006483_3|17_45605039_C_G| 8602|\n", + "|GCST006481_4|17_45605039_C_G| 8545|\n", + "|GCST006483_1|17_46770468_T_G| 7465|\n", + "|GCST006481_2|17_46770468_T_G| 7396|\n", + "|GCST006483_3|17_46770468_T_G| 7374|\n", + "|GCST006481_4|17_46770468_T_G| 7327|\n", + "|GCST001651_9|17_46257341_G_A| 6926|\n", + "|GCST90134596|17_45707983_T_C| 6748|\n", + "| GCST012099|17_45610951_A_G| 6603|\n", + "|GCST90104034|17_46152620_T_C| 6545|\n", + "| GCST012101|17_45610951_A_G| 6374|\n", + "|GCST90134597|17_45707983_T_C| 6372|\n", + "| GCST007692|17_45846834_C_G| 6331|\n", + "|GCST90013445|17_45996523_A_G| 5668|\n", + "|GCST008675_1|17_45733530_C_T| 
5196|\n", + "|GCST004008_1|17_45749271_G_A| 5101|\n", + "|GCST006483_1|17_46785767_T_C| 4913|\n", + "|GCST006481_2|17_46785767_T_C| 4880|\n", + "| GCST007065|11_55736589_G_A| 4071|\n", + "|GCST90100220|10_73256607_T_A| 3897|\n", + "|GCST90095190|17_45913906_A_G| 3858|\n", + "|GCST90095190|17_46055092_G_A| 3855|\n", + "|GCST90095190|17_45609706_G_A| 3764|\n", + "|GCST000996_1|11_55368743_C_T| 3727|\n", + "+------------+---------------+-----+\n", + "only showing top 30 rows\n", + "\n" + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Comparing with old dataset\n", - "\n", - "- Data: `gs://genetics-portal-dev-staging/v2d/220210/ld.parquet`" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "credible_sets.groupBy(\"studyId\", \"variantId\").count().filter(f.col(\"count\") > 1000).orderBy(\"count\", ascending=False).show(30)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparing with old dataset\n", + "\n", + "- Data: `gs://genetics-portal-dev-staging/v2d/220210/ld.parquet`" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 336:===================================================> (16 + 1) / 17]\r" + ] }, { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 336:===================================================> (16 + 1) / 17]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of lead/tag count: 19406519\n", - "NUmber of studies covered: 18349\n", - "Number of associations covered: 265715\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "old_study_locus = (\n", - " 
spark.read.parquet(\"gs://genetics-portal-dev-staging/v2d/220210/ld.parquet\")\n", - " .select(\n", - " f.col('study_id').alias(\"studyId\"), \n", - " f.concat_ws(\"_\", f.col(\"lead_chrom\"), f.col(\"lead_pos\"), f.col(\"lead_ref\"), f.col(\"lead_alt\")).alias(\"variantId\"), \n", - " f.concat_ws(\"_\", f.col(\"tag_chrom\"), f.col(\"tag_pos\"), f.col(\"tag_ref\"), f.col(\"tag_alt\")).alias(\"tagVariantId\"),\n", - " 'pics_postprob',\n", - " 'pics_95perc_credset',\n", - " 'pics_99perc_credset'\n", - " )\n", - " .distinct()\n", - ")\n", - "lead_tag_pair_count = old_study_locus.count()\n", - "study_count = old_study_locus.select('studyId').distinct().count()\n", - "association_count = old_study_locus.select('studyId', 'variantId').distinct().count()\n", - "\n", - "print(f'Number of lead/tag count: {lead_tag_pair_count}')\n", - "print(f'NUmber of studies covered: {study_count}')\n", - "print(f'Number of associations covered: {association_count}')\n" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of lead/tag count: 19406519\n", + "NUmber of studies covered: 18349\n", + "Number of associations covered: 265715\n" + ] }, { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "22/12/19 13:53:16 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The median number of tag size: 21.0\n", - "Number of associations with single credible set: 9231\n", - "Number of associations with more than 1000 tags set: 441\n", - "+-------------+---------------+-----+\n", - "| studyId| variantId|count|\n", - "+-------------+---------------+-----+\n", - "| GCST001482|17_45900461_C_T| 3685|\n", - "| GCST90018953|17_45856424_G_T| 3684|\n", - "| GCST007692|17_45846834_C_G| 3649|\n", - "| GCST90018960|17_45761354_C_T| 3360|\n", - "| GCST90018996|17_46112544_A_G| 3348|\n", 
- "| GCST90091060|17_45873075_C_A| 3295|\n", - "| GCST002970|17_45846317_A_G| 3294|\n", - "| GCST001548|17_45846853_T_C| 3294|\n", - "| GCST007328|17_45887201_A_C| 3294|\n", - "| GCST007430|17_45887201_A_C| 3294|\n", - "| GCST010701|17_45855805_C_T| 3294|\n", - "| GCST001126|17_45846317_A_G| 3294|\n", - "| GCST012009|17_45862033_A_C| 3294|\n", - "| GCST006941|17_45841739_C_T| 3293|\n", - "| GCST004601|17_45841730_A_G| 3293|\n", - "| GCST010002|17_45895867_C_T| 3293|\n", - "| GCST90025948|17_45834077_T_C| 3293|\n", - "| GCST008733|17_45834077_T_C| 3293|\n", - "| GCST008734|17_45834077_T_C| 3293|\n", - "|GCST009518_66|17_45841730_A_G| 3293|\n", - "+-------------+---------------+-----+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], - "source": [ - "tag_count = old_study_locus.groupBy('studyId', 'variantId').count().persist()\n", - "median_tag_count = tag_count.toPandas()['count'].median()\n", - "single_count = tag_count.filter(f.col('count') == 1).count()\n", - "over_1000 = tag_count.filter(f.col('count') >= 1000).count()\n", - "\n", - "print(f'The median number of tag size: {median_credset_size}')\n", - "print(f'Number of associations with single credible set: {single_count}')\n", - "print(f'Number of associations with more than 1000 tags set: {over_1000}')\n", - "\n", - "tag_count.orderBy('count',ascending=False).show(20)" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "old_study_locus = (\n", + " spark.read.parquet(\"gs://genetics-portal-dev-staging/v2d/220210/ld.parquet\")\n", + " .select(\n", + " f.col(\"study_id\").alias(\"studyId\"),\n", + " f.concat_ws(\"_\", f.col(\"lead_chrom\"), f.col(\"lead_pos\"), f.col(\"lead_ref\"), f.col(\"lead_alt\")).alias(\"variantId\"),\n", + " f.concat_ws(\"_\", f.col(\"tag_chrom\"), f.col(\"tag_pos\"), f.col(\"tag_ref\"), f.col(\"tag_alt\")).alias(\"tagVariantId\"),\n", + " \"pics_postprob\",\n", + " \"pics_95perc_credset\",\n", + " \"pics_99perc_credset\"\n", + " 
)\n", + " .distinct()\n", + ")\n", + "lead_tag_pair_count = old_study_locus.count()\n", + "study_count = old_study_locus.select(\"studyId\").distinct().count()\n", + "association_count = old_study_locus.select(\"studyId\", \"variantId\").distinct().count()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/19 13:53:16 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n" + ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Compare credible sets\n", - "\n", - "To make datasets comparable, both datasets need to updated with `studyAccession`: getting the GWAS Catalog study identifier by removing the suffix." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "The median number of tag size: 21.0\n", + "Number of associations with single credible set: 9231\n", + "Number of associations with more than 1000 tags set: 441\n", + "+-------------+---------------+-----+\n", + "| studyId| variantId|count|\n", + "+-------------+---------------+-----+\n", + "| GCST001482|17_45900461_C_T| 3685|\n", + "| GCST90018953|17_45856424_G_T| 3684|\n", + "| GCST007692|17_45846834_C_G| 3649|\n", + "| GCST90018960|17_45761354_C_T| 3360|\n", + "| GCST90018996|17_46112544_A_G| 3348|\n", + "| GCST90091060|17_45873075_C_A| 3295|\n", + "| GCST002970|17_45846317_A_G| 3294|\n", + "| GCST001548|17_45846853_T_C| 3294|\n", + "| GCST007328|17_45887201_A_C| 3294|\n", + "| GCST007430|17_45887201_A_C| 3294|\n", + "| GCST010701|17_45855805_C_T| 3294|\n", + "| GCST001126|17_45846317_A_G| 3294|\n", + "| GCST012009|17_45862033_A_C| 3294|\n", + "| GCST006941|17_45841739_C_T| 3293|\n", + "| GCST004601|17_45841730_A_G| 3293|\n", + "| GCST010002|17_45895867_C_T| 3293|\n", + "| GCST90025948|17_45834077_T_C| 3293|\n", + "| GCST008733|17_45834077_T_C| 3293|\n", + "| GCST008734|17_45834077_T_C| 3293|\n", 
+ "|GCST009518_66|17_45841730_A_G| 3293|\n", + "+-------------+---------------+-----+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "tag_count = old_study_locus.groupBy(\"studyId\", \"variantId\").count().persist()\n", + "median_tag_count = tag_count.toPandas()[\"count\"].median()\n", + "single_count = tag_count.filter(f.col(\"count\") == 1).count()\n", + "over_1000 = tag_count.filter(f.col(\"count\") >= 1000).count()\n", + "\n", + "\n", + "tag_count.orderBy(\"count\",ascending=False).show(20)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compare credible sets\n", + "\n", + "To make datasets comparable, both datasets need to updated with `studyAccession`: getting the GWAS Catalog study identifier by removing the suffix." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 414:> (0 + 1) / 1][Stage 1072:========> (9 + 7) / 16]\r" + ] + } + ], + "source": [ + "processed_new = (\n", + " credible_sets\n", + " # Dropping leads with sub-significant p-values:\n", + " .filter(f.size(f.col(\"qualityControl\")) == 0)\n", + " .select(\n", + " f.split(f.col(\"studyId\"), \"_\").getItem(0).alias(\"studyAccession\"),\n", + " \"variantId\",\n", + " \"tagVariantId\",\n", + " \"pics_mu\",\n", + " \"pics_postprob\",\n", + " \"pics_95_perc_credset\",\n", + " \"pics_99_perc_credset\"\n", + " )\n", + " .persist()\n", + ")\n", + "\n", + "processed_old = (\n", + " old_study_locus\n", + " .select(\n", + " f.split(f.col(\"studyId\"), \"_\").getItem(0).alias(\"studyAccession\"),\n", + " \"variantId\",\n", + " \"tagVariantId\",\n", + " \"pics_postprob\",\n", + " \"pics_95perc_credset\",\n", + " \"pics_99perc_credset\"\n", + " )\n", + " .persist()\n", + ")\n", + "\n", + "processed_old.show(1, False, True)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + 
{ + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/19 14:19:35 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", + "22/12/19 14:19:35 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", + "[Stage 414:> (0 + 1) / 1][Stage 431:============>(187 + 7) / 200]\r" + ] }, { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 414:> (0 + 1) / 1][Stage 1072:========> (9 + 7) / 16]\r" - ] - } - ], - "source": [ - "processed_new = (\n", - " credible_sets\n", - " # Dropping leads with sub-significant p-values:\n", - " .filter(f.size(f.col('qualityControl')) == 0)\n", - " .select(\n", - " f.split(f.col('studyId'), '_').getItem(0).alias('studyAccession'),\n", - " 'variantId', \n", - " 'tagVariantId', \n", - " 'pics_mu', \n", - " 'pics_postprob', \n", - " 'pics_95_perc_credset', \n", - " 'pics_99_perc_credset'\n", - " )\n", - " .persist()\n", - ")\n", - "\n", - "processed_old = (\n", - " old_study_locus\n", - " .select(\n", - " f.split(f.col('studyId'), '_').getItem(0).alias('studyAccession'),\n", - " 'variantId', \n", - " 'tagVariantId',\n", - " 'pics_postprob',\n", - " 'pics_95perc_credset',\n", - " 'pics_99perc_credset' \n", - " )\n", - " .persist()\n", - ")\n", - "\n", - "processed_old.show(1, False, True)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------+---------------+---------------------+---------------------+\n", + "|studyAccession| variantId|new_credible_set_size|old_credible_set_size|\n", + "+--------------+---------------+---------------------+---------------------+\n", + "| GCST000114|15_48099968_A_G| 13| 38|\n", + "| GCST000172|3_190632672_A_G| 12| null|\n", + "| GCST000184|18_60217517_G_A| 233| 214|\n", + "| GCST000189|16_81270154_T_C| 6| null|\n", + "| GCST000189|9_105892815_G_T| 26| null|\n", + "| GCST000282|19_11100236_C_T| 34| 69|\n", 
+ "| GCST000425|16_23055939_T_G| 227| null|\n", + "| GCST000452|2_156696348_A_C| 19| null|\n", + "| GCST000679| 10_6056986_C_T| 12| null|\n", + "| GCST000817|9_136220024_G_T| 23| 27|\n", + "| GCST000876|11_18349351_G_C| 2| 6|\n", + "| GCST000943| 20_1960525_G_A| null| 2|\n", + "| GCST000957|22_49692725_G_A| 16| null|\n", + "| GCST000964|13_77957479_G_A| null| 47|\n", + "| GCST000998|10_44280376_C_T| 193| null|\n", + "| GCST000998|21_34226827_C_T| 29| 32|\n", + "| GCST001010| 6_32689801_T_C| 1| 112|\n", + "| GCST001040|17_37738049_G_A| null| 21|\n", + "| GCST001057|13_66393490_A_G| 7| null|\n", + "| GCST001059|2_198123211_C_A| 8| null|\n", + "+--------------+---------------+---------------------+---------------------+\n", + "only showing top 20 rows\n", + "\n" + ] }, { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "22/12/19 14:19:35 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", - "22/12/19 14:19:35 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", - "[Stage 414:> (0 + 1) / 1][Stage 431:============>(187 + 7) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------+---------------+---------------------+---------------------+\n", - "|studyAccession| variantId|new_credible_set_size|old_credible_set_size|\n", - "+--------------+---------------+---------------------+---------------------+\n", - "| GCST000114|15_48099968_A_G| 13| 38|\n", - "| GCST000172|3_190632672_A_G| 12| null|\n", - "| GCST000184|18_60217517_G_A| 233| 214|\n", - "| GCST000189|16_81270154_T_C| 6| null|\n", - "| GCST000189|9_105892815_G_T| 26| null|\n", - "| GCST000282|19_11100236_C_T| 34| 69|\n", - "| GCST000425|16_23055939_T_G| 227| null|\n", - "| GCST000452|2_156696348_A_C| 19| null|\n", - "| GCST000679| 10_6056986_C_T| 12| null|\n", - "| GCST000817|9_136220024_G_T| 23| 27|\n", 
- "| GCST000876|11_18349351_G_C| 2| 6|\n", - "| GCST000943| 20_1960525_G_A| null| 2|\n", - "| GCST000957|22_49692725_G_A| 16| null|\n", - "| GCST000964|13_77957479_G_A| null| 47|\n", - "| GCST000998|10_44280376_C_T| 193| null|\n", - "| GCST000998|21_34226827_C_T| 29| 32|\n", - "| GCST001010| 6_32689801_T_C| 1| 112|\n", - "| GCST001040|17_37738049_G_A| null| 21|\n", - "| GCST001057|13_66393490_A_G| 7| null|\n", - "| GCST001059|2_198123211_C_A| 8| null|\n", - "+--------------+---------------+---------------------+---------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 414:> (0 + 1) / 1]\r" - ] - } - ], - "source": [ - "aggregated_new = (\n", - " processed_new\n", - " .join(processed_old.select('studyAccession').distinct(), on='studyAccession', how='right')\n", - " .groupBy('studyAccession', 'variantId')\n", - " .agg(f.size(f.collect_list(f.col('tagVariantId'))).alias('new_credible_set_size'))\n", - " .persist()\n", - ")\n", - "\n", - "aggregated_old = (\n", - " processed_old\n", - " .groupBy('studyAccession', 'variantId')\n", - " .agg(f.size(f.collect_list(f.col('tagVariantId'))).alias('old_credible_set_size'))\n", - " .persist()\n", - ")\n", - "\n", - "credset_compare = (\n", - " aggregated_new\n", - " .join(aggregated_old.filter(f.col('studyAccession').startswith('GCST')), on=['studyAccession', 'variantId'], how='outer')\n", - " .persist()\n", - ")\n", - "\n", - "credset_compare.show()" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] }, { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The number of extra credible sets covered by the new dataset: 104508 (53.0%)\n", - "Number of lost credible 
sets in the new datasets: 49292 (25.0%)\n", - "The number of extra credible sets with more than 1 tags covered by the new dataset: 94745 (48.1%)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 414:> (0 + 1) / 1]\r" - ] - } - ], - "source": [ - "extra_coverage = credset_compare.filter(f.col('old_credible_set_size').isNull()).count()\n", - "lost_coverage = credset_compare.filter(f.col('new_credible_set_size').isNull()).count()\n", - "old_full_count = aggregated_old.filter(f.col('studyAccession').startswith('GCST')).count()\n", - "\n", - "print(f'The number of extra credible sets covered by the new dataset: {extra_coverage} ({round(extra_coverage/old_full_count * 100, 1)}%)')\n", - "print(f'Number of lost credible sets in the new datasets: {lost_coverage} ({round(lost_coverage/old_full_count*100, 1)}%)')\n", - "\n", - "extra_coverage_more = credset_compare.filter(f.col('old_credible_set_size').isNull() & (f.col('new_credible_set_size')>1)).count()\n", - "\n", - "print(f'The number of extra credible sets with more than 1 tags covered by the new dataset: {extra_coverage_more} ({round(extra_coverage_more/old_full_count * 100, 1)}%)')\n" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 414:> (0 + 1) / 1]\r" + ] + } + ], + "source": [ + "aggregated_new = (\n", + " processed_new\n", + " .join(processed_old.select(\"studyAccession\").distinct(), on=\"studyAccession\", how=\"right\")\n", + " .groupBy(\"studyAccession\", \"variantId\")\n", + " .agg(f.size(f.collect_list(f.col(\"tagVariantId\"))).alias(\"new_credible_set_size\"))\n", + " .persist()\n", + ")\n", + "\n", + "aggregated_old = (\n", + " processed_old\n", + " .groupBy(\"studyAccession\", \"variantId\")\n", + " .agg(f.size(f.collect_list(f.col(\"tagVariantId\"))).alias(\"old_credible_set_size\"))\n", + " .persist()\n", + ")\n", + "\n", + "credset_compare = (\n", + " aggregated_new\n", + " 
.join(aggregated_old.filter(f.col(\"studyAccession\").startswith(\"GCST\")), on=[\"studyAccession\", \"variantId\"], how=\"outer\")\n", + " .persist()\n", + ")\n", + "\n", + "credset_compare.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] }, { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------+--------------------+---------------------+---------------------+\n", - "|studyAccession| variantId|new_credible_set_size|old_credible_set_size|\n", - "+--------------+--------------------+---------------------+---------------------+\n", - "| GCST000943| 20_1960525_G_A| null| 2|\n", - "| GCST000964| 13_77957479_G_A| null| 47|\n", - "| GCST001040| 17_37738049_G_A| null| 21|\n", - "| GCST002216| 7_73450539_A_G| null| 94|\n", - "| GCST002221| 9_133372523_G_C| null| 20|\n", - "| GCST002223| 8_19973410_C_T| null| 106|\n", - "| GCST002223| 8_20009083_C_T| null| 98|\n", - "| GCST003043| 16_11271643_C_T| null| 14|\n", - "| GCST003191| 20_22824423_G_A| null| 30|\n", - "| GCST003879| 22_23030688_C_G| null| 104|\n", - "| GCST004132| 16_10871740_T_C| null| 74|\n", - "| GCST004365| 3_186755027_C_T| null| 12|\n", - "| GCST004600| 6_35756341_T_C| null| 17|\n", - "| GCST004601| 11_8721318_TC_T| null| 171|\n", - "| GCST004601| 6_27878966_G_C| null| 267|\n", - "| GCST004603|16_88730362_G_GGG...| null| 10|\n", - "| GCST004603| 4_17777672_A_T| null| 9|\n", - "| GCST004605| 6_28489735_CT_C| null| 1|\n", - "| GCST004607| 20_56413821_A_G| null| 15|\n", - "| GCST004607| 2_218258320_T_A| null| 235|\n", - "+--------------+--------------------+---------------------+---------------------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - "[Stage 414:> (0 + 1) / 1]\r" - ] - } - ], - "source": [ - "credset_compare.filter(f.col('new_credible_set_size').isNull()).show()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of extra credible sets covered by the new dataset: 104508 (53.0%)\n", + "Number of lost credible sets in the new datasets: 49292 (25.0%)\n", + "The number of extra credible sets with more than 1 tags covered by the new dataset: 94745 (48.1%)\n" + ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Conclusion:**\n", - "- The reason of the disagreement is the fact that the old dataset contains data from summary stats finemapping.\n", - "- To resolve this problem, we exclude those studies which have summary stats. These credible sets should be in a better agreement." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 414:> (0 + 1) / 1]\r" + ] + } + ], + "source": [ + "extra_coverage = credset_compare.filter(f.col(\"old_credible_set_size\").isNull()).count()\n", + "lost_coverage = credset_compare.filter(f.col(\"new_credible_set_size\").isNull()).count()\n", + "old_full_count = aggregated_old.filter(f.col(\"studyAccession\").startswith(\"GCST\")).count()\n", + "\n", + "\n", + "extra_coverage_more = credset_compare.filter(f.col(\"old_credible_set_size\").isNull() & (f.col(\"new_credible_set_size\")>1)).count()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------+--------------------+---------------------+---------------------+\n", + "|studyAccession| variantId|new_credible_set_size|old_credible_set_size|\n", + "+--------------+--------------------+---------------------+---------------------+\n", + "| GCST000943| 20_1960525_G_A| null| 2|\n", + "| GCST000964| 13_77957479_G_A| null| 47|\n", + "| GCST001040| 17_37738049_G_A| null| 21|\n", + "| GCST002216| 7_73450539_A_G| 
null| 94|\n", + "| GCST002221| 9_133372523_G_C| null| 20|\n", + "| GCST002223| 8_19973410_C_T| null| 106|\n", + "| GCST002223| 8_20009083_C_T| null| 98|\n", + "| GCST003043| 16_11271643_C_T| null| 14|\n", + "| GCST003191| 20_22824423_G_A| null| 30|\n", + "| GCST003879| 22_23030688_C_G| null| 104|\n", + "| GCST004132| 16_10871740_T_C| null| 74|\n", + "| GCST004365| 3_186755027_C_T| null| 12|\n", + "| GCST004600| 6_35756341_T_C| null| 17|\n", + "| GCST004601| 11_8721318_TC_T| null| 171|\n", + "| GCST004601| 6_27878966_G_C| null| 267|\n", + "| GCST004603|16_88730362_G_GGG...| null| 10|\n", + "| GCST004603| 4_17777672_A_T| null| 9|\n", + "| GCST004605| 6_28489735_CT_C| null| 1|\n", + "| GCST004607| 20_56413821_A_G| null| 15|\n", + "| GCST004607| 2_218258320_T_A| null| 235|\n", + "+--------------+--------------------+---------------------+---------------------+\n", + "only showing top 20 rows\n", + "\n" + ] }, { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "data": { - "text/plain": [ - "141" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 414:> (0 + 1) / 1]\r" - ] - } - ], - "source": [ - "(\n", - " spark.read.parquet(\"gs://genetics-portal-dev-staging/v2d/220401/ld.parquet\")\n", - " .filter(f.col('study_id') == 'GCST002223')\n", - " .select('lead_chrom', 'lead_pos', 'lead_ref', 'lead_alt')\n", - " .distinct()\n", - " .count()\n", - ")" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] }, { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "22/12/19 15:57:00 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "Number of credible sets in the selected studies (27054): 105658\n", - "The number of extra credible sets covered by the new dataset in the same studies: 73250 (69.3%)\n", - "Number of lost credible sets in the new datasets: 8454 (8.0%)\n", - "The number of extra credible sets with more than 1 tags covered by the new dataset: 67183 (63.6%)\n" - ] - } - ], - "source": [ - "studies_with_no_sumstats = (\n", - " spark.read.parquet('gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/gwas_catalog_studies/')\n", - " .filter(~f.col('hasSumstats'))\n", - " .select(f.split(f.col('studyId'), '_').getItem(0).alias('studyAccession'))\n", - " .distinct()\n", - ")\n", - "\n", - "# Dropping studies with summary statistics:\n", - "credset_compare_update = credset_compare.join(studies_with_no_sumstats, on='studyAccession', how='inner').distinct().persist()\n", - "\n", - "old_full_count = credset_compare_update.filter(f.col('old_credible_set_size').isNotNull()).count()\n", - "extra_coverage = credset_compare_update.filter(f.col('old_credible_set_size').isNull()).count()\n", - "lost_coverage = credset_compare_update.filter(f.col('new_credible_set_size').isNull()).count()\n", - "\n", - "print(f'Number of credible sets in the selected studies ({studies_with_no_sumstats.count()}): {old_full_count}')\n", - "print(f'The number of extra credible sets covered by the new dataset in the same studies: {extra_coverage} ({round(extra_coverage/old_full_count * 100, 1)}%)')\n", - "print(f'Number of lost credible sets in the new datasets: {lost_coverage} ({round(lost_coverage/old_full_count*100, 1)}%)')\n", - "\n", - "extra_coverage_more = credset_compare_update.filter(f.col('old_credible_set_size').isNull() & (f.col('new_credible_set_size')>1)).count()\n", - "\n", - "print(f'The number of extra credible sets with more than 1 tags covered by the new dataset: {extra_coverage_more} ({round(extra_coverage_more/old_full_count * 100, 1)}%)')" - ] + "name": "stderr", + 
"output_type": "stream", + "text": [ + "[Stage 414:> (0 + 1) / 1]\r" + ] + } + ], + "source": [ + "credset_compare.filter(f.col(\"new_credible_set_size\").isNull()).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Conclusion:**\n", + "- The reason of the disagreement is the fact that the old dataset contains data from summary stats finemapping.\n", + "- To resolve this problem, we exclude those studies which have summary stats. These credible sets should be in a better agreement." + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] }, { - "cell_type": "code", - "execution_count": 91, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "1+1" + "data": { + "text/plain": [ + "141" ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 414:> (0 + 1) / 1]\r" + ] } - ], - "metadata": { - "kernelspec": { - "display_name": "otgenetics-Z1loiStc-py3.8", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16 (default, Dec 7 2022, 01:39:17) \n[Clang 14.0.0 (clang-1400.0.29.202)]" + ], + "source": [ + "(\n", + " spark.read.parquet(\"gs://genetics-portal-dev-staging/v2d/220401/ld.parquet\")\n", + " .filter(f.col(\"study_id\") == \"GCST002223\")\n", + " .select(\"lead_chrom\", \"lead_pos\", \"lead_ref\", \"lead_alt\")\n", + " .distinct()\n", + " .count()\n", + 
")" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "22/12/19 15:57:00 WARN org.apache.spark.sql.execution.CacheManager: Asked to cache already cached data.\n", + " \r" + ] }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "5a448d06c31dd563cc2d2f896cd972f1626bb3e0fbcfc3d2f2ab4cc41131eab9" - } + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of credible sets in the selected studies (27054): 105658\n", + "The number of extra credible sets covered by the new dataset in the same studies: 73250 (69.3%)\n", + "Number of lost credible sets in the new datasets: 8454 (8.0%)\n", + "The number of extra credible sets with more than 1 tags covered by the new dataset: 67183 (63.6%)\n" + ] } + ], + "source": [ + "studies_with_no_sumstats = (\n", + " spark.read.parquet(\"gs://genetics_etl_python_playground/XX.XX/output/python_etl/parquet/gwas_catalog_studies/\")\n", + " .filter(~f.col(\"hasSumstats\"))\n", + " .select(f.split(f.col(\"studyId\"), \"_\").getItem(0).alias(\"studyAccession\"))\n", + " .distinct()\n", + ")\n", + "\n", + "# Dropping studies with summary statistics:\n", + "credset_compare_update = credset_compare.join(studies_with_no_sumstats, on=\"studyAccession\", how=\"inner\").distinct().persist()\n", + "\n", + "old_full_count = credset_compare_update.filter(f.col(\"old_credible_set_size\").isNotNull()).count()\n", + "extra_coverage = credset_compare_update.filter(f.col(\"old_credible_set_size\").isNull()).count()\n", + "lost_coverage = credset_compare_update.filter(f.col(\"new_credible_set_size\").isNull()).count()\n", + "\n", + "\n", + "extra_coverage_more = credset_compare_update.filter(f.col(\"old_credible_set_size\").isNull() & (f.col(\"new_credible_set_size\")>1)).count()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] 
+ }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1+1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "otgenetics-Z1loiStc-py3.8", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16 (default, Dec 7 2022, 01:39:17) \n[Clang 14.0.0 (clang-1400.0.29.202)]" }, - "nbformat": 4, - "nbformat_minor": 2 + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "5a448d06c31dd563cc2d2f896cd972f1626bb3e0fbcfc3d2f2ab4cc41131eab9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/notebooks/susie_inf_benchmark.ipynb b/notebooks/susie_inf_benchmark.ipynb index 21de23b47..d9039d0d1 100644 --- a/notebooks/susie_inf_benchmark.ipynb +++ b/notebooks/susie_inf_benchmark.ipynb @@ -28,8 +28,9 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", - "from gentropy.method.susie_inf import SUSIE_inf\n", - "from scipy.stats import pearsonr\n" + "from scipy.stats import pearsonr\n", + "\n", + "from gentropy.method.susie_inf import SUSIE_inf\n" ] }, { @@ -83,10 +84,10 @@ } ], "source": [ - "plt.scatter(x=first_lbf_susie_r, y=first_lbf_susie_inf)\n", - "plt.xlabel(\"susie_r\")\n", - "plt.ylabel(\"susie_inf\")\n", - "plt.show()\n", + "# plt.scatter(x=first_lbf_susie_r, y=first_lbf_susie_inf)\n", + "# plt.xlabel(\"susie_r\")\n", + "# plt.ylabel(\"susie_inf\")\n", + "# plt.show()\n", "\n", "corr = pearsonr(first_lbf_susie_r, first_lbf_susie_inf)\n", "print(\"Pearson's correlation coefficient: \" + str(corr[0])) # noqa: T201" @@ -116,10 +117,10 @@ } ], "source": [ - "plt.scatter(x=second_lbf_susie_r, y=second_lbf_susie_inf)\n", - 
"plt.xlabel(\"susie_r\")\n", - "plt.ylabel(\"susie_inf\")\n", - "plt.show()\n", + "# plt.scatter(x=second_lbf_susie_r, y=second_lbf_susie_inf)\n", + "# plt.xlabel(\"susie_r\")\n", + "# plt.ylabel(\"susie_inf\")\n", + "# plt.show()\n", "\n", "corr = pearsonr(second_lbf_susie_r, second_lbf_susie_inf)\n", "print(\"Pearson's correlation coefficient: \" + str(corr[0])) # noqa: T201" @@ -142,7 +143,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.14" }, "orig_nbformat": 4 }, diff --git a/notebooks/ukb_ppp_benchmark.ipynb b/notebooks/ukb_ppp_benchmark.ipynb index 4a3e200db..3e2f4a23e 100644 --- a/notebooks/ukb_ppp_benchmark.ipynb +++ b/notebooks/ukb_ppp_benchmark.ipynb @@ -37,6 +37,7 @@ "source": [ "# import matplotlib.pyplot as plt\n", "import pyspark.sql.functions as f\n", + "\n", "from gentropy.common.session import Session\n", "from gentropy.common.spark_helpers import order_array_of_structs_by_field\n", "from gentropy.dataset.ld_index import LDIndex\n", @@ -322,24 +323,24 @@ "source": [ "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", "\n", - "plt.figure(figsize=(12, 6))\n", + "# plt.figure(figsize=(12, 6))\n", "\n", - "# Histogram for locusLength\n", - "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", - "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Length\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Length\")\n", + "# # Histogram for locusLength\n", + "# plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "# plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Length\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Length\")\n", "\n", - "# Histogram for locusSize\n", - "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", - "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Size\")\n", - 
"plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Size\")\n", + "# # Histogram for locusSize\n", + "# plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "# plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Size\")\n", "\n", - "plt.tight_layout()\n", - "plt.show()" + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -359,13 +360,13 @@ } ], "source": [ - "plt.figure(figsize=(10, 6))\n", - "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", - "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", - "plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Locus Length\")\n", - "plt.grid(True)\n", - "plt.show()" + "# plt.figure(figsize=(10, 6))\n", + "# plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "# plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Locus Length\")\n", + "# plt.grid(True)\n", + "# plt.show()" ] }, { @@ -480,9 +481,7 @@ ], "source": [ "nan = susie_fm.df.filter(f.isnan(\"credibleSetlog10BF\"))\n", - "null = susie_fm.df.filter(f.isnull(\"credibleSetlog10BF\"))\n", - "print(\"Number of credible sets with 'not a number' as the logBF: \", nan.count())\n", - "print(\"Number of credible sets with 'null' as the logBF: \", null.count())" + "null = susie_fm.df.filter(f.isnull(\"credibleSetlog10BF\"))" ] }, { @@ -662,39 +661,39 @@ "source": [ "\n", "pdf = susie_results.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", - "plt.figure(figsize=(12, 12))\n", + "# plt.figure(figsize=(12, 12))\n", "\n", "# Histogram for purityMinR2\n", - "plt.subplot(2, 2, 1)\n", - "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMinR2\")\n", - "plt.xlabel(\"purityMinR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# plt.subplot(2, 2, 1)\n", + "# 
plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMinR2\")\n", + "# plt.xlabel(\"purityMinR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for purityMeanR2\n", - "plt.subplot(2, 2, 2)\n", - "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMeanR2\")\n", - "plt.xlabel(\"purityMeanR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMeanR2\n", + "# plt.subplot(2, 2, 2)\n", + "# plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMeanR2\")\n", + "# plt.xlabel(\"purityMeanR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for topPP\n", - "plt.subplot(2, 2, 3)\n", - "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of topPP\")\n", - "plt.xlabel(\"topPP\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for topPP\n", + "# plt.subplot(2, 2, 3)\n", + "# plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of topPP\")\n", + "# plt.xlabel(\"topPP\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for credSetSize\n", - "plt.subplot(2, 2, 4)\n", - "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of credSetSize\")\n", - "plt.xlabel(\"credSetSize\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for credSetSize\n", + "# plt.subplot(2, 2, 4)\n", + "# plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of credSetSize\")\n", + "# plt.xlabel(\"credSetSize\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Adjust layout to prevent overlap\n", - "plt.tight_layout()\n", - "plt.show()" + "# # Adjust layout to prevent overlap\n", + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -753,9 +752,7 @@ } ], "source": [ - "first_credset = susie_results.filter(f.col(\"credibleSetIndex\") == 1)\n", - "print(\"Number of primary credible sets: \", first_credset.count())\n", - "print(\"Number 
of unique studyIds in primary credible sets: \", first_credset.select(\"studyId\").distinct().count())" + "first_credset = susie_results.filter(f.col(\"credibleSetIndex\") == 1)" ] }, { @@ -923,39 +920,39 @@ ], "source": [ "pdf = first_credset.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", - "plt.figure(figsize=(12, 12))\n", + "# plt.figure(figsize=(12, 12))\n", "\n", - "# Histogram for purityMinR2\n", - "plt.subplot(2, 2, 1)\n", - "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMinR2\")\n", - "plt.xlabel(\"purityMinR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMinR2\n", + "# plt.subplot(2, 2, 1)\n", + "# plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMinR2\")\n", + "# plt.xlabel(\"purityMinR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for purityMeanR2\n", - "plt.subplot(2, 2, 2)\n", - "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMeanR2\")\n", - "plt.xlabel(\"purityMeanR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMeanR2\n", + "# plt.subplot(2, 2, 2)\n", + "# plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMeanR2\")\n", + "# plt.xlabel(\"purityMeanR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for topPP\n", - "plt.subplot(2, 2, 3)\n", - "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of topPP\")\n", - "plt.xlabel(\"topPP\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for topPP\n", + "# plt.subplot(2, 2, 3)\n", + "# plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of topPP\")\n", + "# plt.xlabel(\"topPP\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for credSetSize\n", - "plt.subplot(2, 2, 4)\n", - "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of 
credSetSize\")\n", - "plt.xlabel(\"credSetSize\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for credSetSize\n", + "# plt.subplot(2, 2, 4)\n", + "# plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of credSetSize\")\n", + "# plt.xlabel(\"credSetSize\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Adjust layout to prevent overlap\n", - "plt.tight_layout()\n", - "plt.show()" + "# # Adjust layout to prevent overlap\n", + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -1008,13 +1005,7 @@ " )\n", " .withColumn(\"topPP\", f.col(\"locus\").getField(\"posteriorProbability\"))\n", " .filter(~f.isnan(\"topPP\"))\n", - ")\n", - "\n", - "print(\"Number of high quality credible sets: \", qc_credsets.count())\n", - "print(\n", - " \"Number of unique studyIds in high quality credible sets: \",\n", - " qc_credsets.select(\"studyId\").distinct().count(),\n", - ")" + ")\n" ] }, { @@ -1155,39 +1146,39 @@ ], "source": [ "pdf = qc_credsets.select(\"purityMinR2\", \"purityMeanR2\", \"topPP\", \"credSetSize\").toPandas()\n", - "plt.figure(figsize=(12, 12))\n", + "# plt.figure(figsize=(12, 12))\n", "\n", - "# Histogram for purityMinR2\n", - "plt.subplot(2, 2, 1)\n", - "plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMinR2\")\n", - "plt.xlabel(\"purityMinR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMinR2\n", + "# plt.subplot(2, 2, 1)\n", + "# plt.hist(pdf[\"purityMinR2\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of purityMinR2\")\n", + "# plt.xlabel(\"purityMinR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for purityMeanR2\n", - "plt.subplot(2, 2, 2)\n", - "plt.hist(pdf[\"purityMeanR2\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of purityMeanR2\")\n", - "plt.xlabel(\"purityMeanR2\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for purityMeanR2\n", + "# plt.subplot(2, 2, 2)\n", + "# plt.hist(pdf[\"purityMeanR2\"], bins=30, 
alpha=0.7)\n", + "# plt.title(\"Histogram of purityMeanR2\")\n", + "# plt.xlabel(\"purityMeanR2\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for topPP\n", - "plt.subplot(2, 2, 3)\n", - "plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of topPP\")\n", - "plt.xlabel(\"topPP\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for topPP\n", + "# plt.subplot(2, 2, 3)\n", + "# plt.hist(pdf[\"topPP\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of topPP\")\n", + "# plt.xlabel(\"topPP\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Histogram for credSetSize\n", - "plt.subplot(2, 2, 4)\n", - "plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", - "plt.title(\"Histogram of credSetSize\")\n", - "plt.xlabel(\"credSetSize\")\n", - "plt.ylabel(\"Frequency\")\n", + "# # Histogram for credSetSize\n", + "# plt.subplot(2, 2, 4)\n", + "# plt.hist(pdf[\"credSetSize\"], bins=30, alpha=0.7)\n", + "# plt.title(\"Histogram of credSetSize\")\n", + "# plt.xlabel(\"credSetSize\")\n", + "# plt.ylabel(\"Frequency\")\n", "\n", - "# Adjust layout to prevent overlap\n", - "plt.tight_layout()\n", - "plt.show()" + "# # Adjust layout to prevent overlap\n", + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -1397,24 +1388,24 @@ "source": [ "panda_df = df.select(\"locusSize\", \"locusLength\").toPandas()\n", "\n", - "plt.figure(figsize=(12, 6))\n", + "# plt.figure(figsize=(12, 6))\n", "\n", - "# Histogram for locusLength\n", - "plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", - "plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Length\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Length\")\n", + "# # Histogram for locusLength\n", + "# plt.subplot(1, 2, 1) # 1 row, 2 columns, 1st subplot\n", + "# plt.hist(panda_df[\"locusLength\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Length\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus 
Length\")\n", "\n", - "# Histogram for locusSize\n", - "plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", - "plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", - "plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Frequency\")\n", - "plt.title(\"Histogram of Locus Size\")\n", + "# # Histogram for locusSize\n", + "# plt.subplot(1, 2, 2) # 1 row, 2 columns, 2nd subplot\n", + "# plt.hist(panda_df[\"locusSize\"], bins=30, alpha=0.7)\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Frequency\")\n", + "# plt.title(\"Histogram of Locus Size\")\n", "\n", - "plt.tight_layout()\n", - "plt.show()" + "# plt.tight_layout()\n", + "# plt.show()" ] }, { @@ -1434,13 +1425,13 @@ } ], "source": [ - "plt.figure(figsize=(10, 6))\n", - "plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", - "plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", - "plt.xlabel(\"Locus Size\")\n", - "plt.ylabel(\"Locus Length\")\n", - "plt.grid(True)\n", - "plt.show()" + "# plt.figure(figsize=(10, 6))\n", + "# plt.scatter(panda_df[\"locusSize\"], panda_df[\"locusLength\"], alpha=0.5)\n", + "# plt.title(\"Scatter Plot of Locus Size vs Locus Length\")\n", + "# plt.xlabel(\"Locus Size\")\n", + "# plt.ylabel(\"Locus Length\")\n", + "# plt.grid(True)\n", + "# plt.show()" ] } ], diff --git a/poetry.lock b/poetry.lock index 64a192984..04804d728 100644 --- a/poetry.lock +++ b/poetry.lock @@ -7357,29 +7357,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.5.1" +version = "0.6.1" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.5.1-py3-none-linux_armv6l.whl", hash = "sha256:6ecf968fcf94d942d42b700af18ede94b07521bd188aaf2cd7bc898dd8cb63b6"}, - {file = "ruff-0.5.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:204fb0a472f00f2e6280a7c8c7c066e11e20e23a37557d63045bf27a616ba61c"}, - {file = "ruff-0.5.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d235968460e8758d1e1297e1de59a38d94102f60cafb4d5382033c324404ee9d"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38beace10b8d5f9b6bdc91619310af6d63dd2019f3fb2d17a2da26360d7962fa"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e478d2f09cf06add143cf8c4540ef77b6599191e0c50ed976582f06e588c994"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0368d765eec8247b8550251c49ebb20554cc4e812f383ff9f5bf0d5d94190b0"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:3a9a9a1b582e37669b0138b7c1d9d60b9edac880b80eb2baba6d0e566bdeca4d"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bdd9f723e16003623423affabcc0a807a66552ee6a29f90eddad87a40c750b78"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:be9fd62c1e99539da05fcdc1e90d20f74aec1b7a1613463ed77870057cd6bd96"}, - {file = "ruff-0.5.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e216fc75a80ea1fbd96af94a6233d90190d5b65cc3d5dfacf2bd48c3e067d3e1"}, - {file = "ruff-0.5.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c4c2112e9883a40967827d5c24803525145e7dab315497fae149764979ac7929"}, - {file = "ruff-0.5.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dfaf11c8a116394da3b65cd4b36de30d8552fa45b8119b9ef5ca6638ab964fa3"}, - {file = "ruff-0.5.1-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:d7ceb9b2fe700ee09a0c6b192c5ef03c56eb82a0514218d8ff700f6ade004108"}, - {file = "ruff-0.5.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:bac6288e82f6296f82ed5285f597713acb2a6ae26618ffc6b429c597b392535c"}, - {file = "ruff-0.5.1-py3-none-win32.whl", hash = "sha256:5c441d9c24ec09e1cb190a04535c5379b36b73c4bc20aa180c54812c27d1cca4"}, - {file = "ruff-0.5.1-py3-none-win_amd64.whl", hash = "sha256:b1789bf2cd3d1b5a7d38397cac1398ddf3ad7f73f4de01b1e913e2abc7dfc51d"}, - {file = "ruff-0.5.1-py3-none-win_arm64.whl", hash = "sha256:2875b7596a740cbbd492f32d24be73e545a4ce0a3daf51e4f4e609962bfd3cd2"}, - {file = "ruff-0.5.1.tar.gz", hash = "sha256:3164488aebd89b1745b47fd00604fb4358d774465f20d1fcd907f9c0fc1b0655"}, + {file = "ruff-0.6.1-py3-none-linux_armv6l.whl", hash = "sha256:b4bb7de6a24169dc023f992718a9417380301b0c2da0fe85919f47264fb8add9"}, + {file = "ruff-0.6.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:45efaae53b360c81043e311cdec8a7696420b3d3e8935202c2846e7a97d4edae"}, + {file = "ruff-0.6.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bc60c7d71b732c8fa73cf995efc0c836a2fd8b9810e115be8babb24ae87e0850"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c7477c3b9da822e2db0b4e0b59e61b8a23e87886e727b327e7dcaf06213c5cf"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a0af7ab3f86e3dc9f157a928e08e26c4b40707d0612b01cd577cc84b8905cc9"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:392688dbb50fecf1bf7126731c90c11a9df1c3a4cdc3f481b53e851da5634fa5"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5278d3e095ccc8c30430bcc9bc550f778790acc211865520f3041910a28d0024"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe6d5f65d6f276ee7a0fc50a0cecaccb362d30ef98a110f99cac1c7872df2f18"}, + {file = 
"ruff-0.6.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2e0dd11e2ae553ee5c92a81731d88a9883af8db7408db47fc81887c1f8b672e"}, + {file = "ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d812615525a34ecfc07fd93f906ef5b93656be01dfae9a819e31caa6cfe758a1"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faaa4060f4064c3b7aaaa27328080c932fa142786f8142aff095b42b6a2eb631"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99d7ae0df47c62729d58765c593ea54c2546d5de213f2af2a19442d50a10cec9"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9eb18dfd7b613eec000e3738b3f0e4398bf0153cb80bfa3e351b3c1c2f6d7b15"}, + {file = "ruff-0.6.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c62bc04c6723a81e25e71715aa59489f15034d69bf641df88cb38bdc32fd1dbb"}, + {file = "ruff-0.6.1-py3-none-win32.whl", hash = "sha256:9fb4c4e8b83f19c9477a8745e56d2eeef07a7ff50b68a6998f7d9e2e3887bdc4"}, + {file = "ruff-0.6.1-py3-none-win_amd64.whl", hash = "sha256:c2ebfc8f51ef4aca05dad4552bbcf6fe8d1f75b2f6af546cc47cc1c1ca916b5b"}, + {file = "ruff-0.6.1-py3-none-win_arm64.whl", hash = "sha256:3bc81074971b0ffad1bd0c52284b22411f02a11a012082a76ac6da153536e014"}, + {file = "ruff-0.6.1.tar.gz", hash = "sha256:af3ffd8c6563acb8848d33cd19a69b9bfe943667f0419ca083f8ebe4224a3436"}, ] [[package]] @@ -8691,4 +8691,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "5ebbf7ab3f609d1206d56ff465af3a648b945cb79e8415be61798530943a5911" +content-hash = "622f47ea07bd1c332dcde8368143d533a27eefdd3d8096e8b648432e0dcd0dfb" diff --git a/pyproject.toml b/pyproject.toml index 3a206f4ef..1539cf7ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ pep8-naming = "^0.14.1" interrogate = "^1.7.0" isort = "^5.13.2" darglint = "^1.8.1" -ruff = "^0.5.1" +ruff = "^0.6.1" 
[tool.poetry.group.docs.dependencies] mkdocs = "^1.5.3" From 4169b813bcff4eae01af5c978269b52ae1cc629b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 09:56:27 +0100 Subject: [PATCH 010/188] chore: pre-commit autoupdate (#724) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.5 → v0.6.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.5...v0.6.1) - [github.com/pre-commit/mirrors-mypy: v1.11.0 → v1.11.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.11.0...v1.11.1) * chore: pre-commit auto fixes [...] --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: David Ochoa --- .pre-commit-config.yaml | 4 ++-- .../howto/python_api/b_create_dataset.py | 1 + src/airflow/dags/common_airflow.py | 3 ++- src/airflow/dags/eqtl_preprocess.py | 1 + src/airflow/dags/finngen_harmonisation.py | 1 + src/airflow/dags/finngen_preprocess.py | 1 + src/airflow/dags/genetics_etl.py | 1 + src/airflow/dags/gnomad_preprocess.py | 1 + .../dags/gwas_catalog_harmonisation.py | 1 + src/airflow/dags/gwas_catalog_preprocess.py | 1 + src/airflow/dags/gwas_curation_update.py | 1 + src/airflow/dags/ukb_ppp_eur.py | 1 + src/airflow/dags/variant_index.py | 15 ++++++++------- tests/airflow/test_dag.py | 1 + tests/gentropy/common/test_spark_helpers.py | 7 ++++--- tests/gentropy/common/test_version_engine.py | 1 + tests/gentropy/conftest.py | 4 ++-- tests/gentropy/dataset/test_dataset.py | 5 +++-- tests/gentropy/dataset/test_gene_index.py | 3 ++- tests/gentropy/dataset/test_l2g.py | 1 + tests/gentropy/dataset/test_pairwise_ld.py | 3 ++- tests/gentropy/dataset/test_study_index.py | 5 +++-- tests/gentropy/dataset/test_study_locus.py | 19 ++++++++++--------- .../dataset/test_study_locus_overlap.py | 3 ++- 
.../dataset/test_study_locus_overlaps.py | 4 +++- .../dataset/test_summary_statistics.py | 3 ++- tests/gentropy/dataset/test_variant_index.py | 5 +++-- .../datasource/ensembl/test_vep_variants.py | 5 +++-- .../eqtl_catalogue/test_eqtl_catalogue.py | 3 ++- .../finngen/test_finngen_finemapping.py | 3 ++- .../finngen/test_finngen_study_index.py | 3 ++- .../finngen/test_finngen_summary_stats.py | 3 ++- .../datasource/gnomad/test_gnomad_ld.py | 3 ++- .../test_gwas_catalog_associations.py | 7 ++++--- .../test_gwas_catalog_curation.py | 3 ++- .../test_gwas_catalog_study_index.py | 3 ++- .../test_gwas_catalog_study_splitter.py | 1 + .../test_gwas_catalog_summary_statistics.py | 1 + .../datasource/intervals/test_andersson.py | 3 ++- .../datasource/intervals/test_javierre.py | 3 ++- .../datasource/intervals/test_jung.py | 3 ++- .../datasource/intervals/test_thurman.py | 3 ++- .../open_targets/test_l2g_gold_standard.py | 3 ++- .../datasource/open_targets/test_target.py | 3 ++- .../datasource/open_targets/test_variants.py | 3 ++- .../ukbiobank/test_ukbiobank_study_index.py | 3 ++- tests/gentropy/docs/test_applying_methods.py | 4 ++-- tests/gentropy/docs/test_create_dataset.py | 4 ++-- .../docs/test_creating_spark_session.py | 3 +-- tests/gentropy/docs/test_inspect_dataset.py | 2 +- tests/gentropy/method/test_carma.py | 1 + tests/gentropy/method/test_clump.py | 1 + .../method/test_colocalisation_method.py | 7 ++++--- tests/gentropy/method/test_ld.py | 6 ++++-- .../method/test_locus_breaker_clumping.py | 5 +++-- tests/gentropy/method/test_locus_to_gene.py | 6 ++++-- tests/gentropy/method/test_pics.py | 3 ++- tests/gentropy/method/test_qc_of_sumstats.py | 3 ++- .../method/test_sumstat_imputation.py | 1 + tests/gentropy/method/test_susie_inf.py | 1 + .../method/test_window_based_clumping.py | 5 +++-- tests/gentropy/test_cli.py | 3 ++- tests/gentropy/test_schemas.py | 1 + tests/gentropy/test_spark_helpers.py | 3 ++- 64 files changed, 137 insertions(+), 77 deletions(-) diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a68850464..17b7a4f6e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.5 + rev: v0.6.1 hooks: - id: ruff args: @@ -65,7 +65,7 @@ repos: stages: [commit-msg] - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.11.0" + rev: "v1.11.1" hooks: - id: mypy args: diff --git a/docs/src_snippets/howto/python_api/b_create_dataset.py b/docs/src_snippets/howto/python_api/b_create_dataset.py index 1a006c97a..61945862e 100644 --- a/docs/src_snippets/howto/python_api/b_create_dataset.py +++ b/docs/src_snippets/howto/python_api/b_create_dataset.py @@ -42,6 +42,7 @@ def create_from_pandas() -> SummaryStatistics: """Create a dataset from a path with Pandas files.""" # --8<-- [start:create_from_pandas_import] import pyspark.pandas as ps + from gentropy.dataset.summary_statistics import SummaryStatistics # --8<-- [end:create_from_pandas_import] diff --git a/src/airflow/dags/common_airflow.py b/src/airflow/dags/common_airflow.py index d3bdc7e29..9c3c2f91c 100644 --- a/src/airflow/dags/common_airflow.py +++ b/src/airflow/dags/common_airflow.py @@ -6,6 +6,8 @@ import pendulum import yaml +from google.cloud import batch_v1, dataproc_v1, storage + from airflow.providers.google.cloud.operators.dataproc import ( ClusterGenerator, DataprocCreateClusterOperator, @@ -13,7 +15,6 @@ DataprocSubmitJobOperator, ) from airflow.utils.trigger_rule import TriggerRule -from google.cloud import batch_v1, dataproc_v1, storage if TYPE_CHECKING: from pathlib import Path diff --git a/src/airflow/dags/eqtl_preprocess.py b/src/airflow/dags/eqtl_preprocess.py index aef70085d..309604e09 100644 --- a/src/airflow/dags/eqtl_preprocess.py +++ b/src/airflow/dags/eqtl_preprocess.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG from 
airflow.providers.google.cloud.operators.dataflow import ( DataflowTemplatedJobStartOperator, diff --git a/src/airflow/dags/finngen_harmonisation.py b/src/airflow/dags/finngen_harmonisation.py index b40561fd9..18f81a376 100644 --- a/src/airflow/dags/finngen_harmonisation.py +++ b/src/airflow/dags/finngen_harmonisation.py @@ -8,6 +8,7 @@ from typing import Any import common_airflow as common + from airflow.decorators import task from airflow.models.dag import DAG from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator diff --git a/src/airflow/dags/finngen_preprocess.py b/src/airflow/dags/finngen_preprocess.py index 3a60a0cfd..fbfab91e5 100644 --- a/src/airflow/dags/finngen_preprocess.py +++ b/src/airflow/dags/finngen_preprocess.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG from airflow.utils.task_group import TaskGroup from airflow.utils.trigger_rule import TriggerRule diff --git a/src/airflow/dags/genetics_etl.py b/src/airflow/dags/genetics_etl.py index 025e4fa0a..aeb87398c 100644 --- a/src/airflow/dags/genetics_etl.py +++ b/src/airflow/dags/genetics_etl.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG from airflow.operators.python import ShortCircuitOperator from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator diff --git a/src/airflow/dags/gnomad_preprocess.py b/src/airflow/dags/gnomad_preprocess.py index 03962bec5..54e6b6bf4 100644 --- a/src/airflow/dags/gnomad_preprocess.py +++ b/src/airflow/dags/gnomad_preprocess.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG CLUSTER_NAME = "gnomad-preprocess" diff --git a/src/airflow/dags/gwas_catalog_harmonisation.py b/src/airflow/dags/gwas_catalog_harmonisation.py index 25970fa8a..e6399e957 100644 --- a/src/airflow/dags/gwas_catalog_harmonisation.py +++ 
b/src/airflow/dags/gwas_catalog_harmonisation.py @@ -8,6 +8,7 @@ from typing import Any import common_airflow as common + from airflow.decorators import task from airflow.models.dag import DAG from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator diff --git a/src/airflow/dags/gwas_catalog_preprocess.py b/src/airflow/dags/gwas_catalog_preprocess.py index c4722b83b..7f6280242 100644 --- a/src/airflow/dags/gwas_catalog_preprocess.py +++ b/src/airflow/dags/gwas_catalog_preprocess.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG from airflow.operators.python import PythonOperator from airflow.providers.google.cloud.hooks.gcs import GCSHook diff --git a/src/airflow/dags/gwas_curation_update.py b/src/airflow/dags/gwas_curation_update.py index 830007e6d..d5fd38e35 100644 --- a/src/airflow/dags/gwas_curation_update.py +++ b/src/airflow/dags/gwas_curation_update.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG CLUSTER_NAME = "otg-gwascatalog-curation" diff --git a/src/airflow/dags/ukb_ppp_eur.py b/src/airflow/dags/ukb_ppp_eur.py index f8a7c2342..f0d0e1fe8 100644 --- a/src/airflow/dags/ukb_ppp_eur.py +++ b/src/airflow/dags/ukb_ppp_eur.py @@ -5,6 +5,7 @@ from pathlib import Path import common_airflow as common + from airflow.models.dag import DAG CLUSTER_NAME = "otg-ukb-ppp-eur" diff --git a/src/airflow/dags/variant_index.py b/src/airflow/dags/variant_index.py index f1e402a41..eb102f277 100644 --- a/src/airflow/dags/variant_index.py +++ b/src/airflow/dags/variant_index.py @@ -8,13 +8,6 @@ from pathlib import Path from typing import Any -from airflow.decorators import task -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.cloud_batch import ( - CloudBatchSubmitJobOperator, -) -from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator -from airflow.utils.trigger_rule import 
TriggerRule from common_airflow import ( create_batch_job, create_cluster, @@ -28,6 +21,14 @@ ) from google.cloud import batch_v1 +from airflow.decorators import task +from airflow.models.dag import DAG +from airflow.providers.google.cloud.operators.cloud_batch import ( + CloudBatchSubmitJobOperator, +) +from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator +from airflow.utils.trigger_rule import TriggerRule + PROJECT_ID = "open-targets-genetics-dev" REGION = "europe-west1" CONFIG_FILE_PATH = Path(__file__).parent / "configs" / "variant_sources.yaml" diff --git a/tests/airflow/test_dag.py b/tests/airflow/test_dag.py index 49edc76ee..e18f91fb1 100644 --- a/tests/airflow/test_dag.py +++ b/tests/airflow/test_dag.py @@ -3,6 +3,7 @@ from __future__ import annotations import pytest + from airflow.models import DagBag diff --git a/tests/gentropy/common/test_spark_helpers.py b/tests/gentropy/common/test_spark_helpers.py index 19ef6a436..bd872ae90 100644 --- a/tests/gentropy/common/test_spark_helpers.py +++ b/tests/gentropy/common/test_spark_helpers.py @@ -3,13 +3,14 @@ from __future__ import annotations import pytest +from pyspark.sql import Column, SparkSession +from pyspark.sql import functions as f +from pyspark.sql import types as t + from gentropy.common.spark_helpers import ( enforce_schema, order_array_of_structs_by_field, ) -from pyspark.sql import Column, SparkSession -from pyspark.sql import functions as f -from pyspark.sql import types as t def test_order_array_of_structs_by_field(spark: SparkSession) -> None: diff --git a/tests/gentropy/common/test_version_engine.py b/tests/gentropy/common/test_version_engine.py index 46a670165..2ee2e12ce 100644 --- a/tests/gentropy/common/test_version_engine.py +++ b/tests/gentropy/common/test_version_engine.py @@ -5,6 +5,7 @@ from pathlib import Path import pytest + from gentropy.common.version_engine import GnomADVersionSeeker, VersionEngine diff --git a/tests/gentropy/conftest.py 
b/tests/gentropy/conftest.py index c35188466..9ae7ace58 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -9,6 +9,8 @@ import numpy as np import pandas as pd import pytest +from pyspark.sql import DataFrame, SparkSession + from gentropy.common.Liftover import LiftOverSpark from gentropy.common.session import Session from gentropy.dataset.colocalisation import Colocalisation @@ -28,8 +30,6 @@ from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex from gentropy.datasource.gwas_catalog.associations import StudyLocusGWASCatalog from gentropy.datasource.gwas_catalog.study_index import StudyIndexGWASCatalog -from pyspark.sql import DataFrame, SparkSession - from utils.spark import get_spark_testing_conf diff --git a/tests/gentropy/dataset/test_dataset.py b/tests/gentropy/dataset/test_dataset.py index bddbb4f6a..a152b1ac8 100644 --- a/tests/gentropy/dataset/test_dataset.py +++ b/tests/gentropy/dataset/test_dataset.py @@ -5,8 +5,6 @@ import numpy as np import pyspark.sql.functions as f import pytest -from gentropy.dataset.dataset import Dataset -from gentropy.dataset.study_index import StudyIndex from pyspark.sql import SparkSession from pyspark.sql.types import ( DoubleType, @@ -15,6 +13,9 @@ StructType, ) +from gentropy.dataset.dataset import Dataset +from gentropy.dataset.study_index import StudyIndex + class MockDataset(Dataset): """Concrete subclass of Dataset for testing. 
Necessary because Dataset is abstract.""" diff --git a/tests/gentropy/dataset/test_gene_index.py b/tests/gentropy/dataset/test_gene_index.py index 999f089b3..e4ae8e581 100644 --- a/tests/gentropy/dataset/test_gene_index.py +++ b/tests/gentropy/dataset/test_gene_index.py @@ -2,9 +2,10 @@ from __future__ import annotations -from gentropy.dataset.gene_index import GeneIndex from pyspark.sql import DataFrame +from gentropy.dataset.gene_index import GeneIndex + def test_gene_index_creation(mock_gene_index: GeneIndex) -> None: """Test gene index creation with mock gene index.""" diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py index 547c58e51..d0f1c3672 100644 --- a/tests/gentropy/dataset/test_l2g.py +++ b/tests/gentropy/dataset/test_l2g.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import pytest + from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.l2g_prediction import L2GPrediction diff --git a/tests/gentropy/dataset/test_pairwise_ld.py b/tests/gentropy/dataset/test_pairwise_ld.py index 11ebf75ca..9a57c129b 100644 --- a/tests/gentropy/dataset/test_pairwise_ld.py +++ b/tests/gentropy/dataset/test_pairwise_ld.py @@ -6,10 +6,11 @@ import numpy as np import pytest -from gentropy.dataset.pairwise_ld import PairwiseLD from pyspark.sql import functions as f from pyspark.sql.window import Window +from gentropy.dataset.pairwise_ld import PairwiseLD + if TYPE_CHECKING: from pyspark.sql import SparkSession diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index d693f147c..3bdd7a5cb 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -3,11 +3,12 @@ from __future__ import annotations import pytest -from gentropy.dataset.gene_index import GeneIndex -from gentropy.dataset.study_index import StudyIndex from pyspark.sql import DataFrame, 
SparkSession from pyspark.sql import functions as f +from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.study_index import StudyIndex + def test_study_index_creation(mock_study_index: StudyIndex) -> None: """Test study index creation with mock data.""" diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 9ac58149b..9b40796db 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -7,15 +7,6 @@ import pyspark.sql.functions as f import pyspark.sql.types as t import pytest -from gentropy.dataset.ld_index import LDIndex -from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import ( - CredibleInterval, - StudyLocus, - StudyLocusQualityCheck, -) -from gentropy.dataset.study_locus_overlap import StudyLocusOverlap -from gentropy.dataset.summary_statistics import SummaryStatistics from pyspark.sql import Column, Row, SparkSession from pyspark.sql.types import ( ArrayType, @@ -27,6 +18,16 @@ StructType, ) +from gentropy.dataset.ld_index import LDIndex +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import ( + CredibleInterval, + StudyLocus, + StudyLocusQualityCheck, +) +from gentropy.dataset.study_locus_overlap import StudyLocusOverlap +from gentropy.dataset.summary_statistics import SummaryStatistics + @pytest.mark.parametrize( "has_overlap, expected", diff --git a/tests/gentropy/dataset/test_study_locus_overlap.py b/tests/gentropy/dataset/test_study_locus_overlap.py index 53d87226c..e26b59c30 100644 --- a/tests/gentropy/dataset/test_study_locus_overlap.py +++ b/tests/gentropy/dataset/test_study_locus_overlap.py @@ -2,9 +2,10 @@ from __future__ import annotations -from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from pyspark.sql import SparkSession +from gentropy.dataset.study_locus_overlap import StudyLocusOverlap + def test_study_locus_overlap_creation( 
mock_study_locus_overlap: StudyLocusOverlap, diff --git a/tests/gentropy/dataset/test_study_locus_overlaps.py b/tests/gentropy/dataset/test_study_locus_overlaps.py index 8e732fc5c..bd3415959 100644 --- a/tests/gentropy/dataset/test_study_locus_overlaps.py +++ b/tests/gentropy/dataset/test_study_locus_overlaps.py @@ -6,13 +6,15 @@ import pyspark.sql.types as t import pytest + from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.study_locus_overlap import StudyLocusOverlap if TYPE_CHECKING: - from gentropy.dataset.study_index import StudyIndex from pyspark.sql import SparkSession + from gentropy.dataset.study_index import StudyIndex + def test_study_locus_overlap_creation( mock_study_locus_overlap: StudyLocusOverlap, diff --git a/tests/gentropy/dataset/test_summary_statistics.py b/tests/gentropy/dataset/test_summary_statistics.py index 9885181a8..cf3cfdae7 100644 --- a/tests/gentropy/dataset/test_summary_statistics.py +++ b/tests/gentropy/dataset/test_summary_statistics.py @@ -4,9 +4,10 @@ from typing import TYPE_CHECKING +from pyspark.sql import types as t + from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.summary_statistics import SummaryStatistics -from pyspark.sql import types as t if TYPE_CHECKING: from pyspark.sql import SparkSession diff --git a/tests/gentropy/dataset/test_variant_index.py b/tests/gentropy/dataset/test_variant_index.py index 9a03308a8..12afba89f 100644 --- a/tests/gentropy/dataset/test_variant_index.py +++ b/tests/gentropy/dataset/test_variant_index.py @@ -5,11 +5,12 @@ from typing import TYPE_CHECKING import pytest +from pyspark.sql import functions as f +from pyspark.sql import types as t + from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.v2g import V2G from gentropy.dataset.variant_index import VariantIndex -from pyspark.sql import functions as f -from pyspark.sql import types as t if TYPE_CHECKING: from pyspark.sql import SparkSession diff --git 
a/tests/gentropy/datasource/ensembl/test_vep_variants.py b/tests/gentropy/datasource/ensembl/test_vep_variants.py index 1401f1b6c..e3313a1e5 100644 --- a/tests/gentropy/datasource/ensembl/test_vep_variants.py +++ b/tests/gentropy/datasource/ensembl/test_vep_variants.py @@ -5,11 +5,12 @@ from typing import TYPE_CHECKING import pytest -from gentropy.dataset.variant_index import VariantIndex -from gentropy.datasource.ensembl.vep_parser import VariantEffectPredictorParser from pyspark.sql import DataFrame from pyspark.sql import functions as f +from gentropy.dataset.variant_index import VariantIndex +from gentropy.datasource.ensembl.vep_parser import VariantEffectPredictorParser + if TYPE_CHECKING: from pyspark.sql import SparkSession diff --git a/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py b/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py index a488ab78a..ce892128a 100644 --- a/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py +++ b/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py @@ -3,11 +3,12 @@ from __future__ import annotations import pytest +from pyspark.sql import DataFrame + from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex -from pyspark.sql import DataFrame @pytest.fixture diff --git a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py index 089b10f74..56a03fcb7 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py +++ b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py @@ -2,9 +2,10 @@ from __future__ import annotations +from pyspark.sql import SparkSession + from gentropy.dataset.study_locus import StudyLocus from gentropy.datasource.finngen.finemapping import 
FinnGenFinemapping -from pyspark.sql import SparkSession def test_finngen_finemapping_from_finngen_susie_finemapping( diff --git a/tests/gentropy/datasource/finngen/test_finngen_study_index.py b/tests/gentropy/datasource/finngen/test_finngen_study_index.py index 96a24db94..6fc4665dc 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_study_index.py +++ b/tests/gentropy/datasource/finngen/test_finngen_study_index.py @@ -2,9 +2,10 @@ from __future__ import annotations +from pyspark.sql import SparkSession + from gentropy.dataset.study_index import StudyIndex from gentropy.datasource.finngen.study_index import FinnGenStudyIndex -from pyspark.sql import SparkSession def test_finngen_study_index_from_source(spark: SparkSession) -> None: diff --git a/tests/gentropy/datasource/finngen/test_finngen_summary_stats.py b/tests/gentropy/datasource/finngen/test_finngen_summary_stats.py index 624f66d66..73cffdc9f 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_summary_stats.py +++ b/tests/gentropy/datasource/finngen/test_finngen_summary_stats.py @@ -2,9 +2,10 @@ from __future__ import annotations +from pyspark.sql import SparkSession + from gentropy.dataset.summary_statistics import SummaryStatistics from gentropy.datasource.finngen.summary_stats import FinnGenSummaryStats -from pyspark.sql import SparkSession def test_finngen_summary_stats_from_source(spark: SparkSession) -> None: diff --git a/tests/gentropy/datasource/gnomad/test_gnomad_ld.py b/tests/gentropy/datasource/gnomad/test_gnomad_ld.py index 5fa3b00a4..78b96ad84 100644 --- a/tests/gentropy/datasource/gnomad/test_gnomad_ld.py +++ b/tests/gentropy/datasource/gnomad/test_gnomad_ld.py @@ -7,10 +7,11 @@ import hail as hl import pytest -from gentropy.datasource.gnomad.ld import GnomADLDMatrix from pyspark.sql import DataFrame, SparkSession from pyspark.sql import functions as f +from gentropy.datasource.gnomad.ld import GnomADLDMatrix + @pytest.mark.parametrize( ("observed", "expected"), diff --git 
a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py index e7067e3d9..2179150cd 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py @@ -2,14 +2,15 @@ from __future__ import annotations +from pyspark.sql import DataFrame +from pyspark.sql import functions as f +from pyspark.sql.types import LongType + from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.gwas_catalog.associations import ( GWASCatalogCuratedAssociationsParser, StudyLocusGWASCatalog, ) -from pyspark.sql import DataFrame -from pyspark.sql import functions as f -from pyspark.sql.types import LongType def test_study_locus_gwas_catalog_creation( diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py index 764d003e8..4163531cb 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py @@ -7,9 +7,10 @@ import pyspark.sql.functions as f import pyspark.sql.types as t import pytest -from gentropy.datasource.gwas_catalog.study_index import StudyIndexGWASCatalog from pyspark.sql import DataFrame +from gentropy.datasource.gwas_catalog.study_index import StudyIndexGWASCatalog + if TYPE_CHECKING: from pyspark.sql import SparkSession diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py index 96dccdf9d..b91529b3d 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py @@ -2,11 +2,12 @@ from __future__ import annotations +from pyspark.sql import DataFrame + from 
gentropy.datasource.gwas_catalog.study_index import ( StudyIndexGWASCatalog, StudyIndexGWASCatalogParser, ) -from pyspark.sql import DataFrame def test_annotate_discovery_sample_sizes( diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py index 58bcb53c8..c6bd03054 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py @@ -6,6 +6,7 @@ import pyspark.sql.functions as f import pytest + from gentropy.datasource.gwas_catalog.associations import StudyLocusGWASCatalog from gentropy.datasource.gwas_catalog.study_index import StudyIndexGWASCatalog from gentropy.datasource.gwas_catalog.study_splitter import GWASCatalogStudySplitter diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py index 4ede62f36..867345455 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py @@ -6,6 +6,7 @@ import pyspark.sql.functions as f import pytest + from gentropy.dataset.summary_statistics import SummaryStatistics from gentropy.datasource.gwas_catalog.summary_statistics import ( GWASCatalogSummaryStatistics, diff --git a/tests/gentropy/datasource/intervals/test_andersson.py b/tests/gentropy/datasource/intervals/test_andersson.py index 09204967d..69575b7c3 100644 --- a/tests/gentropy/datasource/intervals/test_andersson.py +++ b/tests/gentropy/datasource/intervals/test_andersson.py @@ -3,11 +3,12 @@ from __future__ import annotations import pytest +from pyspark.sql import DataFrame, SparkSession + from gentropy.common.Liftover import LiftOverSpark from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import 
Intervals from gentropy.datasource.intervals.andersson import IntervalsAndersson -from pyspark.sql import DataFrame, SparkSession @pytest.fixture(scope="module") diff --git a/tests/gentropy/datasource/intervals/test_javierre.py b/tests/gentropy/datasource/intervals/test_javierre.py index a6a74e94e..886a28c52 100644 --- a/tests/gentropy/datasource/intervals/test_javierre.py +++ b/tests/gentropy/datasource/intervals/test_javierre.py @@ -3,11 +3,12 @@ from __future__ import annotations import pytest +from pyspark.sql import DataFrame, SparkSession + from gentropy.common.Liftover import LiftOverSpark from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals from gentropy.datasource.intervals.javierre import IntervalsJavierre -from pyspark.sql import DataFrame, SparkSession @pytest.fixture(scope="module") diff --git a/tests/gentropy/datasource/intervals/test_jung.py b/tests/gentropy/datasource/intervals/test_jung.py index 44062f680..e391b8f96 100644 --- a/tests/gentropy/datasource/intervals/test_jung.py +++ b/tests/gentropy/datasource/intervals/test_jung.py @@ -3,11 +3,12 @@ from __future__ import annotations import pytest +from pyspark.sql import DataFrame, SparkSession + from gentropy.common.Liftover import LiftOverSpark from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals from gentropy.datasource.intervals.jung import IntervalsJung -from pyspark.sql import DataFrame, SparkSession @pytest.fixture(scope="module") diff --git a/tests/gentropy/datasource/intervals/test_thurman.py b/tests/gentropy/datasource/intervals/test_thurman.py index 853adb380..616e1abec 100644 --- a/tests/gentropy/datasource/intervals/test_thurman.py +++ b/tests/gentropy/datasource/intervals/test_thurman.py @@ -3,11 +3,12 @@ from __future__ import annotations import pytest +from pyspark.sql import DataFrame, SparkSession + from gentropy.common.Liftover import LiftOverSpark from gentropy.dataset.gene_index import 
GeneIndex from gentropy.dataset.intervals import Intervals from gentropy.datasource.intervals.thurman import IntervalsThurman -from pyspark.sql import DataFrame, SparkSession @pytest.fixture(scope="module") diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py index 0690781f5..6f91d32a9 100644 --- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py +++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py @@ -5,12 +5,13 @@ from typing import TYPE_CHECKING import pytest +from pyspark.sql import DataFrame + from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.v2g import V2G from gentropy.datasource.open_targets.l2g_gold_standard import ( OpenTargetsL2GGoldStandard, ) -from pyspark.sql import DataFrame if TYPE_CHECKING: from pyspark.sql.session import SparkSession diff --git a/tests/gentropy/datasource/open_targets/test_target.py b/tests/gentropy/datasource/open_targets/test_target.py index c0a92fcd7..091dcea53 100644 --- a/tests/gentropy/datasource/open_targets/test_target.py +++ b/tests/gentropy/datasource/open_targets/test_target.py @@ -2,9 +2,10 @@ from __future__ import annotations +from pyspark.sql import DataFrame + from gentropy.dataset.gene_index import GeneIndex from gentropy.datasource.open_targets.target import OpenTargetsTarget -from pyspark.sql import DataFrame def test_open_targets_as_gene_index(sample_target_index: DataFrame) -> None: diff --git a/tests/gentropy/datasource/open_targets/test_variants.py b/tests/gentropy/datasource/open_targets/test_variants.py index f75fd8860..9d2c56e73 100644 --- a/tests/gentropy/datasource/open_targets/test_variants.py +++ b/tests/gentropy/datasource/open_targets/test_variants.py @@ -8,9 +8,10 @@ from gentropy.datasource.open_targets.variants import OpenTargetsVariant if TYPE_CHECKING: - from gentropy.common.session import Session from pyspark.sql import SparkSession + 
from gentropy.common.session import Session + class TestOpenTargetsVariant: """Test suite for the OpenTargetsVariant class.""" diff --git a/tests/gentropy/datasource/ukbiobank/test_ukbiobank_study_index.py b/tests/gentropy/datasource/ukbiobank/test_ukbiobank_study_index.py index ff07db5db..9f48ebef2 100644 --- a/tests/gentropy/datasource/ukbiobank/test_ukbiobank_study_index.py +++ b/tests/gentropy/datasource/ukbiobank/test_ukbiobank_study_index.py @@ -2,9 +2,10 @@ from __future__ import annotations +from pyspark.sql import DataFrame + from gentropy.dataset.study_index import StudyIndex from gentropy.datasource.ukbiobank.study_index import UKBiobankStudyIndex -from pyspark.sql import DataFrame def test_ukbiobank_study_index_from_source( diff --git a/tests/gentropy/docs/test_applying_methods.py b/tests/gentropy/docs/test_applying_methods.py index cbfdb9155..14bd70fd7 100644 --- a/tests/gentropy/docs/test_applying_methods.py +++ b/tests/gentropy/docs/test_applying_methods.py @@ -3,14 +3,14 @@ from typing import Any import pytest -from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.summary_statistics import SummaryStatistics from docs.src_snippets.howto.python_api.c_applying_methods import ( apply_class_method_clumping, apply_class_method_pics, apply_instance_method, ) +from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.summary_statistics import SummaryStatistics @pytest.mark.parametrize( diff --git a/tests/gentropy/docs/test_create_dataset.py b/tests/gentropy/docs/test_create_dataset.py index 2daae0c6a..256f11251 100644 --- a/tests/gentropy/docs/test_create_dataset.py +++ b/tests/gentropy/docs/test_create_dataset.py @@ -3,14 +3,14 @@ from typing import Any import pytest -from gentropy.common.session import Session -from gentropy.dataset.summary_statistics import SummaryStatistics from docs.src_snippets.howto.python_api.b_create_dataset import ( create_from_pandas, create_from_parquet, create_from_source, ) +from 
gentropy.common.session import Session +from gentropy.dataset.summary_statistics import SummaryStatistics @pytest.mark.parametrize( diff --git a/tests/gentropy/docs/test_creating_spark_session.py b/tests/gentropy/docs/test_creating_spark_session.py index 197d22d56..088f23d38 100644 --- a/tests/gentropy/docs/test_creating_spark_session.py +++ b/tests/gentropy/docs/test_creating_spark_session.py @@ -1,11 +1,10 @@ """Testing creating spark session docs.""" -from gentropy.common.session import Session - from docs.src_snippets.howto.python_api.a_creating_spark_session import ( custom_session, default_session, ) +from gentropy.common.session import Session def test_default_session() -> None: diff --git a/tests/gentropy/docs/test_inspect_dataset.py b/tests/gentropy/docs/test_inspect_dataset.py index 38e258158..ecfb6d85b 100644 --- a/tests/gentropy/docs/test_inspect_dataset.py +++ b/tests/gentropy/docs/test_inspect_dataset.py @@ -1,6 +1,5 @@ """Testing inspecting dataset docs.""" -from gentropy.dataset.summary_statistics import SummaryStatistics from pyspark.sql.types import StructType from docs.src_snippets.howto.python_api.d_inspect_dataset import ( @@ -8,6 +7,7 @@ get_dataset_schema, interact_w_dataframe, ) +from gentropy.dataset.summary_statistics import SummaryStatistics def test_filter_dataset(mock_summary_statistics: SummaryStatistics) -> None: diff --git a/tests/gentropy/method/test_carma.py b/tests/gentropy/method/test_carma.py index b3aecf6c8..8db2abe47 100644 --- a/tests/gentropy/method/test_carma.py +++ b/tests/gentropy/method/test_carma.py @@ -3,6 +3,7 @@ from __future__ import annotations import numpy as np + from gentropy.method.carma import CARMA diff --git a/tests/gentropy/method/test_clump.py b/tests/gentropy/method/test_clump.py index af4e2d141..4616c5c6f 100644 --- a/tests/gentropy/method/test_clump.py +++ b/tests/gentropy/method/test_clump.py @@ -7,6 +7,7 @@ import pyspark.sql.functions as f import pyspark.sql.types as t import pytest + from 
gentropy.dataset.study_locus import StudyLocus from gentropy.method.clump import LDclumping diff --git a/tests/gentropy/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py index e58b0e562..d6798d831 100644 --- a/tests/gentropy/method/test_colocalisation_method.py +++ b/tests/gentropy/method/test_colocalisation_method.py @@ -5,13 +5,14 @@ from typing import Any import pytest -from gentropy.dataset.colocalisation import Colocalisation -from gentropy.dataset.study_locus_overlap import StudyLocusOverlap -from gentropy.method.colocalisation import Coloc, ECaviar from pandas.testing import assert_frame_equal from pyspark.sql import SparkSession from pyspark.sql.types import DoubleType, LongType, StringType, StructField, StructType +from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.study_locus_overlap import StudyLocusOverlap +from gentropy.method.colocalisation import Coloc, ECaviar + def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: """Test coloc.""" diff --git a/tests/gentropy/method/test_ld.py b/tests/gentropy/method/test_ld.py index db0b5aba4..8fb86ff31 100644 --- a/tests/gentropy/method/test_ld.py +++ b/tests/gentropy/method/test_ld.py @@ -7,14 +7,16 @@ import pyspark.sql.functions as f import pyspark.sql.types as t import pytest +from pyspark.sql import Row + from gentropy.dataset.study_locus import StudyLocus from gentropy.method.ld import LDAnnotator -from pyspark.sql import Row if TYPE_CHECKING: + from pyspark.sql import SparkSession + from gentropy.dataset.ld_index import LDIndex from gentropy.dataset.study_index import StudyIndex - from pyspark.sql import SparkSession class TestLDAnnotator: diff --git a/tests/gentropy/method/test_locus_breaker_clumping.py b/tests/gentropy/method/test_locus_breaker_clumping.py index fd3476ded..c2c23eca5 100644 --- a/tests/gentropy/method/test_locus_breaker_clumping.py +++ b/tests/gentropy/method/test_locus_breaker_clumping.py @@ -5,11 
+5,12 @@ from typing import TYPE_CHECKING import pytest -from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.summary_statistics import SummaryStatistics from pyspark.sql import functions as f from pyspark.sql import types as t +from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.summary_statistics import SummaryStatistics + if TYPE_CHECKING: from pyspark.sql import SparkSession diff --git a/tests/gentropy/method/test_locus_to_gene.py b/tests/gentropy/method/test_locus_to_gene.py index 35f736d1f..460d65062 100644 --- a/tests/gentropy/method/test_locus_to_gene.py +++ b/tests/gentropy/method/test_locus_to_gene.py @@ -5,18 +5,20 @@ from typing import TYPE_CHECKING import pytest +from sklearn.ensemble import RandomForestClassifier + from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.l2g_feature import L2GFeature from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus from gentropy.method.l2g.feature_factory import ColocalisationFactory, StudyLocusFactory from gentropy.method.l2g.model import LocusToGeneModel -from sklearn.ensemble import RandomForestClassifier if TYPE_CHECKING: - from gentropy.dataset.v2g import V2G from pyspark.sql import SparkSession + from gentropy.dataset.v2g import V2G + @pytest.fixture(scope="module") def model() -> LocusToGeneModel: diff --git a/tests/gentropy/method/test_pics.py b/tests/gentropy/method/test_pics.py index ee0425b61..ff6f115dc 100644 --- a/tests/gentropy/method/test_pics.py +++ b/tests/gentropy/method/test_pics.py @@ -3,9 +3,10 @@ from __future__ import annotations import pyspark.sql.functions as f +from pyspark.sql import Row + from gentropy.dataset.study_locus import StudyLocus from gentropy.method.pics import PICS -from pyspark.sql import Row class TestFinemap: diff --git a/tests/gentropy/method/test_qc_of_sumstats.py b/tests/gentropy/method/test_qc_of_sumstats.py index 6c2d23f65..d734fcaef 100644 --- 
a/tests/gentropy/method/test_qc_of_sumstats.py +++ b/tests/gentropy/method/test_qc_of_sumstats.py @@ -6,10 +6,11 @@ import pandas as pd import pyspark.sql.functions as f import pytest +from pyspark.sql.functions import rand, when + from gentropy.common.session import Session from gentropy.dataset.summary_statistics import SummaryStatistics from gentropy.method.sumstat_quality_controls import SummaryStatisticsQC -from pyspark.sql.functions import rand, when def test_qc_functions( diff --git a/tests/gentropy/method/test_sumstat_imputation.py b/tests/gentropy/method/test_sumstat_imputation.py index aea59f76b..93df23abc 100644 --- a/tests/gentropy/method/test_sumstat_imputation.py +++ b/tests/gentropy/method/test_sumstat_imputation.py @@ -3,6 +3,7 @@ from __future__ import annotations import numpy as np + from gentropy.method.sumstat_imputation import SummaryStatisticsImputation diff --git a/tests/gentropy/method/test_susie_inf.py b/tests/gentropy/method/test_susie_inf.py index 45d79bcae..91227af0f 100644 --- a/tests/gentropy/method/test_susie_inf.py +++ b/tests/gentropy/method/test_susie_inf.py @@ -4,6 +4,7 @@ import numpy as np import pyspark.sql.functions as f + from gentropy.common.session import Session from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.summary_statistics import SummaryStatistics diff --git a/tests/gentropy/method/test_window_based_clumping.py b/tests/gentropy/method/test_window_based_clumping.py index cd583bac2..382dce1e9 100644 --- a/tests/gentropy/method/test_window_based_clumping.py +++ b/tests/gentropy/method/test_window_based_clumping.py @@ -4,14 +4,15 @@ from typing import TYPE_CHECKING -from gentropy.dataset.study_locus import StudyLocus -from gentropy.method.window_based_clumping import WindowBasedClumping from pyspark.ml import functions as fml from pyspark.ml.linalg import VectorUDT from pyspark.sql import SparkSession from pyspark.sql import functions as f from pyspark.sql.window import Window +from 
gentropy.dataset.study_locus import StudyLocus +from gentropy.method.window_based_clumping import WindowBasedClumping + if TYPE_CHECKING: from gentropy.dataset.summary_statistics import SummaryStatistics diff --git a/tests/gentropy/test_cli.py b/tests/gentropy/test_cli.py index 5675713ce..dbb5a8cac 100644 --- a/tests/gentropy/test_cli.py +++ b/tests/gentropy/test_cli.py @@ -3,10 +3,11 @@ from unittest.mock import patch import pytest -from gentropy.cli import main from hydra.errors import ConfigCompositionException from omegaconf.errors import MissingMandatoryValue +from gentropy.cli import main + def test_main_no_step() -> None: """Test the main function of the CLI without a valid step.""" diff --git a/tests/gentropy/test_schemas.py b/tests/gentropy/test_schemas.py index 630abd0ab..1af72c149 100644 --- a/tests/gentropy/test_schemas.py +++ b/tests/gentropy/test_schemas.py @@ -14,6 +14,7 @@ if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest + from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.v2g import V2G diff --git a/tests/gentropy/test_spark_helpers.py b/tests/gentropy/test_spark_helpers.py index a3790c62e..1adb686cd 100644 --- a/tests/gentropy/test_spark_helpers.py +++ b/tests/gentropy/test_spark_helpers.py @@ -6,12 +6,13 @@ import pyspark.sql.functions as f import pytest +from pyspark.sql.types import ArrayType, DoubleType, StructField, StructType + from gentropy.common.spark_helpers import ( get_record_with_maximum_value, get_record_with_minimum_value, order_array_of_structs_by_field, ) -from pyspark.sql.types import ArrayType, DoubleType, StructField, StructType if TYPE_CHECKING: from pyspark.sql import DataFrame, SparkSession From 02b006d523d932830565876296513ec6dfc0aefb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:07:03 +0100 Subject: [PATCH 011/188] build(deps-dev): bump deptry from 0.18.0 to 0.19.1 (#728) Bumps 
[deptry](https://github.com/fpgmaas/deptry) from 0.18.0 to 0.19.1. - [Release notes](https://github.com/fpgmaas/deptry/releases) - [Changelog](https://github.com/fpgmaas/deptry/blob/main/CHANGELOG.md) - [Commits](https://github.com/fpgmaas/deptry/compare/0.18.0...0.19.1) --- updated-dependencies: - dependency-name: deptry dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: David Ochoa --- poetry.lock | 27 ++++++++++++++------------- pyproject.toml | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 04804d728..dd0c9718b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1667,22 +1667,23 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "deptry" -version = "0.18.0" +version = "0.19.1" description = "A command line utility to check for unused, missing and transitive dependencies in a Python project." 
optional = false python-versions = ">=3.8" files = [ - {file = "deptry-0.18.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:aac16b9825c67887f84795d3fe3c5a676376cd6cc8555f6f7b57bfd45603e421"}, - {file = "deptry-0.18.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:b1440d2fab960e224b542726e6fcb0d3065635cfa8233c14f6c578faa2766e02"}, - {file = "deptry-0.18.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5764d6b484d488ce0f7085dc1767d99069b476383857aafd3bbc912128892dd"}, - {file = "deptry-0.18.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee0a916d78ba8db092a9454d5bc20fccbadb6ed0e8fb81fc020ba7e0df3578ed"}, - {file = "deptry-0.18.0-cp38-abi3-win_amd64.whl", hash = "sha256:7d1b561a4477ab130e1cb277b3d3aa25743b3005e1bb60076031ec3926b47541"}, - {file = "deptry-0.18.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e73d5c2676a1f49a954baa59c248b56bc940ab87d6070cb164f1394c24e07cf3"}, - {file = "deptry-0.18.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7343bb4948ad625ac1b3109279665004e6790ce01c8dc6a8a2ef1e4424c29773"}, - {file = "deptry-0.18.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdf6da66e31ef8bdace3bb34a86c4f066b5c5296776dd61b76802c72b0b3f5f4"}, - {file = "deptry-0.18.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7590966832f5222d2277612e07e67285d92123ad96cf7713cda579d420d63d1"}, - {file = "deptry-0.18.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:57e09ca29e98c4782197dc959849498941b5c4fc53178e9fe1fa30025e608bfd"}, - {file = "deptry-0.18.0.tar.gz", hash = "sha256:9cf8e398ea394f90ccfa8e11d7dcfba8ed485f6a33270ee2b024475b72a00d11"}, + {file = "deptry-0.19.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3a20ef0dd1c737fb05553d1b9c2fa9f185d0c9d3d881d255334cef401ffdc599"}, + {file = "deptry-0.19.1-cp38-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:2c6b2df353e5113fd2f787c2f7e694657548d388929e988e8644bd178e19fc5c"}, + {file = "deptry-0.19.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a407bab3486e3844f93d702f1a381942873b2a46056c693b5634bbde219bb056"}, + {file = "deptry-0.19.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43f33789b97b47313609e92b62fabf8a71bba0d35a7476806da5d3d152e32345"}, + {file = "deptry-0.19.1-cp38-abi3-win_amd64.whl", hash = "sha256:0bad85a77b31360d0f52383b14783fdae4a201b597c0158fe10e91a779c67079"}, + {file = "deptry-0.19.1-cp38-abi3-win_arm64.whl", hash = "sha256:c59142d9dca8873325692fbb7aa1d2902fde87020dcc8102f75120ba95515172"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1abc119f9c8536b8ab1ee2122d4130665f33225d00d8615256ce354eb2c11ba"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7344c6cea032b549d86e156aa1e679fb94cd44deb7e93f25cb6d9c0ded5ea06f"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff7d8954265c48ea334fdd508339c51d3fba05e2d4a8be47712c69d1c8d35c94"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:023073247e5dac21254bf7b600ca2e2b71560652d2dfbe11535445ee912ca059"}, + {file = "deptry-0.19.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:af8a0a9c42f8f92dfbc048e724fa89b9131f032f7e245812260560c214395abf"}, + {file = "deptry-0.19.1.tar.gz", hash = "sha256:1c12fea1d2301f42c7035c5636e4b9421457fde256fe7a241245662d20b4c841"}, ] [package.dependencies] @@ -8691,4 +8692,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "622f47ea07bd1c332dcde8368143d533a27eefdd3d8096e8b648432e0dcd0dfb" +content-hash = "e88c1cae723d94139b4ed7d1308004a63d6c4a87076bc393638d977974661797" diff --git a/pyproject.toml 
b/pyproject.toml index 1539cf7ee..3050f7b6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ apache-airflow = "^2.8.0" apache-airflow-providers-google = "^10.13.1" pydoclint = ">=0.3.8,<0.6.0" prettier = "^0.0.7" -deptry = ">=0.12,<0.19" +deptry = ">=0.12,<0.20" yamllint = "^1.33.0" [tool.semantic_release] From 5984de958b9c6c4681243f917a87061d061d0426 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Aug 2024 11:32:52 +0100 Subject: [PATCH 012/188] build(deps-dev): bump lxml from 5.2.2 to 5.3.0 (#727) Bumps [lxml](https://github.com/lxml/lxml) from 5.2.2 to 5.3.0. - [Release notes](https://github.com/lxml/lxml/releases) - [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt) - [Commits](https://github.com/lxml/lxml/compare/lxml-5.2.2...lxml-5.3.0) --- updated-dependencies: - dependency-name: lxml dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 284 ++++++++++++++++++++++++++-------------------------- 1 file changed, 140 insertions(+), 144 deletions(-) diff --git a/poetry.lock b/poetry.lock index dd0c9718b..fe8793398 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4424,153 +4424,149 @@ typing-extensions = ">=4.1.1" [[package]] name = "lxml" -version = "5.2.2" +version = "5.3.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
optional = false python-versions = ">=3.6" files = [ - {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:364d03207f3e603922d0d3932ef363d55bbf48e3647395765f9bfcbdf6d23632"}, - {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50127c186f191b8917ea2fb8b206fbebe87fd414a6084d15568c27d0a21d60db"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4f025ef3db1c6da4460dd27c118d8cd136d0391da4e387a15e48e5c975147"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981a06a3076997adf7c743dcd0d7a0415582661e2517c7d961493572e909aa1d"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aef5474d913d3b05e613906ba4090433c515e13ea49c837aca18bde190853dff"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e275ea572389e41e8b039ac076a46cb87ee6b8542df3fff26f5baab43713bca"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5b65529bb2f21ac7861a0e94fdbf5dc0daab41497d18223b46ee8515e5ad297"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bcc98f911f10278d1daf14b87d65325851a1d29153caaf146877ec37031d5f36"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:b47633251727c8fe279f34025844b3b3a3e40cd1b198356d003aa146258d13a2"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:fbc9d316552f9ef7bba39f4edfad4a734d3d6f93341232a9dddadec4f15d425f"}, - {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:13e69be35391ce72712184f69000cda04fc89689429179bc4c0ae5f0b7a8c21b"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3b6a30a9ab040b3f545b697cb3adbf3696c05a3a68aad172e3fd7ca73ab3c835"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_ppc64le.whl", 
hash = "sha256:a233bb68625a85126ac9f1fc66d24337d6e8a0f9207b688eec2e7c880f012ec0"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dfa7c241073d8f2b8e8dbc7803c434f57dbb83ae2a3d7892dd068d99e96efe2c"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a7aca7964ac4bb07680d5c9d63b9d7028cace3e2d43175cb50bba8c5ad33316"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae4073a60ab98529ab8a72ebf429f2a8cc612619a8c04e08bed27450d52103c0"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ffb2be176fed4457e445fe540617f0252a72a8bc56208fd65a690fdb1f57660b"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e290d79a4107d7d794634ce3e985b9ae4f920380a813717adf61804904dc4393"}, - {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96e85aa09274955bb6bd483eaf5b12abadade01010478154b0ec70284c1b1526"}, - {file = "lxml-5.2.2-cp310-cp310-win32.whl", hash = "sha256:f956196ef61369f1685d14dad80611488d8dc1ef00be57c0c5a03064005b0f30"}, - {file = "lxml-5.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:875a3f90d7eb5c5d77e529080d95140eacb3c6d13ad5b616ee8095447b1d22e7"}, - {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:45f9494613160d0405682f9eee781c7e6d1bf45f819654eb249f8f46a2c22545"}, - {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0b3f2df149efb242cee2ffdeb6674b7f30d23c9a7af26595099afaf46ef4e88"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d28cb356f119a437cc58a13f8135ab8a4c8ece18159eb9194b0d269ec4e28083"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657a972f46bbefdbba2d4f14413c0d079f9ae243bd68193cb5061b9732fa54c1"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b74b9ea10063efb77a965a8d5f4182806fbf59ed068b3c3fd6f30d2ac7bee734"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07542787f86112d46d07d4f3c4e7c760282011b354d012dc4141cc12a68cef5f"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:303f540ad2dddd35b92415b74b900c749ec2010e703ab3bfd6660979d01fd4ed"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2eb2227ce1ff998faf0cd7fe85bbf086aa41dfc5af3b1d80867ecfe75fb68df3"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:1d8a701774dfc42a2f0b8ccdfe7dbc140500d1049e0632a611985d943fcf12df"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:56793b7a1a091a7c286b5f4aa1fe4ae5d1446fe742d00cdf2ffb1077865db10d"}, - {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eb00b549b13bd6d884c863554566095bf6fa9c3cecb2e7b399c4bc7904cb33b5"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a2569a1f15ae6c8c64108a2cd2b4a858fc1e13d25846be0666fc144715e32ab"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:8cf85a6e40ff1f37fe0f25719aadf443686b1ac7652593dc53c7ef9b8492b115"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d237ba6664b8e60fd90b8549a149a74fcc675272e0e95539a00522e4ca688b04"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0b3f5016e00ae7630a4b83d0868fca1e3d494c78a75b1c7252606a3a1c5fc2ad"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:23441e2b5339bc54dc949e9e675fa35efe858108404ef9aa92f0456929ef6fe8"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb0ba3e8566548d6c8e7dd82a8229ff47bd8fb8c2da237607ac8e5a1b8312e5"}, - {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:79d1fb9252e7e2cfe4de6e9a6610c7cbb99b9708e2c3e29057f487de5a9eaefa"}, - 
{file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6dcc3d17eac1df7859ae01202e9bb11ffa8c98949dcbeb1069c8b9a75917e01b"}, - {file = "lxml-5.2.2-cp311-cp311-win32.whl", hash = "sha256:4c30a2f83677876465f44c018830f608fa3c6a8a466eb223535035fbc16f3438"}, - {file = "lxml-5.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:49095a38eb333aaf44c06052fd2ec3b8f23e19747ca7ec6f6c954ffea6dbf7be"}, - {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7429e7faa1a60cad26ae4227f4dd0459efde239e494c7312624ce228e04f6391"}, - {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:50ccb5d355961c0f12f6cf24b7187dbabd5433f29e15147a67995474f27d1776"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc911208b18842a3a57266d8e51fc3cfaccee90a5351b92079beed912a7914c2"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ce9e786753743159799fdf8e92a5da351158c4bfb6f2db0bf31e7892a1feb5"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec87c44f619380878bd49ca109669c9f221d9ae6883a5bcb3616785fa8f94c97"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08ea0f606808354eb8f2dfaac095963cb25d9d28e27edcc375d7b30ab01abbf6"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75a9632f1d4f698b2e6e2e1ada40e71f369b15d69baddb8968dcc8e683839b18"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74da9f97daec6928567b48c90ea2c82a106b2d500f397eeb8941e47d30b1ca85"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:0969e92af09c5687d769731e3f39ed62427cc72176cebb54b7a9d52cc4fa3b73"}, - {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:9164361769b6ca7769079f4d426a41df6164879f7f3568be9086e15baca61466"}, - 
{file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d26a618ae1766279f2660aca0081b2220aca6bd1aa06b2cf73f07383faf48927"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab67ed772c584b7ef2379797bf14b82df9aa5f7438c5b9a09624dd834c1c1aaf"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3d1e35572a56941b32c239774d7e9ad724074d37f90c7a7d499ab98761bd80cf"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:8268cbcd48c5375f46e000adb1390572c98879eb4f77910c6053d25cc3ac2c67"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e282aedd63c639c07c3857097fc0e236f984ceb4089a8b284da1c526491e3f3d"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfdc2bfe69e9adf0df4915949c22a25b39d175d599bf98e7ddf620a13678585"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4aefd911793b5d2d7a921233a54c90329bf3d4a6817dc465f12ffdfe4fc7b8fe"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8b8df03a9e995b6211dafa63b32f9d405881518ff1ddd775db4e7b98fb545e1c"}, - {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f11ae142f3a322d44513de1018b50f474f8f736bc3cd91d969f464b5bfef8836"}, - {file = "lxml-5.2.2-cp312-cp312-win32.whl", hash = "sha256:16a8326e51fcdffc886294c1e70b11ddccec836516a343f9ed0f82aac043c24a"}, - {file = "lxml-5.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:bbc4b80af581e18568ff07f6395c02114d05f4865c2812a1f02f2eaecf0bfd48"}, - {file = "lxml-5.2.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e3d9d13603410b72787579769469af730c38f2f25505573a5888a94b62b920f8"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38b67afb0a06b8575948641c1d6d68e41b83a3abeae2ca9eed2ac59892b36706"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c689d0d5381f56de7bd6966a4541bff6e08bf8d3871bbd89a0c6ab18aa699573"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:cf2a978c795b54c539f47964ec05e35c05bd045db5ca1e8366988c7f2fe6b3ce"}, - {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:739e36ef7412b2bd940f75b278749106e6d025e40027c0b94a17ef7968d55d56"}, - {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d8bbcd21769594dbba9c37d3c819e2d5847656ca99c747ddb31ac1701d0c0ed9"}, - {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:2304d3c93f2258ccf2cf7a6ba8c761d76ef84948d87bf9664e14d203da2cd264"}, - {file = "lxml-5.2.2-cp36-cp36m-win32.whl", hash = "sha256:02437fb7308386867c8b7b0e5bc4cd4b04548b1c5d089ffb8e7b31009b961dc3"}, - {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, - {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, - {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, - {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, - {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, - {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, - {file = "lxml-5.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7ed07b3062b055d7a7f9d6557a251cc655eed0b3152b76de619516621c56f5d3"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f60fdd125d85bf9c279ffb8e94c78c51b3b6a37711464e1f5f31078b45002421"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7e24cb69ee5f32e003f50e016d5fde438010c1022c96738b04fc2423e61706"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23cfafd56887eaed93d07bc4547abd5e09d837a002b791e9767765492a75883f"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19b4e485cd07b7d83e3fe3b72132e7df70bfac22b14fe4bf7a23822c3a35bff5"}, - {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:7ce7ad8abebe737ad6143d9d3bf94b88b93365ea30a5b81f6877ec9c0dee0a48"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e49b052b768bb74f58c7dda4e0bdf7b79d43a9204ca584ffe1fb48a6f3c84c66"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:d14a0d029a4e176795cef99c056d58067c06195e0c7e2dbb293bf95c08f772a3"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:be49ad33819d7dcc28a309b86d4ed98e1a65f3075c6acd3cd4fe32103235222b"}, - {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a6d17e0370d2516d5bb9062c7b4cb731cff921fc875644c3d751ad857ba9c5b1"}, - {file = "lxml-5.2.2-cp38-cp38-win32.whl", hash = "sha256:5b8c041b6265e08eac8a724b74b655404070b636a8dd6d7a13c3adc07882ef30"}, - {file = "lxml-5.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:f61efaf4bed1cc0860e567d2ecb2363974d414f7f1f124b1df368bbf183453a6"}, - {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb91819461b1b56d06fa4bcf86617fac795f6a99d12239fb0c68dbeba41a0a30"}, - {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d4ed0c7cbecde7194cd3228c044e86bf73e30a23505af852857c09c24e77ec5d"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54401c77a63cc7d6dc4b4e173bb484f28a5607f3df71484709fe037c92d4f0ed"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:625e3ef310e7fa3a761d48ca7ea1f9d8718a32b1542e727d584d82f4453d5eeb"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:519895c99c815a1a24a926d5b60627ce5ea48e9f639a5cd328bda0515ea0f10c"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7079d5eb1c1315a858bbf180000757db8ad904a89476653232db835c3114001"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:343ab62e9ca78094f2306aefed67dcfad61c4683f87eee48ff2fd74902447726"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:cd9e78285da6c9ba2d5c769628f43ef66d96ac3085e59b10ad4f3707980710d3"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = 
"sha256:546cf886f6242dff9ec206331209db9c8e1643ae642dea5fdbecae2453cb50fd"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:02f6a8eb6512fdc2fd4ca10a49c341c4e109aa6e9448cc4859af5b949622715a"}, - {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:339ee4a4704bc724757cd5dd9dc8cf4d00980f5d3e6e06d5847c1b594ace68ab"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0a028b61a2e357ace98b1615fc03f76eb517cc028993964fe08ad514b1e8892d"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f90e552ecbad426eab352e7b2933091f2be77115bb16f09f78404861c8322981"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:d83e2d94b69bf31ead2fa45f0acdef0757fa0458a129734f59f67f3d2eb7ef32"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a02d3c48f9bb1e10c7788d92c0c7db6f2002d024ab6e74d6f45ae33e3d0288a3"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d68ce8e7b2075390e8ac1e1d3a99e8b6372c694bbe612632606d1d546794207"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:453d037e09a5176d92ec0fd282e934ed26d806331a8b70ab431a81e2fbabf56d"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:3b019d4ee84b683342af793b56bb35034bd749e4cbdd3d33f7d1107790f8c472"}, - {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb3942960f0beb9f46e2a71a3aca220d1ca32feb5a398656be934320804c0df9"}, - {file = "lxml-5.2.2-cp39-cp39-win32.whl", hash = "sha256:ac6540c9fff6e3813d29d0403ee7a81897f1d8ecc09a8ff84d2eea70ede1cdbf"}, - {file = "lxml-5.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:610b5c77428a50269f38a534057444c249976433f40f53e3b47e68349cca1425"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b537bd04d7ccd7c6350cdaaaad911f6312cbd61e6e6045542f781c7f8b2e99d2"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:4820c02195d6dfb7b8508ff276752f6b2ff8b64ae5d13ebe02e7667e035000b9"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a09f6184f17a80897172863a655467da2b11151ec98ba8d7af89f17bf63dae"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76acba4c66c47d27c8365e7c10b3d8016a7da83d3191d053a58382311a8bf4e1"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b128092c927eaf485928cec0c28f6b8bead277e28acf56800e972aa2c2abd7a2"}, - {file = "lxml-5.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ae791f6bd43305aade8c0e22f816b34f3b72b6c820477aab4d18473a37e8090b"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a2f6a1bc2460e643785a2cde17293bd7a8f990884b822f7bca47bee0a82fc66b"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e8d351ff44c1638cb6e980623d517abd9f580d2e53bfcd18d8941c052a5a009"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec4bd9133420c5c52d562469c754f27c5c9e36ee06abc169612c959bd7dbb07"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:55ce6b6d803890bd3cc89975fca9de1dff39729b43b73cb15ddd933b8bc20484"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ab6a358d1286498d80fe67bd3d69fcbc7d1359b45b41e74c4a26964ca99c3f8"}, - {file = "lxml-5.2.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:06668e39e1f3c065349c51ac27ae430719d7806c026fec462e5693b08b95696b"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9cd5323344d8ebb9fb5e96da5de5ad4ebab993bbf51674259dbe9d7a18049525"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89feb82ca055af0fe797a2323ec9043b26bc371365847dbe83c7fd2e2f181c34"}, - {file = 
"lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e481bba1e11ba585fb06db666bfc23dbe181dbafc7b25776156120bf12e0d5a6"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d6c6ea6a11ca0ff9cd0390b885984ed31157c168565702959c25e2191674a14"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3d98de734abee23e61f6b8c2e08a88453ada7d6486dc7cdc82922a03968928db"}, - {file = "lxml-5.2.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:69ab77a1373f1e7563e0fb5a29a8440367dec051da6c7405333699d07444f511"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e17913c431f5ae01d8658dbf792fdc457073dcdfbb31dc0cc6ab256e664a8d"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f8757b03208c3f50097761be2dea0aba02e94f0dc7023ed73a7bb14ff11eb0"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a520b4f9974b0a0a6ed73c2154de57cdfd0c8800f4f15ab2b73238ffed0b36e"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5e097646944b66207023bc3c634827de858aebc226d5d4d6d16f0b77566ea182"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b5e4ef22ff25bfd4ede5f8fb30f7b24446345f3e79d9b7455aef2836437bc38a"}, - {file = "lxml-5.2.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff69a9a0b4b17d78170c73abe2ab12084bdf1691550c5629ad1fe7849433f324"}, - {file = "lxml-5.2.2.tar.gz", hash = "sha256:bb2dc4898180bea79863d5487e5f9c7c34297414bad54bcd0f0852aee9cfdb87"}, + {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"}, + {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ae5fe5c4b525aa82b8076c1a59d642c17b6e8739ecf852522c6321852178119d"}, + {file = 
"lxml-5.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501d0d7e26b4d261fca8132854d845e4988097611ba2531408ec91cf3fd9d20a"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb66442c2546446944437df74379e9cf9e9db353e61301d1a0e26482f43f0dd8"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e41506fec7a7f9405b14aa2d5c8abbb4dbbd09d88f9496958b6d00cb4d45330"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7d4a670107d75dfe5ad080bed6c341d18c4442f9378c9f58e5851e86eb79965"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41ce1f1e2c7755abfc7e759dc34d7d05fd221723ff822947132dc934d122fe22"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:44264ecae91b30e5633013fb66f6ddd05c006d3e0e884f75ce0b4755b3e3847b"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:3c174dc350d3ec52deb77f2faf05c439331d6ed5e702fc247ccb4e6b62d884b7"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:2dfab5fa6a28a0b60a20638dc48e6343c02ea9933e3279ccb132f555a62323d8"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b1c8c20847b9f34e98080da785bb2336ea982e7f913eed5809e5a3c872900f32"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2c86bf781b12ba417f64f3422cfc302523ac9cd1d8ae8c0f92a1c66e56ef2e86"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c162b216070f280fa7da844531169be0baf9ccb17263cf5a8bf876fcd3117fa5"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:36aef61a1678cb778097b4a6eeae96a69875d51d1e8f4d4b491ab3cfb54b5a03"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:f65e5120863c2b266dbcc927b306c5b78e502c71edf3295dfcb9501ec96e5fc7"}, + {file = "lxml-5.3.0-cp310-cp310-win32.whl", hash = "sha256:ef0c1fe22171dd7c7c27147f2e9c3e86f8bdf473fed75f16b0c2e84a5030ce80"}, + {file = "lxml-5.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:052d99051e77a4f3e8482c65014cf6372e61b0a6f4fe9edb98503bb5364cfee3"}, + {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:74bcb423462233bc5d6066e4e98b0264e7c1bed7541fff2f4e34fe6b21563c8b"}, + {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a3d819eb6f9b8677f57f9664265d0a10dd6551d227afb4af2b9cd7bdc2ccbf18"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b8f5db71b28b8c404956ddf79575ea77aa8b1538e8b2ef9ec877945b3f46442"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3406b63232fc7e9b8783ab0b765d7c59e7c59ff96759d8ef9632fca27c7ee4"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ecdd78ab768f844c7a1d4a03595038c166b609f6395e25af9b0f3f26ae1230f"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168f2dfcfdedf611eb285efac1516c8454c8c99caf271dccda8943576b67552e"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa617107a410245b8660028a7483b68e7914304a6d4882b5ff3d2d3eb5948d8c"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:69959bd3167b993e6e710b99051265654133a98f20cec1d9b493b931942e9c16"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:bd96517ef76c8654446fc3db9242d019a1bb5fe8b751ba414765d59f99210b79"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ab6dd83b970dc97c2d10bc71aa925b84788c7c05de30241b9e96f9b6d9ea3080"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:eec1bb8cdbba2925bedc887bc0609a80e599c75b12d87ae42ac23fd199445654"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a7095eeec6f89111d03dabfe5883a1fd54da319c94e0fb104ee8f23616b572d"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6f651ebd0b21ec65dfca93aa629610a0dbc13dbc13554f19b0113da2e61a4763"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f422a209d2455c56849442ae42f25dbaaba1c6c3f501d58761c619c7836642ec"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:62f7fdb0d1ed2065451f086519865b4c90aa19aed51081979ecd05a21eb4d1be"}, + {file = "lxml-5.3.0-cp311-cp311-win32.whl", hash = "sha256:c6379f35350b655fd817cd0d6cbeef7f265f3ae5fedb1caae2eb442bbeae9ab9"}, + {file = "lxml-5.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c52100e2c2dbb0649b90467935c4b0de5528833c76a35ea1a2691ec9f1ee7a1"}, + {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e99f5507401436fdcc85036a2e7dc2e28d962550afe1cbfc07c40e454256a859"}, + {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:384aacddf2e5813a36495233b64cb96b1949da72bef933918ba5c84e06af8f0e"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a216bf6afaf97c263b56371434e47e2c652d215788396f60477540298218f"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65ab5685d56914b9a2a34d67dd5488b83213d680b0c5d10b47f81da5a16b0b0e"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aac0bbd3e8dd2d9c45ceb82249e8bdd3ac99131a32b4d35c8af3cc9db1657179"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b369d3db3c22ed14c75ccd5af429086f166a19627e84a8fdade3f8f31426e52a"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c24037349665434f375645fa9d1f5304800cec574d0310f618490c871fd902b3"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:62d172f358f33a26d6b41b28c170c63886742f5b6772a42b59b4f0fa10526cb1"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:c1f794c02903c2824fccce5b20c339a1a14b114e83b306ff11b597c5f71a1c8d"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:5d6a6972b93c426ace71e0be9a6f4b2cfae9b1baed2eed2006076a746692288c"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:3879cc6ce938ff4eb4900d901ed63555c778731a96365e53fadb36437a131a99"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:74068c601baff6ff021c70f0935b0c7bc528baa8ea210c202e03757c68c5a4ff"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ecd4ad8453ac17bc7ba3868371bffb46f628161ad0eefbd0a855d2c8c32dd81a"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7e2f58095acc211eb9d8b5771bf04df9ff37d6b87618d1cbf85f92399c98dae8"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e63601ad5cd8f860aa99d109889b5ac34de571c7ee902d6812d5d9ddcc77fa7d"}, + {file = "lxml-5.3.0-cp312-cp312-win32.whl", hash = "sha256:17e8d968d04a37c50ad9c456a286b525d78c4a1c15dd53aa46c1d8e06bf6fa30"}, + {file = "lxml-5.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:c1a69e58a6bb2de65902051d57fde951febad631a20a64572677a1052690482f"}, + {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c72e9563347c7395910de6a3100a4840a75a6f60e05af5e58566868d5eb2d6a"}, + {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e92ce66cd919d18d14b3856906a61d3f6b6a8500e0794142338da644260595cd"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d04f064bebdfef9240478f7a779e8c5dc32b8b7b0b2fc6a62e39b928d428e51"}, + {file = 
"lxml-5.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c2fb570d7823c2bbaf8b419ba6e5662137f8166e364a8b2b91051a1fb40ab8b"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c120f43553ec759f8de1fee2f4794452b0946773299d44c36bfe18e83caf002"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:562e7494778a69086f0312ec9689f6b6ac1c6b65670ed7d0267e49f57ffa08c4"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:423b121f7e6fa514ba0c7918e56955a1d4470ed35faa03e3d9f0e3baa4c7e492"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c00f323cc00576df6165cc9d21a4c21285fa6b9989c5c39830c3903dc4303ef3"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:1fdc9fae8dd4c763e8a31e7630afef517eab9f5d5d31a278df087f307bf601f4"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:658f2aa69d31e09699705949b5fc4719cbecbd4a97f9656a232e7d6c7be1a367"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1473427aff3d66a3fa2199004c3e601e6c4500ab86696edffdbc84954c72d832"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a87de7dd873bf9a792bf1e58b1c3887b9264036629a5bf2d2e6579fe8e73edff"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0d7b36afa46c97875303a94e8f3ad932bf78bace9e18e603f2085b652422edcd"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cf120cce539453ae086eacc0130a324e7026113510efa83ab42ef3fcfccac7fb"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:df5c7333167b9674aa8ae1d4008fa4bc17a313cc490b2cca27838bbdcc6bb15b"}, + {file = "lxml-5.3.0-cp313-cp313-win32.whl", hash = "sha256:c802e1c2ed9f0c06a65bc4ed0189d000ada8049312cfeab6ca635e39c9608957"}, + {file = "lxml-5.3.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:406246b96d552e0503e17a1006fd27edac678b3fcc9f1be71a2f94b4ff61528d"}, + {file = "lxml-5.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8f0de2d390af441fe8b2c12626d103540b5d850d585b18fcada58d972b74a74e"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1afe0a8c353746e610bd9031a630a95bcfb1a720684c3f2b36c4710a0a96528f"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56b9861a71575f5795bde89256e7467ece3d339c9b43141dbdd54544566b3b94"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:9fb81d2824dff4f2e297a276297e9031f46d2682cafc484f49de182aa5e5df99"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2c226a06ecb8cdef28845ae976da407917542c5e6e75dcac7cc33eb04aaeb237"}, + {file = "lxml-5.3.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:7d3d1ca42870cdb6d0d29939630dbe48fa511c203724820fc0fd507b2fb46577"}, + {file = "lxml-5.3.0-cp36-cp36m-win32.whl", hash = "sha256:094cb601ba9f55296774c2d57ad68730daa0b13dc260e1f941b4d13678239e70"}, + {file = "lxml-5.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:eafa2c8658f4e560b098fe9fc54539f86528651f61849b22111a9b107d18910c"}, + {file = "lxml-5.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb83f8a875b3d9b458cada4f880fa498646874ba4011dc974e071a0a84a1b033"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25f1b69d41656b05885aa185f5fdf822cb01a586d1b32739633679699f220391"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23e0553b8055600b3bf4a00b255ec5c92e1e4aebf8c2c09334f8368e8bd174d6"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ada35dd21dc6c039259596b358caab6b13f4db4d4a7f8665764d616daf9cc1d"}, + {file = 
"lxml-5.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:81b4e48da4c69313192d8c8d4311e5d818b8be1afe68ee20f6385d0e96fc9512"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:2bc9fd5ca4729af796f9f59cd8ff160fe06a474da40aca03fcc79655ddee1a8b"}, + {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07da23d7ee08577760f0a71d67a861019103e4812c87e2fab26b039054594cc5"}, + {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ea2e2f6f801696ad7de8aec061044d6c8c0dd4037608c7cab38a9a4d316bfb11"}, + {file = "lxml-5.3.0-cp37-cp37m-win32.whl", hash = "sha256:5c54afdcbb0182d06836cc3d1be921e540be3ebdf8b8a51ee3ef987537455f84"}, + {file = "lxml-5.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f2901429da1e645ce548bf9171784c0f74f0718c3f6150ce166be39e4dd66c3e"}, + {file = "lxml-5.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c56a1d43b2f9ee4786e4658c7903f05da35b923fb53c11025712562d5cc02753"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ee8c39582d2652dcd516d1b879451500f8db3fe3607ce45d7c5957ab2596040"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdf3a3059611f7585a78ee10399a15566356116a4288380921a4b598d807a22"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:146173654d79eb1fc97498b4280c1d3e1e5d58c398fa530905c9ea50ea849b22"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0a7056921edbdd7560746f4221dca89bb7a3fe457d3d74267995253f46343f15"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:9e4b47ac0f5e749cfc618efdf4726269441014ae1d5583e047b452a32e221920"}, + {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f914c03e6a31deb632e2daa881fe198461f4d06e57ac3d0e05bbcab8eae01945"}, + {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:213261f168c5e1d9b7535a67e68b1f59f92398dd17a56d934550837143f79c42"}, + {file = "lxml-5.3.0-cp38-cp38-win32.whl", hash = "sha256:218c1b2e17a710e363855594230f44060e2025b05c80d1f0661258142b2add2e"}, + {file = "lxml-5.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:315f9542011b2c4e1d280e4a20ddcca1761993dda3afc7a73b01235f8641e903"}, + {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1ffc23010330c2ab67fac02781df60998ca8fe759e8efde6f8b756a20599c5de"}, + {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2b3778cb38212f52fac9fe913017deea2fdf4eb1a4f8e4cfc6b009a13a6d3fcc"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b0c7a688944891086ba192e21c5229dea54382f4836a209ff8d0a660fac06be"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:747a3d3e98e24597981ca0be0fd922aebd471fa99d0043a3842d00cdcad7ad6a"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86a6b24b19eaebc448dc56b87c4865527855145d851f9fc3891673ff97950540"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b11a5d918a6216e521c715b02749240fb07ae5a1fefd4b7bf12f833bc8b4fe70"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b87753c784d6acb8a25b05cb526c3406913c9d988d51f80adecc2b0775d6aa"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:109fa6fede314cc50eed29e6e56c540075e63d922455346f11e4d7a036d2b8cf"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:02ced472497b8362c8e902ade23e3300479f4f43e45f4105c85ef43b8db85229"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:6b038cc86b285e4f9fea2ba5ee76e89f21ed1ea898e287dc277a25884f3a7dfe"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:7437237c6a66b7ca341e868cda48be24b8701862757426852c9b3186de1da8a2"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7f41026c1d64043a36fda21d64c5026762d53a77043e73e94b71f0521939cc71"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:482c2f67761868f0108b1743098640fbb2a28a8e15bf3f47ada9fa59d9fe08c3"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1483fd3358963cc5c1c9b122c80606a3a79ee0875bcac0204149fa09d6ff2727"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dec2d1130a9cda5b904696cec33b2cfb451304ba9081eeda7f90f724097300a"}, + {file = "lxml-5.3.0-cp39-cp39-win32.whl", hash = "sha256:a0eabd0a81625049c5df745209dc7fcef6e2aea7793e5f003ba363610aa0a3ff"}, + {file = "lxml-5.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:89e043f1d9d341c52bf2af6d02e6adde62e0a46e6755d5eb60dc6e4f0b8aeca2"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7b1cd427cb0d5f7393c31b7496419da594fe600e6fdc4b105a54f82405e6626c"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51806cfe0279e06ed8500ce19479d757db42a30fd509940b1701be9c86a5ff9a"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee70d08fd60c9565ba8190f41a46a54096afa0eeb8f76bd66f2c25d3b1b83005"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:8dc2c0395bea8254d8daebc76dcf8eb3a95ec2a46fa6fae5eaccee366bfe02ce"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6ba0d3dcac281aad8a0e5b14c7ed6f9fa89c8612b47939fc94f80b16e2e9bc83"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:94d6c3782907b5e40e21cadf94b13b0842ac421192f26b84c45f13f3c9d5dc27"}, + 
{file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c300306673aa0f3ed5ed9372b21867690a17dba38c68c44b287437c362ce486b"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d9b952e07aed35fe2e1a7ad26e929595412db48535921c5013edc8aa4a35ce"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:01220dca0d066d1349bd6a1726856a78f7929f3878f7e2ee83c296c69495309e"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2d9b8d9177afaef80c53c0a9e30fa252ff3036fb1c6494d427c066a4ce6a282f"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:20094fc3f21ea0a8669dc4c61ed7fa8263bd37d97d93b90f28fc613371e7a875"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ace2c2326a319a0bb8a8b0e5b570c764962e95818de9f259ce814ee666603f19"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92e67a0be1639c251d21e35fe74df6bcc40cba445c2cda7c4a967656733249e2"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5350b55f9fecddc51385463a4f67a5da829bc741e38cf689f38ec9023f54ab"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c1fefd7e3d00921c44dc9ca80a775af49698bbfd92ea84498e56acffd4c5469"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:71a8dd38fbd2f2319136d4ae855a7078c69c9a38ae06e0c17c73fd70fc6caad8"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:97acf1e1fd66ab53dacd2c35b319d7e548380c2e9e8c54525c6e76d21b1ae3b1"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:68934b242c51eb02907c5b81d138cb977b2129a0a75a8f8b60b01cb8586c7b21"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b710bc2b8292966b23a6a0121f7a6c51d45d2347edcc75f016ac123b8054d3f2"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18feb4b93302091b1541221196a2155aa296c363fd233814fa11e181adebc52f"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:3eb44520c4724c2e1a57c0af33a379eee41792595023f367ba3952a2d96c2aab"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:609251a0ca4770e5a8768ff902aa02bf636339c5a93f9349b48eb1f606f7f3e9"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:516f491c834eb320d6c843156440fe7fc0d50b33e44387fcec5b02f0bc118a4c"}, + {file = "lxml-5.3.0.tar.gz", hash = "sha256:4e109ca30d1edec1ac60cdbe341905dc3b8f55b16855e03a54aaf59e51ec8c6f"}, ] [package.extras] @@ -4578,7 +4574,7 @@ cssselect = ["cssselect (>=0.7)"] html-clean = ["lxml-html-clean"] html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] -source = ["Cython (>=3.0.10)"] +source = ["Cython (>=3.0.11)"] [[package]] name = "mako" From 4db64a3eb5cd3f2cd2bd2026dd5b05191f4d0292 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:29:20 +0100 Subject: [PATCH 013/188] build(deps-dev): bump pre-commit from 3.7.1 to 3.8.0 (#719) * build(deps-dev): bump pre-commit from 3.7.1 to 3.8.0 Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 3.7.1 to 3.8.0. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v3.7.1...v3.8.0) --- updated-dependencies: - dependency-name: pre-commit dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * fix: update lock --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Co-authored-by: David Ochoa Co-authored-by: David Ochoa --- poetry.lock | 11 +++++------ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index fe8793398..46d0cf279 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiodns" @@ -6052,13 +6052,13 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p [[package]] name = "pre-commit" -version = "3.7.1" +version = "3.8.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" files = [ - {file = "pre_commit-3.7.1-py2.py3-none-any.whl", hash = "sha256:fae36fd1d7ad7d6a5a1c0b0d5adb2ed1a3bda5a21bf6c3e5372073d7a11cd4c5"}, - {file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"}, + {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, + {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, ] [package.dependencies] @@ -6879,7 +6879,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -8688,4 +8687,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "e88c1cae723d94139b4ed7d1308004a63d6c4a87076bc393638d977974661797" +content-hash = "04bc80689794ab41c58a2daf8f7841a36b6f34bed3b74069b6e0e8c30f32d24b" diff --git 
a/pyproject.toml b/pyproject.toml index 3050f7b6e..3dbf9f8ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ skops = ">=0.9,<0.11" google-cloud-secret-manager = "^2.20.0" [tool.poetry.dev-dependencies] -pre-commit = "^3.7.0" +pre-commit = "^3.8.0" mypy = "^1.11" pep8-naming = "^0.14.1" interrogate = "^1.7.0" From dc8c9dc63376bdc353771d7158060494a8a62434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Tue, 27 Aug 2024 15:59:23 +0100 Subject: [PATCH 014/188] feat(variant_index): changes for a successful run (#735) * feat: update variant sources for variant idx and drop rsid mapping * chore: remove variant_index step from etl dag * chore: move gnomad annotation from dev to static assets * chore: change gentropy docker image * feat(convert_to_vcf): write data partitioned * feat: add vcf merging step * chore: assert merge is succesful * fix: remove non canonical variants from vcfs * chore: pass vcf types to merging rule * fix: match vep parser order of columns to variant index schema * fix: ditch `assign_variant_id` in favour of `hash_long_variant_ids` * chore: update `test_as_vcf_df_without_variant_id` * fix: correct test mock data * chore: pre-commit auto fixes [...] * chore: merge from dev * chore: pre-commit auto fixes [...] 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- config/datasets/ot_gcp.yaml | 2 +- src/airflow/dags/configs/dag.yaml | 4 - src/airflow/dags/configs/variant_sources.yaml | 6 +- src/airflow/dags/variant_index.py | 93 ++++++++++++++++--- src/gentropy/dataset/variant_index.py | 23 ----- src/gentropy/datasource/ensembl/vep_parser.py | 2 +- .../datasource/open_targets/variants.py | 19 ++-- src/gentropy/variant_index.py | 13 ++- .../datasource/open_targets/test_variants.py | 17 +--- 9 files changed, 106 insertions(+), 73 deletions(-) diff --git a/config/datasets/ot_gcp.yaml b/config/datasets/ot_gcp.yaml index c61863b86..d169f7c08 100644 --- a/config/datasets/ot_gcp.yaml +++ b/config/datasets/ot_gcp.yaml @@ -55,7 +55,7 @@ finngen_finemapping_results_path: ${datasets.inputs}/Finngen_susie_finemapping_r finngen_finemapping_summaries_path: ${datasets.inputs}/Finngen_susie_finemapping_r10/Finngen_susie_credset_summary_r10.tsv # Dev output datasets -gnomad_variants: ${datasets.outputs}/gnomad_variants +gnomad_variants: ${datasets.static_assets}/gnomad_variants study_locus: ${datasets.outputs}/study_locus summary_statistics: ${datasets.outputs}/summary_statistics study_locus_overlap: ${datasets.outputs}/study_locus_overlap diff --git a/src/airflow/dags/configs/dag.yaml b/src/airflow/dags/configs/dag.yaml index b6d343343..24d185c1e 100644 --- a/src/airflow/dags/configs/dag.yaml +++ b/src/airflow/dags/configs/dag.yaml @@ -1,21 +1,17 @@ - id: "ot_gene_index" -- id: "ot_variant_index" - id: "ot_variant_to_gene" prerequisites: - - "ot_variant_index" - "ot_gene_index" - id: "ot_colocalisation_ecaviar" - id: "ot_colocalisation_coloc" - id: "ot_locus_to_gene_train" prerequisites: - - "ot_variant_index" - "ot_variant_to_gene" - "ot_colocalisation_ecaviar" - "ot_colocalisation_coloc" - id: "ot_locus_to_gene_predict" prerequisites: - 
"ot_locus_to_gene_train" - - "ot_variant_index" - "ot_variant_to_gene" - "ot_colocalisation_ecaviar" - "ot_colocalisation_coloc" diff --git a/src/airflow/dags/configs/variant_sources.yaml b/src/airflow/dags/configs/variant_sources.yaml index e8914b8aa..233eb0ccf 100644 --- a/src/airflow/dags/configs/variant_sources.yaml +++ b/src/airflow/dags/configs/variant_sources.yaml @@ -1,12 +1,12 @@ sources_inclusion_list: - name: uniprot - location: gs://open-targets-pre-data-releases/24.06/input/evidence-files/uniprot.json.gz ## input + location: gs://open-targets-pre-data-releases/24.09/input/evidence-files/uniprot.json.gz ## input format: json - name: clinvar - location: gs://open-targets-pre-data-releases/24.06/input/evidence-files/eva.json.gz + location: gs://open-targets-pre-data-releases/24.09/input/evidence-files/eva.json.gz format: json - name: pharmgkb - location: gs://open-targets-pre-data-releases/24.06/input/pharmacogenomics-inputs/pharmacogenomics.json.gz + location: gs://open-targets-pre-data-releases/24.09/input/pharmacogenomics-inputs/pharmacogenomics.json.gz format: json - name: gentropy_credible_sets location: gs://genetics_etl_python_playground/releases/24.06/credible_set diff --git a/src/airflow/dags/variant_index.py b/src/airflow/dags/variant_index.py index eb102f277..9d0736632 100644 --- a/src/airflow/dags/variant_index.py +++ b/src/airflow/dags/variant_index.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Any +import pandas as pd from common_airflow import ( create_batch_job, create_cluster, @@ -31,14 +32,19 @@ PROJECT_ID = "open-targets-genetics-dev" REGION = "europe-west1" +GCS_BUCKET = "genetics_etl_python_playground" CONFIG_FILE_PATH = Path(__file__).parent / "configs" / "variant_sources.yaml" -GENTROPY_DOCKER_IMAGE = "europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/gentropy:il-3333" +GENTROPY_DOCKER_IMAGE = "europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/gentropy:il-variant-idx" # TODO: change 
to dev VEP_DOCKER_IMAGE = "europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/custom_ensembl_vep:dev" -VCF_DST_PATH = "gs://genetics_etl_python_playground/il-3333" -VEP_OUTPUT_BUCKET = "gs://genetics_etl_python_playground/il-3333/vep_output" -VEP_CACHE_BUCKET = "gs://genetics_etl_python_playground/vep/cache" -VARIANT_INDEX_BUCKET = "gs://genetics_etl_python_playground/il-3333/variant_index" -GNOMAD_ANNOTATION_PATH = "gs://genetics_etl_python_playground/output/python_etl/parquet/24.06/gnomad_variants" +VEP_CACHE_BUCKET = f"gs://{GCS_BUCKET}/vep/cache" + +RELEASE = "XX.XX" # This needs to be updated to the latest release + +VCF_DST_PATH = f"gs://{GCS_BUCKET}/{RELEASE}/variant_vcf" +VCF_MERGED_DST_PATH = f"{VCF_DST_PATH}/merged" +VEP_OUTPUT_BUCKET = f"gs://{GCS_BUCKET}/{RELEASE}/vep_output" +VARIANT_INDEX_BUCKET = f"gs://{GCS_BUCKET}/{RELEASE}/variant_index" +GNOMAD_ANNOTATION_PATH = f"gs://{GCS_BUCKET}/static_assets/gnomad_variants" # Internal parameters for the docker image: MOUNT_DIR = "/mnt/disks/share" @@ -67,7 +73,7 @@ def create_vcf(**kwargs: Any) -> None: commands = [ "-c", - rf"poetry run gentropy step=variant_to_vcf step.source_path=$SOURCE_PATH step.source_format=$SOURCE_FORMAT step.vcf_path={VCF_DST_PATH}/$SOURCE_NAME.vcf +step.session.extended_spark_conf={{spark.jars:https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar}}", + rf"poetry run gentropy step=variant_to_vcf step.source_path=$SOURCE_PATH step.source_format=$SOURCE_FORMAT step.vcf_path={VCF_DST_PATH}/$SOURCE_NAME +step.session.extended_spark_conf={{spark.jars:https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar}}", ] task = create_task_spec( GENTROPY_DOCKER_IMAGE, commands, options="-e HYDRA_FULL_ERROR=1" @@ -89,6 +95,59 @@ def create_vcf(**kwargs: Any) -> None: batch_task.execute(context=kwargs) +@task(task_id="merge_vcfs") +def merge_vcfs(chunk_size: int = 2000, **kwargs: Any) -> None: + """Task that merges the information from 
all the VCF files into a single one so that we only submit one VEP job. + + Args: + chunk_size (int): Partition size of the merged file. Defaults to 2000. + **kwargs (Any): Keyword arguments + """ + ti = kwargs["ti"] + input_vcfs = [ + f"gs://{GCS_BUCKET}/{listed_file}" + for listed_file in ti.xcom_pull( + task_ids="get_vcf_per_source", key="return_value" + ) + ] + merged_df = ( + pd.concat( + pd.read_csv( + file, + sep="\t", + dtype={ + "#CHROM": str, + "POS": int, + "ID": str, + "REF": str, + "ALT": str, + "QUAL": str, + "FILTER": str, + "INFO": str, + }, + ) + for file in input_vcfs + ) + .drop_duplicates(subset=["#CHROM", "POS", "REF", "ALT"]) + .sort_values(by=["#CHROM", "POS"]) + .reset_index(drop=True) + ) + # Partition the merged file into chunks of 2000 variants to run the VEP jobs in parallel + chunks = 0 + for i in range(0, len(merged_df), chunk_size): + merged_df[i : i + chunk_size].to_csv( + f"{VCF_MERGED_DST_PATH}/chunk_{i + 1}-{i + chunk_size}.vcf", + index=False, + header=True, + sep="\t", + ) + chunks += 1 + expected_chunks_count = len(merged_df) // chunk_size + 1 + assert ( + chunks == expected_chunks_count + ), f"Expected {expected_chunks_count} chunks but got {chunks} chunks" + + @dataclass class PathManager: """It is quite complicated to keep track of all the input/output buckets, the corresponding mounting points prefixes etc...""" @@ -147,7 +206,7 @@ def get_mount_config(self) -> list[dict[str, str]]: @task(task_id="vep_annotation") def vep_annotation(pm: PathManager, **kwargs: Any) -> None: - """Submit a Batch job to download cache for VEP. + """Submit a Batch job to annotate VCFs with a local VEP docker image. Args: pm (PathManager): The path manager with all the required path related information. 
@@ -166,7 +225,7 @@ def vep_annotation(pm: PathManager, **kwargs: Any) -> None: task_env = [ batch_v1.Environment( variables={ - "INPUT_FILE": f"{filename}.tsv", + "INPUT_FILE": f"{filename}.vcf", "OUTPUT_FILE": f"{filename}.json", } ) @@ -210,19 +269,27 @@ def vep_annotation(pm: PathManager, **kwargs: Any) -> None: **shared_dag_kwargs, ) as dag: pm = PathManager( - VCF_DST_PATH, + VCF_MERGED_DST_PATH, VEP_OUTPUT_BUCKET, VEP_CACHE_BUCKET, MOUNT_DIR, ) ( create_vcf() + >> GCSListObjectsOperator( + task_id="get_vcf_per_source", + bucket=GCS_BUCKET, + prefix=VCF_DST_PATH.replace(f"gs://{GCS_BUCKET}/", ""), + trigger_rule=TriggerRule.ALL_SUCCESS, + match_glob="**.csv", + ) + >> merge_vcfs() >> GCSListObjectsOperator( task_id="get_vep_todo_list", - bucket=pm.input_bucket, - prefix=pm.input_path, - match_glob="**vcf", + bucket=GCS_BUCKET, + prefix=VCF_MERGED_DST_PATH.replace(f"gs://{GCS_BUCKET}/", ""), trigger_rule=TriggerRule.ALL_SUCCESS, + match_glob="**.vcf", ) >> vep_annotation(pm) >> create_cluster( diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index fa110a2d4..1cc1eac1b 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -59,29 +59,6 @@ def get_schema(cls: type[VariantIndex]) -> StructType: """ return parse_spark_schema("variant_index.json") - @classmethod - def assign_variant_id( - cls: type[VariantIndex], - ) -> Column: - """Creates a column with the variant ID that will be used to index the variant index. - - This is to ensure that the variant ID is unique and not too long. 
- - Returns: - Column: Column with the variant ID containing the hash if the variant ID is longer than 100 characters - """ - return ( - f.when( - f.length(f.col("variantId")) >= 100, - f.concat( - f.lit("otvar_"), - f.xxhash64(f.col("variantId")).cast("string"), - ), - ) - .otherwise(f.col("variantId")) - .alias("variantId") - ) - @staticmethod def hash_long_variant_ids( variant_id: Column, chromosome: Column, position: Column, threshold: int = 100 diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 9cde1c44b..2259ef34e 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -738,7 +738,6 @@ def process_vep_output( transcript.distance.alias("distance"), transcript.gene_id.alias("targetId"), transcript.impact.alias("impact"), - transcript.transcript_id.alias("transcriptId"), transcript.lof.cast(t.StringType()).alias( "lofteePrediction" ), @@ -746,6 +745,7 @@ def process_vep_output( transcript.lof.cast(t.FloatType()).alias( "polyphenPrediction" ), + transcript.transcript_id.alias("transcriptId"), ), ), ).alias("transcriptConsequences"), diff --git a/src/gentropy/datasource/open_targets/variants.py b/src/gentropy/datasource/open_targets/variants.py index a50cc04ae..03018438b 100644 --- a/src/gentropy/datasource/open_targets/variants.py +++ b/src/gentropy/datasource/open_targets/variants.py @@ -96,15 +96,6 @@ def as_vcf_df( col, create_empty_column_if_not_exists(col) ) - variant_df = cls.map_rsids_to_variant_ids(session, variant_df) - - variant_df = variant_df.withColumn( - "variantId", - f.when(f.col("variantId").isNull(), f.lit(".")).otherwise( - f.col("variantId") - ), - ) - return ( variant_df.filter(f.col("variantId").isNotNull()) .withColumn( @@ -135,6 +126,12 @@ def as_vcf_df( f.lit(".").alias("FILTER"), f.lit(".").alias("INFO"), ) - .filter(f.col("#CHROM") != ".") + .distinct() + .filter( + (f.col("#CHROM") != ".") + & 
(f.col("POS").isNotNull()) + & (f.col("REF").rlike("^[GCTA.]*$")) + & (f.col("ALT").rlike("^[GCTA.]*$")) + ) .orderBy(f.col("#CHROM").asc(), f.col("POS").asc()) - ).distinct() + ) diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index a3dbdbcda..f6d555f07 100644 --- a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -2,6 +2,8 @@ from __future__ import annotations +from pyspark.sql.functions import col + from gentropy.common.session import Session from gentropy.config import VariantIndexConfig from gentropy.dataset.variant_index import VariantIndex @@ -51,7 +53,12 @@ def __init__( variant_index = variant_index.add_annotation(annotations) ( - variant_index.df.withColumn("variantId", VariantIndex.assign_variant_id()) + variant_index.df.withColumn( + "variantId", + VariantIndex.hash_long_variant_ids( + col("variantId"), col("chromosome"), col("position") + ), + ) .write.partitionBy("chromosome") .mode(session.write_mode) .parquet(variant_index_path) @@ -77,8 +84,8 @@ def __init__( vcf_path (str): Output VCF file path. 
""" # Load - df = session.load_data(source_path, source_format).limit(100) + df = session.load_data(source_path, source_format) # Extract vcf_df = OpenTargetsVariant.as_vcf_df(session, df) # Write - vcf_df.toPandas().to_csv(vcf_path, sep="\t", index=False) + vcf_df.write.csv(vcf_path, sep="\t", header=True) diff --git a/tests/gentropy/datasource/open_targets/test_variants.py b/tests/gentropy/datasource/open_targets/test_variants.py index 9d2c56e73..247a9d81e 100644 --- a/tests/gentropy/datasource/open_targets/test_variants.py +++ b/tests/gentropy/datasource/open_targets/test_variants.py @@ -66,18 +66,7 @@ def test_as_vcf_df_without_variant_id( session, df_without_variant_id_df ).orderBy(*["#CHROM", "POS", "REF", "ALT"]) - vcf_cols = ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"] - df_without_variant_id_expected_df = spark.createDataFrame( - [ - ("17", 7041768, "rs75493593", "G", "C", ".", ".", "."), - ("17", 7041768, "rs75493593", "G", "T", ".", ".", "."), - ], - vcf_cols, - ) - - assert ( - observed_df.collect() == df_without_variant_id_expected_df.collect() - ), "Unexpected VCF dataframe." + assert observed_df.count() == 0, "A variant ID should be present for VCF step." 
def test_as_vcf_df_without_rs_id( self: TestOpenTargetsVariant, @@ -85,13 +74,13 @@ def test_as_vcf_df_without_rs_id( session: Session, ) -> None: """Test the as_vcf_df method with a dataframe of variants without an annotated variantRsId.""" - df_without_rs_id_df = spark.createDataFrame([("1_2_x_y",)], ["variantId"]) + df_without_rs_id_df = spark.createDataFrame([("1_2_G_GA",)], ["variantId"]) observed_df = OpenTargetsVariant.as_vcf_df(session, df_without_rs_id_df) vcf_cols = ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"] df_without_rs_id_expected_df = spark.createDataFrame( [ - ("1", 2, ".", "x", "y", ".", ".", "."), + ("1", 2, ".", "G", "GA", ".", ".", "."), ], vcf_cols, ) From 348b6f0804988449398d335b2201dccbe1b75c41 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 28 Aug 2024 11:18:03 +0200 Subject: [PATCH 015/188] fix: update cluster creation command (#739) Co-authored-by: Szymon Szyszkowski --- Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index f1caf887f..b83075558 100644 --- a/Makefile +++ b/Makefile @@ -38,18 +38,19 @@ build-documentation: ## Create local server with documentation create-dev-cluster: build ## Spin up a simple dataproc cluster with all dependencies for development purposes @echo "Creating Dataproc Dev Cluster" @gcloud config set project ${PROJECT_ID} - @gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_VERSION_NO}" \ + @gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_VERSION_NO}-$(USER)" \ --image-version 2.1 \ --region ${REGION} \ --master-machine-type n1-standard-16 \ --initialization-actions=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/install_dependencies_on_cluster.sh \ 
--metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/gentropy-${VERSION_NO}-py3-none-any.whl,CONFIGTAR=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/config.tar.gz" \ - --primary-worker-type n1-standard-8 \ + --secondary-worker-type spot \ --worker-machine-type n1-standard-4 \ --worker-boot-disk-size 500 \ - --autoscaling_policy=f"projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/eqtl-preprocess", \ + --autoscaling-policy="projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/otg-etl" \ --optional-components=JUPYTER \ - --enable-component-gateway + --enable-component-gateway \ + --max-idle=30m make update-dev-cluster: build ## Reinstalls the package on the dev-cluster @echo "Updating Dataproc Dev Cluster" From b6746fb17fcef46791d00dbb7de28ced52fedd66 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:18:14 +0200 Subject: [PATCH 016/188] fix: revert recursiveFileLookup to False (#738) Co-authored-by: Szymon Szyszkowski --- src/gentropy/common/session.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gentropy/common/session.py b/src/gentropy/common/session.py index f618cd722..297903629 100644 --- a/src/gentropy/common/session.py +++ b/src/gentropy/common/session.py @@ -134,11 +134,13 @@ def load_data( ) -> DataFrame: """Generic function to read a file or folder into a Spark dataframe. + The `recursiveFileLookup` flag when set to True will skip all partition columns, but read files from all subdirectories. + Args: path (str | list[str]): path to the dataset format (str): file format. Defaults to parquet. schema (StructType | str | None): Schema to use when reading the data. - **kwargs (bool | float | int | str | None): Additional arguments to pass to spark.read.load. `recursiveFileLookup` and `mergeSchema` are set to True by default. 
+ **kwargs (bool | float | int | str | None): Additional arguments to pass to spark.read.load. `mergeSchema` is set to True, `recursiveFileLookup` is set to False by default. Returns: DataFrame: Dataframe @@ -147,7 +149,7 @@ def load_data( if schema is None: kwargs["inferSchema"] = kwargs.get("inferSchema", True) kwargs["mergeSchema"] = kwargs.get("mergeSchema", True) - kwargs["recursiveFileLookup"] = kwargs.get("recursiveFileLookup", True) + kwargs["recursiveFileLookup"] = kwargs.get("recursiveFileLookup", False) return self.spark.read.load(path, format=format, schema=schema, **kwargs) From cf1ec24577580243f513aadd42676b863e2ffe8f Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:55:35 +0200 Subject: [PATCH 017/188] feat: Finngen r11 ingestion (#733) * feat: finngen r11 study index paths * feat: simplification of finngen susie finemapping ingestion * feat: efos in finngne study index * docs: updated docstring * fix: pass spark_session not gentropy session to step * fix: read uploaded file requires file:/// * fix: parallelize csv from url * fix: typo in docstring * chore: pre-commit auto fixes [...] 
--------- Co-authored-by: Szymon Szyszkowski Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Daniel Suveges --- config/datasets/ot_gcp.yaml | 3 - .../ot_finngen_finemapping_ingestion.yaml | 7 - src/airflow/dags/finngen_harmonisation.py | 79 ------- src/airflow/dags/finngen_preprocess.py | 76 ------- src/gentropy/config.py | 41 ++-- .../datasource/finngen/finemapping.py | 203 ++++++++++++++++-- .../datasource/finngen/study_index.py | 80 +++++-- src/gentropy/finngen_finemapping_ingestion.py | 17 +- src/gentropy/finngen_studies.py | 45 +++- .../data_samples/finngen_R9_AB1_EBV.SUSIE.snp | 90 ++++++++ .../finngen_R9_AB1_EBV.SUSIE.snp.bgz | Bin 0 -> 35105 bytes .../finngen_credset_summary_sample.tsv.bgz | Bin 0 -> 1338 bytes .../finngen/test_finngen_finemapping.py | 29 ++- .../finngen/test_finngen_study_index.py | 73 +++++++ 14 files changed, 518 insertions(+), 225 deletions(-) delete mode 100644 config/step/ot_finngen_finemapping_ingestion.yaml delete mode 100644 src/airflow/dags/finngen_harmonisation.py delete mode 100644 src/airflow/dags/finngen_preprocess.py create mode 100644 tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp create mode 100644 tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.bgz create mode 100644 tests/gentropy/data_samples/finngen_credset_summary_sample.tsv.bgz diff --git a/config/datasets/ot_gcp.yaml b/config/datasets/ot_gcp.yaml index d169f7c08..a8d8886bb 100644 --- a/config/datasets/ot_gcp.yaml +++ b/config/datasets/ot_gcp.yaml @@ -51,9 +51,6 @@ thurman: ${datasets.static_assets}/thurman2012/genomewideCorrs_above0.7_promoter target_index: ${datasets.static_assets}/targets # OTP 23.12 data gene_interactions: ${datasets.static_assets}/interaction # OTP 23.12 data -finngen_finemapping_results_path: ${datasets.inputs}/Finngen_susie_finemapping_r10/full -finngen_finemapping_summaries_path: ${datasets.inputs}/Finngen_susie_finemapping_r10/Finngen_susie_credset_summary_r10.tsv - 
# Dev output datasets gnomad_variants: ${datasets.static_assets}/gnomad_variants study_locus: ${datasets.outputs}/study_locus diff --git a/config/step/ot_finngen_finemapping_ingestion.yaml b/config/step/ot_finngen_finemapping_ingestion.yaml deleted file mode 100644 index 46aa497fa..000000000 --- a/config/step/ot_finngen_finemapping_ingestion.yaml +++ /dev/null @@ -1,7 +0,0 @@ -defaults: - - finngen_finemapping_ingestion - -finngen_finemapping_results_path: ${datasets.finngen_finemapping_results_path} -finngen_finemapping_summaries_path: ${datasets.finngen_finemapping_summaries_path} -finngen_release_prefix: ${datasets.finngen_release_prefix} -finngen_finemapping_out: ${datasets.finngen_finemapping_out} diff --git a/src/airflow/dags/finngen_harmonisation.py b/src/airflow/dags/finngen_harmonisation.py deleted file mode 100644 index 18f81a376..000000000 --- a/src/airflow/dags/finngen_harmonisation.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Airflow DAG for the harmonisation part of the pipeline.""" - -from __future__ import annotations - -import re -import time -from pathlib import Path -from typing import Any - -import common_airflow as common - -from airflow.decorators import task -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator - -CLUSTER_NAME = "otg-finngen-harmonisation" -AUTOSCALING = "gwascatalog-harmonisation" # same as GWAS Catalog harmonisation -SUMMARY_STATS_BUCKET_NAME = "finngen-public-data-r10" -RELEASEBUCKET = "gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX" -SUMSTATS_PARQUET = f"{RELEASEBUCKET}/summary_statistics/finngen" - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — Finngen harmonisation", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - # List raw harmonised files from GWAS Catalog - list_inputs = GCSListObjectsOperator( - task_id="list_raw_sumstats", - bucket=SUMMARY_STATS_BUCKET_NAME, - 
prefix="summary_stats", - match_glob="**/*.gz", - ) - - # Submit jobs to dataproc - @task(task_id="submit_jobs") - def submit_jobs(**kwargs: Any) -> None: - """Submit jobs to dataproc. - - Args: - **kwargs (Any): Keyword arguments. - """ - ti = kwargs["ti"] - todo = ti.xcom_pull(task_ids="list_raw_sumstats", key="return_value") - print("Number of jobs to submit: ", len(todo)) # noqa: T201 - for i in range(len(todo)): - # Not to exceed default quota 400 jobs per minute - if i > 0 and i % 399 == 0: - time.sleep(60) - input_path = todo[i] - match_result = re.search(r"summary_stats/finngen_(.*).gz", input_path) - if match_result: - study_id = match_result.group(1) - print("Submitting job for study: ", study_id) # noqa: T201 - common.submit_pyspark_job_no_operator( - cluster_name=CLUSTER_NAME, - step_id="finngen_sumstat_preprocess", - other_args=[ - f"step.raw_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{input_path}", - f"step.out_sumstats_path={SUMSTATS_PARQUET}/{study_id}.parquet", - ], - ) - - # list_inputs >> - ( - list_inputs - >> common.create_cluster( - CLUSTER_NAME, - autoscaling_policy=AUTOSCALING, - num_workers=8, - # num_preemptible_workers=8, - master_machine_type="n1-highmem-32", - worker_machine_type="n1-standard-2", - ) - >> common.install_dependencies(CLUSTER_NAME) - >> submit_jobs() - >> common.delete_cluster(CLUSTER_NAME) - ) diff --git a/src/airflow/dags/finngen_preprocess.py b/src/airflow/dags/finngen_preprocess.py deleted file mode 100644 index fbfab91e5..000000000 --- a/src/airflow/dags/finngen_preprocess.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Airflow DAG for the Preprocess part of the pipeline.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG -from airflow.utils.task_group import TaskGroup -from airflow.utils.trigger_rule import TriggerRule - -CLUSTER_NAME = "otg-preprocess-finngen" -AUTOSCALING = "finngen-preprocess" - -# Get all parameters for the DAG: 
-FINNGEN_VERSION = "r10" -FINNGEN_BUCKET = f"gs://finngen_data/{FINNGEN_VERSION}" - -STUDY_INDEX = f"{FINNGEN_BUCKET}/study_index" -SUMMARY_STATISTICS = f"{FINNGEN_BUCKET}/harmonised_summary_statistics" -WINDOW_BASED_CLUMPED = f"{FINNGEN_BUCKET}/study_locus_datasets/finngen_window_clumped" -LD_CLUMPED = f"{FINNGEN_BUCKET}/study_locus_datasets/finngen_ld_clumped" -PICSED_CREDIBLE_SET = f"{FINNGEN_BUCKET}/credible_set_datasets/finngen_pics" - -FINNGEN_FINEMAPPING = ( - "gs://genetics_etl_python_playground/input/Finngen_susie_finemapping_r10/full" -) -FINNGEN_FM_SUMMARIES = "gs://genetics_etl_python_playground/input/Finngen_susie_finemapping_r10/Finngen_susie_credset_summary_r10.tsv" -FINNGEN_PREFIX = "FINNGEN_R10_" -FINNGEN_FM_OUT = f"{FINNGEN_BUCKET}/credible_set_datasets/finngen_susie" - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — Finngen preprocess", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - finngen_finemapping_ingestion = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_finngen_finemapping_ingestion", - task_id="finngen_finemapping_ingestion", - other_args=[ - f"step.finngen_finemapping_out={FINNGEN_FM_OUT}", - f"step.finngen_release_prefix={FINNGEN_PREFIX}", - f"step.finngen_finemapping_results_path={FINNGEN_FINEMAPPING}", - f"step.finngen_finemapping_summaries_path={FINNGEN_FM_SUMMARIES}", - ], - # This allows to attempt running the task when above step fails do to failifexists - trigger_rule=TriggerRule.ALL_DONE, - ) - with TaskGroup( - group_id="finngen_summary_stats_preprocess" - ) as finngen_summary_stats_preprocess: - study_index = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="finngen_studies", - task_id="finngen_studies", - other_args=[ - f"step.finngen_study_index_out={STUDY_INDEX}", - ], - ) - - # Define order of steps: - (study_index) - ( - common.create_cluster( - CLUSTER_NAME, - autoscaling_policy=AUTOSCALING, - master_disk_size=2000, - num_workers=6, - ) 
- >> common.install_dependencies(CLUSTER_NAME) - >> [finngen_summary_stats_preprocess, finngen_finemapping_ingestion] - >> common.delete_cluster(CLUSTER_NAME) - ) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 90160e962..2d0cf5b8e 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -123,26 +123,38 @@ class EqtlCatalogueConfig(StepConfig): class FinngenStudiesConfig(StepConfig): """FinnGen study index step configuration.""" + session: Any = field( + default_factory=lambda: { + "start_hail": True, + } + ) finngen_study_index_out: str = MISSING + finngen_phenotype_table_url: str = "https://r11.finngen.fi/api/phenos" + finngen_release_prefix: str = "FINNGEN_R11_" + finngen_summary_stats_url_prefix: str = ( + "gs://finngen-public-data-r11/summary_stats/finngen_R11_" + ) + finngen_summary_stats_url_suffix: str = ".gz" + efo_curation_mapping_url: str = "https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv" _target_: str = "gentropy.finngen_studies.FinnGenStudiesStep" -@dataclass -class FinngenSumstatPreprocessConfig(StepConfig): - """FinnGen study index step configuration.""" - - raw_sumstats_path: str = MISSING - out_sumstats_path: str = MISSING - _target_: str = "gentropy.finngen_sumstat_preprocess.FinnGenSumstatPreprocessStep" - - @dataclass class FinngenFinemappingConfig(StepConfig): """FinnGen fine mapping ingestion step configuration.""" - finngen_finemapping_results_path: str = MISSING - finngen_finemapping_summaries_path: str = MISSING - finngen_release_prefix: str = MISSING + session: Any = field( + default_factory=lambda: { + "start_hail": True, + } + ) + finngen_susie_finemapping_snp_files: str = ( + "gs://finngen-public-data-r11/finemap/full/susie/*.snp.bgz" + ) + finngen_susie_finemapping_cs_summary_files: str = ( + "gs://finngen-public-data-r11/finemap/summary/*.cred.summary.tsv" + ) + finngen_release_prefix: str = "FINNGEN_R11_" finngen_finemapping_out: str = MISSING _target_: 
str = ( "gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep" @@ -510,11 +522,6 @@ def register_config() -> None: cs.store(group="step", name="ld_index", node=LDIndexConfig) cs.store(group="step", name="locus_to_gene", node=LocusToGeneConfig) cs.store(group="step", name="finngen_studies", node=FinngenStudiesConfig) - cs.store( - group="step", - name="finngen_sumstat_preprocess", - node=FinngenSumstatPreprocessConfig, - ) cs.store( group="step", diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index a31ddf511..340b1d3c9 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -5,6 +5,7 @@ from dataclasses import dataclass +import hail as hl import pyspark.sql.functions as f import pyspark.sql.types as t from pyspark.sql import SparkSession, Window @@ -21,7 +22,7 @@ class FinnGenFinemapping: Credible sets from SuSIE are extracted and transformed into StudyLocus objects: - - Study ID in the special format (e.g. FINNGEN_R10_*) + - Study ID in the special format (e.g. FINNGEN_R11*) - Credible set specific finemapping statistics (e.g. LogBayesFactors, Alphas/Posterior) - Additional credible set level BayesFactor filtering is applied (LBF > 2) - StudyLocusId is annotated for each credible set. 
@@ -109,34 +110,189 @@ class FinnGenFinemapping: ] ) + raw_hail_shema: hl.tstruct = hl.tstruct( + trait=hl.tstr, + region=hl.tstr, + v=hl.tstr, + rsid=hl.tstr, + chromosome=hl.tstr, + position=hl.tstr, + allele1=hl.tstr, + allele2=hl.tstr, + maf=hl.tstr, + beta=hl.tstr, + se=hl.tstr, + p=hl.tstr, + mean=hl.tstr, + sd=hl.tstr, + prob=hl.tstr, + cs=hl.tstr, + cs_specific_prob=hl.tfloat64, + low_purity=hl.tstr, + lead_r2=hl.tstr, + mean_99=hl.tstr, + sd_99=hl.tstr, + prob_99=hl.tstr, + cs_99=hl.tstr, + cs_specific_prob_99=hl.tstr, + low_purity_99=hl.tstr, + lead_r2_99=hl.tstr, + alpha1=hl.tfloat64, + alpha2=hl.tfloat64, + alpha3=hl.tfloat64, + alpha4=hl.tfloat64, + alpha5=hl.tfloat64, + alpha6=hl.tfloat64, + alpha7=hl.tfloat64, + alpha8=hl.tfloat64, + alpha9=hl.tfloat64, + alpha10=hl.tfloat64, + mean1=hl.tstr, + mean2=hl.tstr, + mean3=hl.tstr, + mean4=hl.tstr, + mean5=hl.tstr, + mean6=hl.tstr, + mean7=hl.tstr, + mean8=hl.tstr, + mean9=hl.tstr, + mean10=hl.tstr, + sd1=hl.tstr, + sd2=hl.tstr, + sd3=hl.tstr, + sd4=hl.tstr, + sd5=hl.tstr, + sd6=hl.tstr, + sd7=hl.tstr, + sd8=hl.tstr, + sd9=hl.tstr, + sd10=hl.tstr, + lbf_variable1=hl.tfloat64, + lbf_variable2=hl.tfloat64, + lbf_variable3=hl.tfloat64, + lbf_variable4=hl.tfloat64, + lbf_variable5=hl.tfloat64, + lbf_variable6=hl.tfloat64, + lbf_variable7=hl.tfloat64, + lbf_variable8=hl.tfloat64, + lbf_variable9=hl.tfloat64, + lbf_variable10=hl.tfloat64, + ) + + summary_hail_schema: hl.tstruct = hl.tstruct( + trait=hl.tstr, + region=hl.tstr, + cs=hl.tstr, + cs_log10bf=hl.tfloat64, + ) + + @staticmethod + def _infer_block_gzip_compression(paths: str | list[str]) -> bool: + """Naively infer compression type based on the file extension. + + Args: + paths (str | list[str]): File path(s). + + Returns: + bool: True if block gzipped, False otherwise. 
+ """ + if isinstance(paths, str): + return paths.endswith(".bgz") + return all(path.endswith(".bgz") for path in paths) + @classmethod def from_finngen_susie_finemapping( cls: type[FinnGenFinemapping], spark: SparkSession, - finngen_finemapping_df: (str | list[str]), - finngen_finemapping_summaries: (str | list[str]), + finngen_susie_finemapping_snp_files: (str | list[str]), + finngen_susie_finemapping_cs_summary_files: (str | list[str]), finngen_release_prefix: str, credset_lbf_threshold: float = 0.8685889638065036, ) -> StudyLocus: """Process the SuSIE finemapping output for FinnGen studies. + The finngen_susie_finemapping_snp_files are files that contain variant summaries with credible set information with the following schema: + - trait: phenotype + - region: region for which the fine-mapping was run. + - v, rsid: variant ids + - chromosome + - position + - allele1 + - allele2 + - maf: minor allele frequency + - beta: original marginal beta + - se: original se + - p: original p + - mean: posterior mean beta after fine-mapping + - sd: posterior standard deviation after fine-mapping. + - prob: posterior inclusion probability + - cs: credible set index within region + - lead_r2: r2 value to a lead variant (the one with maximum PIP) in a credible set + - alphax: posterior inclusion probability for the x-th single effect (x := 1..L where L is the number of single effects (causal variants) specified; default: L = 10). + - lbfx: log-Bayes Factor for each variable and single effect (i.e credible set). + - meanx: posterior mean for each variable and single effect (i.e credible set). + - sdx: posterior sd of mean for each variable and single effect (i.e credible set). + As of the r11 FinnGen release these files are ingested from `https://console.cloud.google.com/storage/browser/finngen-public-data-r11/finemap/full/susie/` by + - *.snp.bgz + - *.snp.bgz.tbi + Each file has an index (.tbi) file that is required to read the block gzipped compressed snp file.
These files need to be + downloaded, transformed from block gzipped to plain gzipped and then uploaded to the storage bucket, before they can be read by spark or read by hail directly as import table. + + The finngen_susie_finemapping_cs_summary_files are files that contain credible set summaries from SuSiE fine-mapping for all genome-wide significant regions with the following schema: + - trait: phenotype + - region: region for which the fine-mapping was run. + - cs: running number for independent credible sets in a region + - cs_log10bf: Log10 bayes factor of comparing the solution of this model (cs independent credible sets) to cs -1 credible sets + - cs_avg_r2: Average correlation R2 between variants in the credible set + - cs_min_r2: minimum r2 between variants in the credible set + - low_purity: boolean (TRUE,FALSE) indicator if the CS is low purity (low min r2) + - cs_size: how many snps does this credible set contain + - good_cs: boolean (TRUE,FALSE) indicator if this CS is considered reliable. If this is FALSE then top variant reported for the CS will be chosen based on minimum p-value in the credible set, otherwise top variant is chosen by maximum PIP + - cs_id: + - v: top variant (chr:pos:ref:alt) + - p: top variant p-value + - beta: top variant beta + - sd: top variant standard deviation + - prob: overall PIP of the variant in the region + - cs_specific_prob: PIP of the variant in the current credible set (this and previous are typically almost identical) + - 0..n: configured annotation columns. Typical default most_severe,gene_most_severe giving consequence and gene of top variant + These files need to be downloaded from the `https://console.cloud.google.com/storage/browser/finngen-public-data-r11/finemap/summary/` by *.cred.summary.tsv pattern. + Args: - spark (SparkSession): Spark session object. - finngen_finemapping_df (str | list[str]): SuSIE finemapping output filename(s).
- finngen_finemapping_summaries (str | list[str]): filename of SuSIE finemapping summaries. + spark (SparkSession): SparkSession object. + finngen_susie_finemapping_snp_files (str | list[str]): SuSIE finemapping output filename(s). + finngen_susie_finemapping_cs_summary_files (str | list[str]): filename of SuSIE finemapping credible set summaries. finngen_release_prefix (str): FinnGen study prefix. credset_lbf_threshold (float, optional): Filter out credible sets below, Default 0.8685889638065036 == np.log10(np.exp(2)), this is threshold from publication. Returns: StudyLocus: Processed SuSIE finemapping output in StudyLocus format. """ + # NOTE: hail allows for importing block gzipped files, spark does not without external libraries. + # check https://github.com/projectglow/glow/blob/36bf6121fbc4ccc33a13b028deb87b63faeba7a9/core/src/main/scala/io/projectglow/vcf/VCFFileFormat.scala#L274 + # how it could be implemented with spark. + bgzip_compressed_snps = cls._infer_block_gzip_compression( + finngen_susie_finemapping_snp_files + ) + + # NOTE: fallback to spark read if not block gzipped file in the input + if bgzip_compressed_snps: + snps_df = hl.import_table( + finngen_susie_finemapping_snp_files, + delimiter="\t", + types=cls.raw_hail_shema, + ).to_spark() + else: + snps_df = ( + spark.read.schema(cls.raw_schema) + .option("delimiter", "\t") + .option("compression", "gzip") + .csv(finngen_susie_finemapping_snp_files, header=True) + ) + processed_finngen_finemapping_df = ( - spark.read.schema(cls.raw_schema) - .option("delimiter", "\t") - .option("compression", "gzip") - .csv(finngen_finemapping_df, header=True) # Drop rows which don't have proper position. 
- .filter(f.col("position").cast(t.IntegerType()).isNotNull()) + snps_df.filter(f.col("position").cast(t.IntegerType()).isNotNull()) # Drop non credible set SNPs: .filter(f.col("cs").cast(t.IntegerType()) > 0) .select( @@ -222,14 +378,31 @@ def from_finngen_susie_finemapping( ) ) + bgzip_compressed_cs_summaries = cls._infer_block_gzip_compression( + finngen_susie_finemapping_cs_summary_files + ) + + # NOTE: fallback to spark read if not block gzipped file in the input + # in case we want to use the raw files from the + # https://console.cloud.google.com/storage/browser/finngen-public-data-r11/finemap/full/susie/*.cred.gz + if bgzip_compressed_cs_summaries: + cs_summary_df = hl.import_table( + finngen_susie_finemapping_cs_summary_files, + delimiter="\t", + types=cls.summary_hail_schema, + ).to_spark() + else: + cs_summary_df = ( + spark.read.schema(cls.summary_schema) + .option("delimiter", "\t") + .csv(finngen_susie_finemapping_cs_summary_files, header=True) + ) + # drop credible sets where logbf > 2. Except when there's only one credible set in region: # 0.8685889638065036 corresponds to np.log10(np.exp(2)), to match the orginal threshold in publication. finngen_finemapping_summaries_df = ( # Read credible set level lbf, it is output as a different file which is not ideal. 
- spark.read.schema(cls.summary_schema) - .option("delimiter", "\t") - .csv(finngen_finemapping_summaries, header=True) - .select( + cs_summary_df.select( f.col("region"), f.col("trait"), f.col("cs").cast("integer").alias("credibleSetIndex"), diff --git a/src/gentropy/datasource/finngen/study_index.py b/src/gentropy/datasource/finngen/study_index.py index 872c25dee..71dce10d4 100644 --- a/src/gentropy/datasource/finngen/study_index.py +++ b/src/gentropy/datasource/finngen/study_index.py @@ -2,11 +2,13 @@ from __future__ import annotations +import re from urllib.request import urlopen import pyspark.sql.functions as f -from pyspark.sql import SparkSession +from pyspark.sql import DataFrame, SparkSession +from gentropy.config import FinngenStudiesConfig from gentropy.dataset.study_index import StudyIndex @@ -15,35 +17,89 @@ class FinnGenStudyIndex: The following information is aggregated/extracted: - - Study ID in the special format (e.g. FINNGEN_R10_*) + - Study ID in the special format (e.g. FINNGEN_R11_*) - Trait name (for example, Amoebiasis) - Number of cases and controls - Link to the summary statistics location + - EFO mapping from curated EFO mapping file Some fields are also populated as constants, such as study type and the initial sample size. """ - finngen_phenotype_table_url: str = "https://r10.finngen.fi/api/phenos" - finngen_release_prefix: str = "FINNGEN_R10" - finngen_summary_stats_url_prefix: str = ( - "gs://finngen-public-data-r10/summary_stats/finngen_R10_" - ) - finngen_summary_stats_url_suffix: str = ".gz" + @staticmethod + def join_efo_mapping( + study_index: StudyIndex, + efo_curation_mapping: DataFrame, + finngen_release_prefix: str = FinngenStudiesConfig().finngen_release_prefix, + ) -> StudyIndex: + """Add EFO mapping to the Finngen study index table. + + This function performs inner join on table of EFO mappings to the study index table by trait name. + All studies without EFO traits are dropped. 
The EFO mappings are then aggregated into lists per + studyId. + + Args: + study_index (StudyIndex): Study index table. + efo_curation_mapping (DataFrame): Dataframe with EFO mappings. + finngen_release_prefix (str): FinnGen release prefix. + + Returns: + StudyIndex: Study index table with added EFO mappings. + + Raises: + ValueError: when incorrect release prefix is provided. + """ + finngen_release_prefix_regex = re.compile(r"FINNGEN_(?PR\d+){1}_?") + finngen_release_prefix_match = finngen_release_prefix_regex.match( + finngen_release_prefix + ) + if not finngen_release_prefix_match: + raise ValueError( + f"Invalid FinnGen release prefix: {finngen_release_prefix}, use the format FINNGEN_R*_" + ) + finngen_release = finngen_release_prefix_match.group("release").upper() + + efo_mappings = ( + efo_curation_mapping.withColumn("STUDY", f.upper(f.col("STUDY"))) + .filter(f.col("STUDY").contains("FINNGEN")) + .filter(f.upper(f.col("STUDY")).contains(finngen_release)) + .select( + f.regexp_replace(f.col("SEMANTIC_TAG"), r"^.*/", "").alias( + "traitFromSourceMappedId" + ), + f.col("PROPERTY_VALUE").alias("traitFromSource"), + ) + ) + # NOTE: inner join to keep only the studies with EFO mappings + si_df = study_index.df.join(efo_mappings, on="traitFromSource", how="inner") + common_cols = [c for c in si_df.columns if c != "traitFromSourceMappedId"] + si_df = si_df.groupby(common_cols).agg( + f.collect_list("traitFromSourceMappedId").alias("traitFromSourceMappedIds") + ) + return StudyIndex(_df=si_df, _schema=StudyIndex.get_schema()) @classmethod def from_source( cls: type[FinnGenStudyIndex], spark: SparkSession, + finngen_phenotype_table_url: str = FinngenStudiesConfig().finngen_phenotype_table_url, + finngen_release_prefix: str = FinngenStudiesConfig().finngen_release_prefix, + finngen_summary_stats_url_prefix: str = FinngenStudiesConfig().finngen_summary_stats_url_prefix, + finngen_summary_stats_url_suffix: str = FinngenStudiesConfig().finngen_summary_stats_url_suffix, ) 
-> StudyIndex: """This function ingests study level metadata from FinnGen. Args: spark (SparkSession): Spark session object. + finngen_phenotype_table_url (str): URL to the FinnGen phenotype table. + finngen_release_prefix (str): FinnGen release prefix. + finngen_summary_stats_url_prefix (str): FinnGen summary stats URL prefix. + finngen_summary_stats_url_suffix (str): FinnGen summary stats URL suffix. Returns: StudyIndex: Parsed and annotated FinnGen study table. """ - json_data = urlopen(cls.finngen_phenotype_table_url).read().decode("utf-8") + json_data = urlopen(finngen_phenotype_table_url).read().decode("utf-8") rdd = spark.sparkContext.parallelize([json_data]) raw_df = spark.read.json(rdd) return StudyIndex( @@ -55,7 +111,7 @@ def from_source( (f.col("num_cases") + f.col("num_controls")) .cast("integer") .alias("nSamples"), - f.lit(cls.finngen_release_prefix).alias("projectId"), + f.lit(finngen_release_prefix).alias("projectId"), f.lit("gwas").alias("studyType"), f.lit(True).alias("hasSumstats"), f.lit("377,277 (210,870 females and 166,407 males)").alias( @@ -70,9 +126,9 @@ def from_source( # Cohort label is consistent with GWAS Catalog curation. 
f.array(f.lit("FinnGen")).alias("cohorts"), f.concat( - f.lit(cls.finngen_summary_stats_url_prefix), + f.lit(finngen_summary_stats_url_prefix), f.col("phenocode"), - f.lit(cls.finngen_summary_stats_url_suffix), + f.lit(finngen_summary_stats_url_suffix), ).alias("summarystatsLocation"), ).withColumn( "ldPopulationStructure", diff --git a/src/gentropy/finngen_finemapping_ingestion.py b/src/gentropy/finngen_finemapping_ingestion.py index d70316b5e..e85508023 100644 --- a/src/gentropy/finngen_finemapping_ingestion.py +++ b/src/gentropy/finngen_finemapping_ingestion.py @@ -6,6 +6,7 @@ from dataclasses import dataclass from gentropy.common.session import Session +from gentropy.config import FinngenFinemappingConfig from gentropy.datasource.finngen.finemapping import FinnGenFinemapping @@ -16,26 +17,26 @@ class FinnGenFinemappingIngestionStep(FinnGenFinemapping): def __init__( self, session: Session, - finngen_finemapping_results_path: str, - finngen_finemapping_summaries_path: str, - finngen_release_prefix: str, finngen_finemapping_out: str, + finngen_susie_finemapping_snp_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_snp_files, + finngen_susie_finemapping_cs_summary_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_cs_summary_files, + finngen_release_prefix: str = FinngenFinemappingConfig().finngen_release_prefix, ) -> None: """Run FinnGen finemapping ingestion step. Args: session (Session): Session object. - finngen_finemapping_results_path (str): Path to the FinnGen SuSIE finemapping results. - finngen_finemapping_summaries_path (str): FinnGen SuSIE summaries for CS filters(LBF>2). - finngen_release_prefix (str): Release prefix for FinnGen. finngen_finemapping_out (str): Output path for the finemapping results in StudyLocus format. + finngen_susie_finemapping_snp_files(str): Path to the FinnGen SuSIE finemapping results. + finngen_susie_finemapping_cs_summary_files (str): FinnGen SuSIE summaries for CS filters(LBF>2). 
+ finngen_release_prefix (str): Release prefix for FinnGen. """ # Read finemapping outputs from the input paths. finngen_finemapping_df = FinnGenFinemapping.from_finngen_susie_finemapping( spark=session.spark, - finngen_finemapping_df=finngen_finemapping_results_path, - finngen_finemapping_summaries=finngen_finemapping_summaries_path, + finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, + finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, finngen_release_prefix=finngen_release_prefix, ) diff --git a/src/gentropy/finngen_studies.py b/src/gentropy/finngen_studies.py index c1869b736..706fa3d39 100644 --- a/src/gentropy/finngen_studies.py +++ b/src/gentropy/finngen_studies.py @@ -2,21 +2,56 @@ from __future__ import annotations +from urllib.request import urlopen + from gentropy.common.session import Session +from gentropy.config import FinngenStudiesConfig from gentropy.datasource.finngen.study_index import FinnGenStudyIndex class FinnGenStudiesStep: """FinnGen study index generation step.""" - def __init__(self, session: Session, finngen_study_index_out: str) -> None: + def __init__( + self, + session: Session, + finngen_study_index_out: str, + finngen_phenotype_table_url: str = FinngenStudiesConfig().finngen_phenotype_table_url, + finngen_release_prefix: str = FinngenStudiesConfig().finngen_release_prefix, + finngen_summary_stats_url_prefix: str = FinngenStudiesConfig().finngen_summary_stats_url_prefix, + finngen_summary_stats_url_suffix: str = FinngenStudiesConfig().finngen_summary_stats_url_suffix, + efo_curation_mapping_url: str = FinngenStudiesConfig().efo_curation_mapping_url, + ) -> None: """Run FinnGen study index generation step. Args: session (Session): Session object. finngen_study_index_out (str): Output FinnGen study index path. + finngen_phenotype_table_url (str): URL to the FinnGen phenotype table. + finngen_release_prefix (str): FinnGen release prefix. 
+ finngen_summary_stats_url_prefix (str): FinnGen summary stats URL prefix. + finngen_summary_stats_url_suffix (str): FinnGen summary stats URL suffix. + efo_curation_mapping_url (str): URL to the EFO curation mapping file """ - # Fetch study index. - FinnGenStudyIndex.from_source(session.spark).df.write.mode( - session.write_mode - ).parquet(finngen_study_index_out) + study_index = FinnGenStudyIndex.from_source( + session.spark, + finngen_phenotype_table_url, + finngen_release_prefix, + finngen_summary_stats_url_prefix, + finngen_summary_stats_url_suffix, + ) + + # NOTE: hack to allow spark to read directly from the URL. + csv_data = urlopen(efo_curation_mapping_url).readlines() + csv_rows = [row.decode("utf8") for row in csv_data] + rdd = session.spark.sparkContext.parallelize(csv_rows) + efo_curation_mapping = session.spark.read.csv(rdd, header=True, sep="\t") + + study_index_with_efo = FinnGenStudyIndex.join_efo_mapping( + study_index, + efo_curation_mapping, + finngen_release_prefix, + ) + study_index_with_efo.df.write.mode(session.write_mode).parquet( + finngen_study_index_out + ) diff --git a/tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp b/tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp new file mode 100644 index 000000000..052399388 --- /dev/null +++ b/tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp @@ -0,0 +1,90 @@ +trait region v rsid chromosome position allele1 allele2 maf beta se p mean sd prob cs cs_specific_prob low_purity lead_r2 mean_99 sd_99 prob_99 cs_99 cs_specific_prob_99 low_purity_99 lead_r2_99 alpha1 alpha2 alpha3 alpha4 alpha5 alpha6 alpha7 alpha8 alpha9 alpha10 mean1 mean2 mean3 mean4 mean5 mean6 mean7 mean8 mean9 mean10 sd1 sd2 sd3 sd4 sd5 sd6 sd7 sd8 sd9 sd10 lbf_variable1 lbf_variable2 lbf_variable3 lbf_variable4 lbf_variable5 lbf_variable6 lbf_variable7 lbf_variable8 lbf_variable9 lbf_variable10 +AB1_EBV chr6:1412516-4412516 6:2898066:G:T chr6_2898066_G_T chr6 2898066 G T 0.413765 0.192277 0.0298155 
1.12676e-10 0.0329796138322914 0.0726322608911893 0.175343223649957 1 0.17509358936193 0 0.829216035769 0.0329796138322914 0.0726322608911893 0.175343223649957 1 0.17509358936193 0 0.829216035769 0.17509358936193 1.43993284524406e-05 3.60778561866872e-05 3.60648003884279e-05 3.60463349785855e-05 3.60278897859947e-05 3.60134896639449e-05 3.60060022461947e-05 3.60073664773458e-05 3.60187793017426e-05 0.0329766994994914 2.57161576786077e-07 3.31357531479788e-07 3.31581920897055e-07 3.31896507403073e-07 3.32212185390434e-07 3.32463786669888e-07 3.32605524722194e-07 3.32606528595316e-07 3.3244723805964e-07 0.0726311826236639 0.000126488575709164 0.000132387886331515 0.000132445000681512 0.000132525304968958 0.00013260558503703 0.000132668849958987 0.000132703150483526 0.000132700432413816 0.000132655610635964 18.5528693035569 -0.907607419163184 -0.195319530537169 -0.195641777295255 -0.196097663493068 -0.196552998035164 -0.196908246717843 -0.197092464695888 -0.197057687849098 -0.196774385008403 +AB1_EBV chr6:1412516-4412516 6:2898362:A:G chr6_2898362_A_G chr6 2898362 A G 0.443219 0.194397 0.0300071 9.27257e-11 0.0241861868176693 0.0638457314593366 0.128984802300941 1 0.128501831150163 0 0.468032488641 0.0241861868176693 0.0638457314593366 0.128984802300941 1 0.128501831150163 0 0.468032488641 0.128501831150163 4.5718571259867e-05 6.35067335181723e-05 6.35268204571264e-05 6.35549284123062e-05 6.35828685460672e-05 6.36046221942562e-05 6.36159391721562e-05 6.36139574209168e-05 6.35968920726836e-05 0.0241721312923613 2.13319750249321e-06 1.48299548305333e-06 1.48511049788324e-06 1.48808727845368e-06 1.49105919339296e-06 1.49338558929439e-06 1.49461138816016e-06 1.49442930680649e-06 1.49264906846324e-06 0.0638408505215491 0.000369070254874925 0.000245869167457302 0.000246119260779774 0.000246471397738596 0.000246822988506741 0.000247098297537043 0.000247243598398107 0.000247222620317828 0.000247012843232762 18.2434918306617 0.247715599243637 0.370147134295912 0.37050307766634 
0.371001689249645 0.371497718747115 0.371884315841652 0.372085936546344 0.372051672829573 0.371749767645852 +AB1_EBV chr6:1412516-4412516 6:2898390:T:C chr6_2898390_T_C chr6 2898390 T C 0.0759868 0.063491 0.056021 0.25707 4.31960345414416e-05 0.00196441267952157 0.00075046097081477 -1 NA NA NA 4.31960345414416e-05 0.00196441267952157 0.00075046097081477 -1 NA NA NA 3.93838174054942e-10 8.09816386834445e-05 8.35285041586315e-05 8.358390349547e-05 8.36617711445916e-05 8.37390173140435e-05 8.37985885393119e-05 8.38283740382775e-05 8.38201989302556e-05 8.37690030828716e-05 2.21782417886126e-11 8.40987313953194e-06 4.32181362915771e-06 4.32953616230941e-06 4.34041881168691e-06 4.35126272348911e-06 4.35969622337004e-06 4.36402915653852e-06 4.36311925498564e-06 4.35626326213054e-06 1.56709910069891e-06 0.00104868422604798 0.000584775010658085 0.000585504251701282 0.000586531786129352 0.000587555924454652 0.000588353554304855 0.000588766075105534 0.00058868635013728 0.000588048090257165 -1.35977706153615 0.819433456645043 0.644189135136349 0.644891851168843 0.645879279429021 0.646858672726141 0.647614336265917 0.647993424205583 0.647892786053872 0.647248212724249 +AB1_EBV chr6:1412516-4412516 6:2898544:G:A chr6_2898544_G_A chr6 2898544 G A 0.000162658 -1.06485 1.263 0.399165 -0.000142307972554879 0.0174158850922084 0.000311180806728872 -1 NA NA NA -0.000142307972554879 0.0174158850922084 0.000311180806728872 -1 NA NA NA 3.3135651989527e-10 1.52689507421236e-05 3.70370646643062e-05 3.70247425398274e-05 3.70073367344066e-05 3.69899109471804e-05 3.69762228369255e-05 3.69689522016209e-05 3.69699116314651e-05 3.6980218847936e-05 -3.43352664772061e-10 -1.31256485573238e-05 -1.61079999492037e-05 -1.6119506258209e-05 -1.6135711269742e-05 -1.61518735029245e-05 -1.61645243654611e-05 -1.61712251774856e-05 -1.61703323205355e-05 -1.6160807801329e-05 2.95278730463148e-05 0.00574326803029291 0.00580365618281197 0.00580635729320344 0.00581016191629574 0.00581395759104792 
0.00581692952751578 0.00581850423125729 0.00581829515872435 0.00581605883240119 -1.53252226561108 -0.848967586219725 -0.169079646723388 -0.169372702017943 -0.169786674000275 -0.17020115447332 -0.170526747831395 -0.170699687623306 -0.170676847074374 -0.17043169077503 +AB1_EBV chr6:1412516-4412516 6:2898634:A:G chr6_2898634_A_G chr6 2898634 A G 0.00990053 -0.0578846 0.155065 0.708929 3.36036046428797e-06 0.00177237672153217 0.00027298259335895 -1 NA NA NA 3.36036046428797e-06 0.00177237672153217 0.00027298259335895 -1 NA NA NA 2.45728985394081e-10 1.18511459605607e-05 3.26983764410425e-05 3.26831287402668e-05 3.26616014503584e-05 3.26400678296922e-05 3.26231695999668e-05 3.26142077268511e-05 3.26154135009179e-05 3.26281696699098e-05 -1.13844933399768e-11 1.98687927513989e-07 3.94710720255768e-07 3.94852414729217e-07 3.95050128659409e-07 3.95248276157307e-07 3.95406902746122e-07 3.95498447227266e-07 3.95504388775756e-07 3.95412642716477e-07 2.50933551331188e-06 0.000507185455541354 0.000599816995638025 0.00059999301326225 0.000600240532874255 0.00060048710965031 0.000600680011385158 0.000600782359514579 0.000600769347073544 0.000600625192283742 -1.83148791747326 -1.10236442097249 -0.29367337908761 -0.294100104367622 -0.294702736769016 -0.295305744621835 -0.295779068020591 -0.29603010504238 -0.295996246331836 -0.295638819167599 +AB1_EBV chr6:1412516-4412516 6:2898644:T:C chr6_2898644_T_C chr6 2898644 T C 0.118276 0.0617586 0.0458201 0.177706 0.000125511810900469 0.0034159962920344 0.00182190717924902 -1 NA NA NA 0.000125511810900469 0.0034159962920344 0.00182190717924902 -1 NA NA NA 4.99625110164512e-10 0.000374713090516501 0.000180149143340477 0.000180420738438503 0.000180804029116973 0.000181186404788696 0.000181484208566077 0.000181637677334594 0.000181606348560438 0.000181365328632569 2.78040407063015e-11 4.2635373091066e-05 1.02671877785273e-05 1.02939763337702e-05 1.03318266007943e-05 1.03696614548733e-05 1.03992204262546e-05 1.04145817520178e-05 
1.04117330753764e-05 1.03882225837485e-05 1.60359727996177e-06 0.00235589933186244 0.000869385262846467 0.000870883775241722 0.000872998948126608 0.000875111357562359 0.000876761378346027 0.000877621128059248 0.000877468732854278 0.000876167427429173 -1.12185912318842 2.35137165177169 1.41278627628876 1.41433245064316 1.41651087554207 1.41868000920298 1.42036681690019 1.4212358006416 1.42106019484617 1.4196985525637 +AB1_EBV chr6:1412516-4412516 6:2898830:A:G chr6_2898830_A_G chr6 2898830 A G 0.000489288 -0.517711 0.741123 0.484834 -7.05337249388736e-05 0.00977642959411629 0.000301195614534455 -1 NA NA NA -7.05337249388736e-05 0.00977642959411629 0.000301195614534455 -1 NA NA NA 2.97764493146971e-10 1.43401826588032e-05 3.59071858643746e-05 3.58941405813954e-05 3.58757109199825e-05 3.58572625522856e-05 3.5842773584086e-05 3.58350787098728e-05 3.58360952711092e-05 3.584700788488e-05 -1.50380125009478e-10 -6.30023739860649e-06 -8.01001018220035e-06 -8.01556149521334e-06 -8.02337146677288e-06 -8.03115738911102e-06 -8.03725003519927e-06 -8.04047655572757e-06 -8.04004705958623e-06 -8.03546297633142e-06 1.53506655570811e-05 0.00312826418533324 0.00326989814640512 0.00327130492779428 0.00327328536956076 0.00327526049902121 0.00327680659330808 0.0032776256749774 0.00327751699993004 0.00327635402345175 -1.63941427810691 -0.911723416607235 -0.200061366819909 -0.20038504129272 -0.200842366638393 -0.2013002246487 -0.201659854812704 -0.201850852419317 -0.201825596480631 -0.201554733229527 +AB1_EBV chr6:1412516-4412516 6:2898852:T:C chr6_2898852_T_C chr6 2898852 T C 0.0024569 -0.0135518 0.324193 0.966657 -1.90309295779787e-05 0.00392799672003962 0.000283694464996431 -1 NA NA NA -1.90309295779787e-05 0.00392799672003962 0.000283694464996431 -1 NA NA NA 2.34840617180525e-10 1.28335944565251e-05 3.39111799170108e-05 3.38970129876025e-05 3.38770069576382e-05 3.3856987559621e-05 3.3841267667435e-05 3.38329164730246e-05 3.38340098433433e-05 3.38458341753585e-05 -2.09651795148505e-12 
-1.63067161461712e-06 -2.16968783989117e-06 -2.17123876945852e-06 -2.17342034713896e-06 -2.17559432396506e-06 -2.17729339929383e-06 -2.17818917940871e-06 -2.17806108789175e-06 -2.17677091979559e-06 4.91110635777357e-06 0.0011870206897283 0.00132217297406661 0.001322651864879 0.00132332577835033 0.00132399749839758 0.00132452293088544 0.00132480093897028 0.00132476346769777 0.00132436755008538 -1.87681010143231 -1.02272268973018 -0.257254054557634 -0.257632210418853 -0.258166333679878 -0.25870094760849 -0.259120833215976 -0.259343929571735 -0.259314724759633 -0.258998910137287 +AB1_EBV chr6:1412516-4412516 6:2899158:A:C chr6_2899158_A_C chr6 2899158 A C 0.0385391 -0.0845606 0.0783558 0.280505 -1.06305709374504e-06 0.000890585228214677 0.000271879605582837 -1 NA NA NA -1.06305709374504e-06 0.000890585228214677 0.000271879605582837 -1 NA NA NA 3.89971767959007e-10 1.18853024263449e-05 3.25560426474199e-05 3.25408385949403e-05 3.25193782421875e-05 3.2497910462905e-05 3.24810583812967e-05 3.24721091331244e-05 3.24732843530629e-05 3.24859607402584e-05 -3.04216196484357e-11 -1.20773208732576e-07 -1.17467876151092e-07 -1.17558562261101e-07 -1.17689048444265e-07 -1.17819396074175e-07 -1.1792032044286e-07 -1.17971037817629e-07 -1.17957314252656e-07 -1.17869907949035e-07 2.17049329642282e-06 0.000257383025890685 0.000301128541857739 0.000301216604637751 0.000301340511307874 0.000301463928398313 0.000301560402178935 0.000301611424323912 0.000301604540952648 0.000301531828463628 -1.36964281405956 -1.09948644286867 -0.298035812202123 -0.298463235801365 -0.299066691853802 -0.299670559011449 -0.300144727480335 -0.300396577946486 -0.300363498356908 -0.300006815940666 +AB1_EBV chr6:1412516-4412516 6:2899177:C:T chr6_2899177_C_T chr6 2899177 C T 0.00149376 0.210318 0.395069 0.594478 1.1087286785856e-05 0.00454796513625276 0.000274177154775179 -1 NA NA NA 1.1087286785856e-05 0.00454796513625276 0.000274177154775179 -1 NA NA NA 2.67372961757553e-10 1.20357475324825e-05 
3.28237946821668e-05 3.28087983026664e-05 3.27876271505826e-05 3.27664482961326e-05 3.27498248649085e-05 3.27410018608325e-05 3.27421727690368e-05 3.27546968813631e-05 5.32485539737573e-11 9.41836094528548e-07 1.26508166244442e-06 1.26597518122759e-06 1.26723377472505e-06 1.268490108755e-06 1.26947525977421e-06 1.26999997624907e-06 1.26993668153731e-06 1.26920479806084e-06 7.16606946438526e-06 0.0013219318002017 0.00153689317083669 0.00153736560443222 0.00153803021717581 0.00153869238437754 0.00153921027711139 0.00153948458599627 0.00153944848834106 0.00153905955204809 -1.74707261650093 -1.0869078068944 -0.289845105629987 -0.29026238810814 -0.29085163390118 -0.291441278779356 -0.291904213443387 -0.292149946326287 -0.292117295645589 -0.291768466701535 +AB1_EBV chr6:1412516-4412516 6:2899240:C:G chr6_2899240_C_G chr6 2899240 C G 0.0666566 -0.0741942 0.0595902 0.213105 -2.35036062319914e-06 0.00069660721485727 0.000277034829564426 -1 NA NA NA -2.35036062319914e-06 0.00069660721485727 0.000277034829564426 -1 NA NA NA 5.29478798776676e-10 1.2187859861444e-05 3.31625006174758e-05 3.31473453882313e-05 3.31259462375983e-05 3.31045519310923e-05 3.30877873025634e-05 3.30789417366297e-05 3.30802378336914e-05 3.30930509408374e-05 -3.97653759915523e-11 -1.80278726260054e-07 -2.7087361447815e-07 -2.70978473752448e-07 -2.71125234085206e-07 -2.71274104665597e-07 -2.71396403440742e-07 -2.71472184299646e-07 -2.71487842059741e-07 -2.71434274781564e-07 2.19598734227066e-06 0.000203049208153187 0.000235341514858765 0.000235413753819564 0.000235515338999907 0.000235616680522203 0.000235696231531389 0.000235738912010994 0.000235734565442074 0.000235676718860371 -1.06382403655627 -1.07434862642961 -0.279579067553628 -0.279996473824015 -0.28058600754497 -0.281175559326416 -0.281637577243573 -0.281881239704408 -0.281845169961472 -0.281491515746687 +AB1_EBV chr6:1412516-4412516 6:2899287:G:A chr6_2899287_G_A chr6 2899287 G A 0.000973672 0.986415 0.535162 0.065298 0.000205637050101746 
0.0126007782851599 0.000497212565959249 -1 NA NA NA 0.000205637050101746 0.0126007782851599 0.000497212565959249 -1 NA NA NA 1.21738607486518e-09 3.6312730369849e-05 5.75982567906221e-05 5.76063163334434e-05 5.76176058640543e-05 5.76288416022498e-05 5.76376454787103e-05 5.76423596489159e-05 5.76418647761721e-05 5.76354359891861e-05 1.1680419720311e-09 2.75315948481859e-05 2.21712096584771e-05 2.21977200026116e-05 2.22350866021457e-05 2.22724309351061e-05 2.23017363944456e-05 2.23173233289229e-05 2.2315353047048e-05 2.22934272428311e-05 3.82248679592031e-05 0.00548996456467016 0.00399781093087686 0.00400131834219449 0.00400626261432472 0.00401120330696571 0.00401507928280891 0.00401713908431259 0.00401687504406763 0.00401397001993241 -0.231255883141779 0.0173793892737324 0.272493497554994 0.272673111793488 0.272925321253496 0.273176811812416 0.273374093684475 0.273479589676235 0.273467892984359 0.273322751959984 +AB1_EBV chr6:1412516-4412516 6:2899309:C:T chr6_2899309_C_T chr6 2899309 C T 0.00201975 -0.222409 0.355153 0.531161 -5.69774573358679e-05 0.00551697685252187 0.00034144100434419 -1 NA NA NA -5.69774573358679e-05 0.00551697685252187 0.00034144100434419 -1 NA NA NA 2.8470006636764e-10 1.81775181581979e-05 4.04492007225223e-05 4.04391980402028e-05 4.04250432914032e-05 4.0410845371332e-05 4.03996652542573e-05 4.03936943770997e-05 4.03944168669415e-05 4.04027431440506e-05 -6.20618320233197e-11 -5.67953826532967e-06 -6.39495570484508e-06 -6.39996837016291e-06 -6.40702208170669e-06 -6.41405240374765e-06 -6.41954772365366e-06 -6.42244442944812e-06 -6.42202579005806e-06 -6.41784050508404e-06 6.97085070922511e-06 0.00195382940556986 0.00182064312290325 0.00182165207819596 0.00182307294576125 0.00182449032343091 0.00182559954701346 0.00182618589315656 0.00182610460264635 0.00182526475596744 -1.68428093331602 -0.674603424930791 -0.0809519219688912 -0.0811595453629046 -0.0814533807374165 -0.0817481541047984 -0.0819803287269014 -0.0821044253045882 -0.0820896506889417 
-0.0819171524572564 +AB1_EBV chr6:1412516-4412516 6:2899314:G:A chr6_2899314_G_A chr6 2899314 G A 0.00135432 -0.0619992 0.437623 0.887339 -2.43115488702986e-05 0.0052673935976394 0.000282464733497689 -1 NA NA NA -2.43115488702986e-05 0.0052673935976394 0.000282464733497689 -1 NA NA NA 2.3761345399726e-10 1.26853254402591e-05 3.37776612443965e-05 3.37629959130597e-05 3.37422920681702e-05 3.37215779436554e-05 3.3705316942895e-05 3.36966852850623e-05 3.36978302318604e-05 3.37100809368494e-05 -1.60019770592677e-11 -2.02382570470464e-06 -2.78067427247183e-06 -2.78220489258914e-06 -2.78436135757928e-06 -2.78651083235294e-06 -2.78819262804003e-06 -2.78908448212784e-06 -2.78896942551408e-06 -2.78770927294178e-06 6.74620903076303e-06 0.00157718883328219 0.00177478256886408 0.00177538443340019 0.00177623154657695 0.00177707582465066 0.001777736310445 0.00177808617502091 0.00177804004370986 0.00177754378462498 -1.86507195026131 -1.0343431411693 -0.261199131574553 -0.261593701171978 -0.262150849036905 -0.262708425310302 -0.26314623017427 -0.263378645288213 -0.263347779268952 -0.263017904506943 +AB1_EBV chr6:1412516-4412516 6:2899350:T:C chr6_2899350_T_C chr6 2899350 T C 0.0355572 -0.000819206 0.0800958 0.991839 7.32864915311251e-06 0.0010478896155351 0.000299176292555825 -1 NA NA NA 7.32864915311251e-06 0.0010478896155351 0.000299176292555825 -1 NA NA NA 2.35265856190137e-10 1.43878771095247e-05 3.56470650085047e-05 3.56345405861951e-05 3.56168563699093e-05 3.55991379878365e-05 3.55851838546828e-05 3.55776997009629e-05 3.55785165932683e-05 3.55887681763074e-05 -1.23441442927995e-12 6.88880582696483e-07 8.27671568238661e-07 8.28342793131389e-07 8.29288982520426e-07 8.30230315026994e-07 8.30961543885504e-07 8.31338047714907e-07 8.31262512882923e-07 8.30674041429651e-07 1.21663962920582e-06 0.000339449598993973 0.000349968305915352 0.000350124878355504 0.000350345466973101 0.000350565296261939 0.000350736897475 0.000350826848316304 0.000350812640677258 0.00035068013845627 
-1.87500098307431 -0.908403004792035 -0.207331990118194 -0.207643699799531 -0.208083837774638 -0.208524929091143 -0.208872460501747 -0.209059089630061 -0.209039240525643 -0.208784747312844 +AB1_EBV chr6:1412516-4412516 6:2899430:C:T chr6_2899430_C_T chr6 2899430 C T 0.000430113 0.554905 0.722716 0.442603 5.99156548867282e-05 0.00925033668662987 0.000294727962604879 -1 NA NA NA 5.99156548867282e-05 0.00925033668662987 0.000294727962604879 -1 NA NA NA 3.11709759048175e-10 1.37827163651308e-05 3.51696672118842e-05 3.51561909498981e-05 3.51371577219624e-05 3.51181105109568e-05 3.51031565024186e-05 3.50952206845135e-05 3.50962811425252e-05 3.51075610491758e-05 1.67637675055622e-10 5.27800908307061e-06 6.81350052228466e-06 6.81818455565397e-06 6.82477697793433e-06 6.83135208246479e-06 6.83650155819372e-06 6.83923579239388e-06 6.83888830119467e-06 6.83503837586246e-06 1.57838568324735e-05 0.00290440207718282 0.00310074207519064 0.00310200196189509 0.00310377565859856 0.00310554461097979 0.00310692951042729 0.00310766375394084 0.00310756776217689 0.00310652822535614 -1.59364466532564 -0.951373619511788 -0.220814821468332 -0.221158376272983 -0.221643662025386 -0.222129386452423 -0.22251077260167 -0.222713159948769 -0.222686055239937 -0.22239831294462 +AB1_EBV chr6:1412516-4412516 6:2899448:A:G chr6_2899448_A_G chr6 2899448 A G 0.00026419 0.82525 0.994775 0.406774 0.000105775492105234 0.0134979386420686 0.00030746990816144 -1 NA NA NA 0.000105775492105234 0.0134979386420686 0.00030746990816144 -1 NA NA NA 3.28071092823362e-10 1.4927362759166e-05 3.66168144960311e-05 3.66041907262449e-05 3.65863580856004e-05 3.65685066985302e-05 3.65544866637575e-05 3.65470429953807e-05 3.65480326160835e-05 3.65586017273023e-05 2.63854267004587e-10 9.65701937130248e-06 1.19857159185444e-05 1.19941284407886e-05 1.20059722797453e-05 1.20177841993937e-05 1.20270313207347e-05 1.20319336962253e-05 1.20312923835425e-05 1.20243506406901e-05 2.30045101988549e-05 0.00440559444046247 
0.00450385284642857 0.0045058870612298 0.00450875179867735 0.00451160956843206 0.00451384716570932 0.00451503315975474 0.00451487684830838 0.00451319510196308 -1.54248682940356 -0.871593033839509 -0.180491258267593 -0.180796373893336 -0.181227416082072 -0.18165895556074 -0.181997895073484 -0.182177838391413 -0.182153872189501 -0.18189833484089 +AB1_EBV chr6:1412516-4412516 6:2899745:G:A chr6_2899745_G_A chr6 2899745 G A 0.000117045 -1.36897 1.53632 0.372891 -0.000214482894300735 0.0226752228956154 0.000327842170640524 -1 NA NA NA -0.000214482894300735 0.0226752228956154 0.000327842170640524 -1 NA NA NA 3.4598841215619e-10 1.68188102648398e-05 3.8921837227274e-05 3.89108912347048e-05 3.88954221464638e-05 3.88799280587529e-05 3.88677527068588e-05 3.88612850417718e-05 3.88621420421056e-05 3.88713185192388e-05 -4.62627639678193e-10 -2.05989778749332e-05 -2.41715975197179e-05 -2.41900701729451e-05 -2.42160880585753e-05 -2.42420440626695e-05 -2.42623674480377e-05 -2.42731382353883e-05 -2.42717148466788e-05 -2.42564334541495e-05 3.76293905253514e-05 0.00778454364442189 0.00751610979510354 0.00752002589184257 0.00752554294920676 0.00753104868167867 0.00753536087793154 0.007537646547676 0.007537344158716 0.00753410079872939 -1.48931187709545 -0.752291070730094 -0.119443344210645 -0.119684916699199 -0.120026296017635 -0.120368223581343 -0.120636900301013 -0.120779606416229 -0.120760665282574 -0.120558169179215 +AB1_EBV chr6:1412516-4412516 6:2900248:C:A chr6_2900248_C_A chr6 2900248 C A 0.0666209 -0.0745332 0.0595208 0.210489 -2.40599263449429e-06 0.000696875971842839 0.0002773208531206 -1 NA NA NA -2.40599263449429e-06 0.000696875971842839 0.0002773208531206 -1 NA NA NA 5.34239928424041e-10 1.22100123955882e-05 3.31954458722522e-05 3.3180303528717e-05 3.3158922412406e-05 3.31375463876293e-05 3.31207967544259e-05 3.31119604398682e-05 3.31132583693554e-05 3.3126064771795e-05 -4.02961574085908e-11 -1.85072747193858e-07 -2.77214528631182e-07 -2.77323368982149e-07 
-2.77475730133333e-07 -2.77630186771891e-07 -2.77756864285653e-07 -2.77834980447318e-07 -2.77850367011055e-07 -2.7779356488044e-07 2.21076556571287e-06 0.000203341460822337 0.000235408256271966 0.000235480799123452 0.000235582812396303 0.000235684585134965 0.000235764481250163 0.000235807357421787 0.000235803014119548 0.000235744951193583 -1.05487211849611 -1.07253268608871 -0.278586111646277 -0.279002675800522 -0.279591023977834 -0.280179381363973 -0.280640442051281 -0.280883558606506 -0.280847472595501 -0.280494406880535 +AB1_EBV chr6:1412516-4412516 6:2900335:G:A chr6_2900335_G_A chr6 2900335 G A 0.0141425 0.0871561 0.128692 0.49825 -4.8508851812967e-06 0.00150079197313381 0.000276535291099278 -1 NA NA NA -4.8508851812967e-06 0.00150079197313381 0.000276535291099278 -1 NA NA NA 2.9118904773619e-10 1.22127022947677e-05 3.30969532069184e-05 3.30818377973087e-05 3.30604993683275e-05 3.30391480522195e-05 3.30223804765177e-05 3.30134663789116e-05 3.30146157602308e-05 3.30271970744514e-05 2.43074616704319e-11 -4.01449309932804e-07 -5.55372192291594e-07 -5.55610515295359e-07 -5.55946267795425e-07 -5.56278692320696e-07 -5.56535030480033e-07 -5.56664687226849e-07 -5.56633503268646e-07 -5.56419290146967e-07 2.59576698269183e-06 0.000439789713479433 0.000506778882168131 0.000506934685916685 0.000507153885835485 0.000507372164401548 0.000507542686144415 0.000507632676254687 0.00050762007726774 0.000507490860118933 -1.6617444558951 -1.07231240761235 -0.28155757562021 -0.281974683704746 -0.282563659917823 -0.283153189855267 -0.283616300020534 -0.283862567999277 -0.283830864503621 -0.283483458915005 +AB1_EBV chr6:1412516-4412516 6:2900414:T:A chr6_2900414_T_A chr6 2900414 T A 0.000665535 -0.139883 0.716694 0.845254 -3.82750989083661e-05 0.00859039307097078 0.000281652410816347 -1 NA NA NA -3.82750989083661e-05 0.00859039307097078 0.000281652410816347 -1 NA NA NA 2.39508370320693e-10 1.26326017337332e-05 3.36824589320091e-05 3.36678668884519e-05 3.36472607420992e-05 
3.3626640591405e-05 3.36104486317959e-05 3.36018459573805e-05 3.36029700713173e-05 3.36151449528514e-05 -3.39828253445488e-11 -3.20715310728596e-06 -4.3744647901316e-06 -4.37709079805373e-06 -4.38078008181301e-06 -4.38445091094579e-06 -4.3873144802804e-06 -4.38881848885003e-06 -4.38859190152779e-06 -4.38640036665243e-06 1.11779046586714e-05 0.00256822572033681 0.00289485122527406 0.00289583737532278 0.0028972246820386 0.00289860691148584 0.00289968767710333 0.00290025922613081 0.00290018176576873 0.00289936678143994 -1.85712879532708 -1.03850807792568 -0.264021610257959 -0.264415230773286 -0.264971208724758 -0.265527725276927 -0.265964838061926 -0.266197113647407 -0.26616677170098 -0.265838126664907 +AB1_EBV chr6:1412516-4412516 6:2900482:G:A chr6_2900482_G_A chr6 2900482 G A 0.00263659 -0.148118 0.285531 0.603939 -7.81711333268456e-06 0.00328394972541939 0.000274033837365906 -1 NA NA NA -7.81711333268456e-06 0.00328394972541939 0.000274033837365906 -1 NA NA NA 2.65623587561404e-10 1.19997776301267e-05 3.28099467075179e-05 3.2795070836965e-05 3.27740673447991e-05 3.27530575777528e-05 3.27365702526763e-05 3.27278256360016e-05 3.2729000636728e-05 3.27414447134073e-05 -3.725607632674e-11 -6.3084943095232e-07 -8.95451470538984e-07 -8.96264942137871e-07 -8.97407813111108e-07 -8.98550900264671e-07 -8.99452015723433e-07 -8.99940070338565e-07 -8.99899885363641e-07 -8.99259548177639e-07 5.13525367516566e-06 0.000951241529523494 0.00111007623869679 0.00111042320525827 0.00111091123194518 0.00111139758053865 0.00111177816708481 0.00111198007054922 0.00111195422561528 0.00111166951609209 -1.75363693846563 -1.08990087061697 -0.290267082941084 -0.290680883726823 -0.29126528416087 -0.291850033955015 -0.292309018514408 -0.29255246537276 -0.2925196752518 -0.292173136894739 +AB1_EBV chr6:1412516-4412516 6:2900706:G:A chr6_2900706_G_A chr6 2900706 G A 0.0666048 -0.0748309 0.059488 0.208422 -2.4505317402585e-06 0.000697356259738811 0.000277550020485684 -1 NA NA NA -2.4505317402585e-06 
0.000697356259738811 0.000277550020485684 -1 NA NA NA 5.38086030734586e-10 1.22277846647579e-05 3.32218387383518e-05 3.32067068942862e-05 3.31853404764832e-05 3.31639793445744e-05 3.31472419094258e-05 3.31384130878286e-05 3.31397124354581e-05 3.31525132685726e-05 -4.07404343673439e-11 -1.88905566542675e-07 -2.82291734212634e-07 -2.82403759486555e-07 -2.82560601359849e-07 -2.82719528341308e-07 -2.82849712802439e-07 -2.82929704285372e-07 -2.82944884411671e-07 -2.82885508381631e-07 2.22349317817976e-06 0.000203652812363333 0.000235551223885542 0.000235624038880583 0.00023572643548321 0.000235828593919897 0.000235908797992618 0.000235951847624438 0.000235947504759349 0.000235889244887071 -1.04769870490789 -1.07107819555023 -0.277791352642094 -0.2782072380547 -0.278794630610174 -0.279382025404102 -0.279842315022417 -0.280084992562041 -0.280048895046835 -0.279696305926548 +AB1_EBV chr6:1412516-4412516 6:2900779:G:T chr6_2900779_G_T chr6 2900779 G T 0.000293308 0.222851 0.892501 0.802825 3.93278283454109e-05 0.0105165019832976 0.000278429524439683 -1 NA NA NA 3.93278283454109e-05 0.0105165019832976 0.000278429524439683 -1 NA NA NA 2.42119911334643e-10 1.23805452883762e-05 3.33120134147895e-05 3.32971316570212e-05 3.32761196029438e-05 3.32550934598933e-05 3.32385807075095e-05 3.32298022254387e-05 3.32309351934593e-05 3.32433277153777e-05 5.30951482486496e-11 3.30691633667144e-06 4.49426732413003e-06 4.49670771714816e-06 4.50013763168623e-06 4.50354268658721e-06 4.50618537893779e-06 4.50754993946109e-06 4.50728872163161e-06 4.50517951400913e-06 1.41407276088693e-05 0.00310971365275386 0.00354793619288009 0.00354907774513592 0.00355068362647631 0.00355228303996116 0.00355353284435313 0.00355419270368656 0.00355410085553536 0.0035531545954489 -1.84628405589504 -1.0586626772132 -0.275080710503293 -0.275487851612368 -0.276062846421069 -0.276638410697475 -0.277090557128362 -0.277330987653494 -0.277300004725495 -0.276960757973803 +AB1_EBV chr6:1412516-4412516 6:2901299:T:A 
chr6_2901299_T_A chr6 2901299 T A 0.000160786 -1.26096 1.47459 0.392483 -0.000170522636125533 0.0204851026808546 0.000312954617270877 -1 NA NA NA -0.000170522636125533 0.0204851026808546 0.000312954617270877 -1 NA NA NA 3.35255518153791e-10 1.53990054822489e-05 3.7241766909278e-05 3.72296882854881e-05 3.72126196025097e-05 3.71955283613543e-05 3.71821013619481e-05 3.71749684918958e-05 3.7175909131918e-05 3.71860208911237e-05 -4.12416769738376e-10 -1.57052145928113e-05 -1.93033337994492e-05 -1.93174668113558e-05 -1.93373577642266e-05 -1.93571929576123e-05 -1.93727175786129e-05 -1.93809394925251e-05 -1.93798430306353e-05 -1.93681576815342e-05 3.49162021266074e-05 0.00677140504927717 0.00682423602985352 0.00682747911755856 0.00683204596431248 0.00683660189556094 0.00684016907512273 0.00684205909989392 0.00684180805236277 0.00683912400865881 -1.52082417887053 -0.840486061150979 -0.163567907810423 -0.163852593283656 -0.164254916728738 -0.164657804117231 -0.164974328567028 -0.165142473531004 -0.165120282230847 -0.164881926566009 +AB1_EBV chr6:1412516-4412516 6:2901437:T:C chr6_2901437_T_C chr6 2901437 T C 0.00243636 0.160119 0.310196 0.605724 2.96061315937843e-05 0.00409536289602358 0.000301337566358306 -1 NA NA NA 2.96061315937843e-05 0.00409536289602358 0.000301337566358306 -1 NA NA NA 2.66812142111886e-10 1.43959073204915e-05 3.59189719774459e-05 3.59056435298025e-05 3.58868259382378e-05 3.58679869005927e-05 3.58531811787001e-05 3.58452976161622e-05 3.58462883764916e-05 3.58573568471173e-05 4.13834474904866e-11 2.67310651563078e-06 3.35915900741316e-06 3.36132554446077e-06 3.36437932025667e-06 3.36742138929354e-06 3.36979562569709e-06 3.3710412365638e-06 3.37084698290736e-06 3.36901458811364e-06 5.61130019692742e-06 0.00131551545531481 0.00136920959834915 0.00136978442111869 0.00137059415886078 0.00137140149547561 0.00137203287982255 0.00137236629721574 0.00137231946856703 0.00137184035323422 -1.74917233754388 -0.907845037079717 -0.199733182394312 -0.200064623936376 
-0.200532594469493 -0.201001184952951 -0.201369528957596 -0.201565728180789 -0.201541200093894 -0.201266076817944 +AB1_EBV chr6:1412516-4412516 6:2901740:C:T chr6_2901740_C_T chr6 2901740 C T 0.00113492 -0.660862 0.469182 0.158971 -0.000104368763391992 0.00837225553970323 0.00038399391514754 -1 NA NA NA -0.000104368763391992 0.00837225553970323 0.00038399391514754 -1 NA NA NA 6.27557957401794e-10 2.23904185497478e-05 4.52285784811251e-05 4.52228368637095e-05 4.52146905460237e-05 4.52064991309838e-05 4.52000393455772e-05 4.51965963634224e-05 4.51970426383519e-05 4.52019040545982e-05 -4.08017420349811e-10 -1.12431861891166e-05 -1.16039623831258e-05 -1.1614574853731e-05 -1.16295246741969e-05 -1.16444459722166e-05 -1.16561338320443e-05 -1.16623272812409e-05 -1.16615012891556e-05 -1.16526988997442e-05 2.00042363984756e-05 0.00316816247511648 0.00273361515849696 0.00273543568248955 0.00273800114071713 0.00274056251299853 0.00274256942538591 0.00274363323642256 0.00274349175815446 0.00274198074860733 -0.893881131580187 -0.466155865372607 0.0307303486857835 0.0306430911317879 0.030519189006661 0.0303945097545189 0.0302961294412096 0.0302436639934607 0.0302504265993964 0.0303243763281786 +AB1_EBV chr6:1412516-4412516 6:2901779:C:T chr6_2901779_C_T chr6 2901779 C T 0.000474125 0.464259 0.713054 0.514991 7.42781689841096e-05 0.00962408463873327 0.000306219320683976 -1 NA NA NA 7.42781689841096e-05 0.00962408463873327 0.000306219320683976 -1 NA NA NA 2.86243935440098e-10 1.48599589426249e-05 3.64684340513584e-05 3.64559467233702e-05 3.64383065447139e-05 3.64206442766831e-05 3.64067651396288e-05 3.63993818493059e-05 3.6400330025449e-05 3.64107439124906e-05 1.27023445979675e-10 6.82486782147636e-06 8.41039220824134e-06 8.41655434299846e-06 8.42523035358088e-06 8.43387953285573e-06 8.44064190143476e-06 8.44420944646573e-06 8.44370167347457e-06 8.43856468013581e-06 1.40881940768886e-05 0.00314110358222158 0.00321124072749791 0.00321270312520817 0.00321476262578972 
0.00321681678949376 0.00321842434512764 0.00321927474243777 0.00321915878472844 0.00321794486437146 -1.67887279230573 -0.876118713070541 -0.184551739732469 -0.184854515897884 -0.185282258223097 -0.185710588499166 -0.186047215015958 -0.186226325978645 -0.186203388508407 -0.185950941202606 +AB1_EBV chr6:1412516-4412516 6:2901780:G:C chr6_2901780_G_C chr6 2901780 G C 0.0602314 -0.0252196 0.0618397 0.683405 8.57138514535798e-06 0.000910498237234011 0.000327196293954057 -1 NA NA NA 8.57138514535798e-06 0.000910498237234011 0.000327196293954057 -1 NA NA NA 2.57933295715376e-10 1.67770980844004e-05 3.88435654164233e-05 3.88334085586811e-05 3.88190914231861e-05 3.8804753336167e-05 3.87934701671027e-05 3.87874356121324e-05 3.87881299708099e-05 3.87964586105363e-05 -6.84061171967653e-12 8.24205140773222e-07 9.65577315271994e-07 9.66393292140342e-07 9.67546441330625e-07 9.68697563004973e-07 9.6959771249865e-07 9.70071031874967e-07 9.69998525976611e-07 9.69304963098313e-07 1.06983434732419e-06 0.000312499195642551 0.000301785553703583 0.000301950290219811 0.000302182775581074 0.000302414863899145 0.000302596525548325 0.000302692435916132 0.000302678731690022 0.000302540389994796 -1.78301615295971 -0.754774242278217 -0.121456368887976 -0.121678186993833 -0.121990684503573 -0.1223036050414 -0.122549890070179 -0.122681748558017 -0.122666958507057 -0.122485865185222 +AB1_EBV chr6:1412516-4412516 6:2901784:T:C chr6_2901784_T_C chr6 2901784 T C 0.000474114 0.464262 0.713054 0.514988 7.4278229790865e-05 0.00962408661564884 0.000306219369939686 -1 NA NA NA 7.4278229790865e-05 0.00962408661564884 0.000306219369939686 -1 NA NA NA 2.86244808138723e-10 1.48599605837659e-05 3.64684400314365e-05 3.64559526953849e-05 3.64383125054799e-05 3.64206502263469e-05 3.64067710808445e-05 3.63993877865356e-05 3.6400335964364e-05 3.64107498594699e-05 1.27024809591101e-10 6.82487019409241e-06 8.41039954058659e-06 8.4165616667619e-06 8.42523766538765e-06 8.43388683287977e-06 8.44064919253504e-06 
8.44421673344543e-06 8.44370896243587e-06 8.43857197793072e-06 1.40882463064968e-05 0.00314110399239066 0.00321124141832974 0.00321270381557033 0.00321476331550199 0.00321681747857059 0.00321842503373471 0.00321927543085274 0.00321915947330345 0.00321794555349866 -1.67886974351696 -0.876118602630065 -0.184551575752915 -0.184854352083343 -0.185282094637956 -0.185710425139527 -0.186047051826089 -0.186226162865182 -0.186203225352894 -0.185950777872275 +AB1_EBV chr6:1412516-4412516 6:2901906:G:T chr6_2901906_G_T chr6 2901906 G T 0.000732806 -0.514416 0.561698 0.35976 -0.000114028330580257 0.00962161397599455 0.000370399667817156 -1 NA NA NA -0.000114028330580257 0.00962161397599455 0.000370399667817156 -1 NA NA NA 3.52125777245866e-10 2.11518692151745e-05 4.36868584718273e-05 4.36801706901131e-05 4.3670704489078e-05 4.36611964857497e-05 4.36536971131505e-05 4.36496837702575e-05 4.36501604902728e-05 4.36557399258389e-05 -1.7600131130793e-10 -1.21396951826313e-05 -1.26963525001502e-05 -1.27078425352357e-05 -1.27240367061852e-05 -1.27401984318054e-05 -1.27528498983887e-05 -1.27595380654515e-05 -1.27586100625871e-05 -1.27490311965107e-05 1.40160216167457e-05 0.00359352235404155 0.00314855773213012 0.00315059007537553 0.00315345483706459 0.00315631482545548 0.0031585548423876 0.00315974055082775 0.0031595791217589 0.0031578873594886 -1.47172872721918 -0.523060708855343 -0.00395147022989217 -0.00406486929332406 -0.00422535763315501 -0.00438659746392567 -0.00451385013387107 -0.00458208072348754 -0.00457427071126393 -0.0044800622103649 +AB1_EBV chr6:1412516-4412516 6:2901927:C:A chr6_2901927_C_A chr6 2901927 C A 0.000122246 -1.13981 1.35456 0.40009 -0.000159865798488725 0.0189320928496257 0.000314385192374811 -1 NA NA NA -0.000159865798488725 0.0189320928496257 0.000314385192374811 -1 NA NA NA 3.31831868939229e-10 1.55355647665652e-05 3.74034876093184e-05 3.73914278597979e-05 3.73743921275384e-05 3.73573359212473e-05 3.73439376908123e-05 3.73368215672707e-05 
3.73377627028184e-05 3.73478552980006e-05 -3.69538033917271e-10 -1.47921828239582e-05 -1.80888781861212e-05 -1.81019823473878e-05 -1.81204396926077e-05 -1.81388504163526e-05 -1.81532633341475e-05 -1.81608999283469e-05 -1.81598876287449e-05 -1.81490445930246e-05 3.17180559629383e-05 0.00628352839668168 0.00630368303904831 0.00630668925041208 0.00631092398764079 0.00631514915690365 0.00631845770904774 0.0063202110540328 0.00631997875775944 0.00631748983560914 -1.53108873892888 -0.831657092764422 -0.159234853978305 -0.159517631922922 -0.159917089674251 -0.160317050330861 -0.160631240290594 -0.160798105215386 -0.160776010328358 -0.160539346534644 +AB1_EBV chr6:1412516-4412516 6:2901956:C:G chr6_2901956_C_G chr6 2901956 C G 0.00422977 -0.169267 0.237368 0.475785 -1.62815035121505e-05 0.00293764001846143 0.000287893968729858 -1 NA NA NA -1.62815035121505e-05 0.00293764001846143 0.000287893968729858 -1 NA NA NA 3.00922682592311e-10 1.31499796504163e-05 3.43960612222218e-05 3.4382081817001e-05 3.4362329904722e-05 3.43425610815575e-05 3.4327038236061e-05 3.43187958648315e-05 3.43198858566079e-05 3.43315780105471e-05 -4.97429504469612e-11 -1.38546252123471e-06 -1.85768974678648e-06 -1.85894190280673e-06 -1.8606983796764e-06 -1.86244754062533e-06 -1.86381550540057e-06 -1.86453972766464e-06 -1.86444344328619e-06 -1.86341500171895e-06 4.97775331539796e-06 0.000897851438327447 0.000987627332533439 0.000987999491806122 0.000988522889854285 0.000989044558694027 0.000989452722598915 0.000989668899040274 0.00098964024095878 0.000989333352109394 -1.62886379742584 -0.998368778284787 -0.243056747167402 -0.243423557200197 -0.243941953653876 -0.244460920015889 -0.244868497471278 -0.245084929834525 -0.245056280985397 -0.244749262376344 +AB1_EBV chr6:1412516-4412516 6:2902097:A:C chr6_2902097_A_C chr6 2902097 A C 0.381506 0.171729 0.0305523 1.90055e-08 0.000152423497918228 0.00495139434682274 0.0013570758271243 -1 NA NA NA 0.000152423497918228 0.00495139434682274 0.0013570758271243 -1 NA 
NA NA 0.000843526188656871 3.91741028318361e-05 5.93126743663999e-05 5.93282533599584e-05 5.93500295728665e-05 5.93716631818064e-05 5.93884955045684e-05 5.93972332518681e-05 5.93956532569341e-05 5.93823609924639e-05 0.000139928800104302 1.7518564521159e-06 1.33642690192504e-06 1.3382905777224e-06 1.34091346719012e-06 1.34353208568464e-06 1.3455820506066e-06 1.34666229599662e-06 1.34650175732951e-06 1.3449322253552e-06 0.00489504602054235 0.000333052358433822 0.000234759365008444 0.000234991037227334 0.00023531723633409 0.000235642941485097 0.000235898001954203 0.000236032634770317 0.0002360132076357 0.000235818808107556 13.2173843463615 0.093226897020803 0.301824210520419 0.302126531966772 0.302549762227976 0.302970708901167 0.303298701362282 0.303469528717203 0.30343981645509 0.303182394556006 +AB1_EBV chr6:1412516-4412516 6:2902159:A:G chr6_2902159_A_G chr6 2902159 A G 0.199925 -0.0167221 0.036845 0.649937 1.27910315514759e-05 0.000823906046209054 0.000470625668141822 -1 NA NA NA 1.27910315514759e-05 0.000823906046209054 0.000470625668141822 -1 NA NA NA 2.7512128114274e-10 3.45131117258491e-05 5.44921382732307e-05 5.45025758305991e-05 5.45172433642999e-05 5.45316126904354e-05 5.4542362454444e-05 5.4547147694049e-05 5.45443895335381e-05 5.45331019684047e-05 -5.64309864838996e-12 1.76043634854507e-06 1.37247712640654e-06 1.37433592409176e-06 1.37695732716769e-06 1.37956536863081e-06 1.38158671720971e-06 1.38261411351815e-06 1.38237255902427e-06 1.38069170998055e-06 6.93135129452434e-07 0.000362684329388557 0.00026070914685714 0.000260955264477976 0.000261302336434109 0.00026164802637794 0.000261916803312259 0.000262055077651312 0.00026202666807898 0.000261809108603266 -1.71850513793224 -0.0334496875492922 0.217057633759386 0.21728885503609 0.217614186526596 0.217934230040226 0.218175864601768 0.218287304678361 0.218233627265473 0.217993058133872 +AB1_EBV chr6:1412516-4412516 6:2912043:G:A chr6_2912043_G_A chr6 2912043 G A 0.379836 0.195819 0.0298776 5.60015e-11 
0.0627098030382369 0.0914758405630092 0.327483481692081 1 0.327287739668512 0 0.988626524209 0.0627098030382369 0.0914758405630092 0.327483481692081 1 0.327287739668512 0 0.988626524209 0.327287739668512 1.34116556674083e-05 3.47479906129698e-05 3.47341023926405e-05 3.47144673802213e-05 3.46948481583097e-05 3.46795126959479e-05 3.46714989293944e-05 3.46728600996272e-05 3.46848650779766e-05 0.0627075544730858 1.93070115154659e-07 2.56351789036658e-07 2.56517545891351e-07 2.56749448141219e-07 2.56982473311057e-07 2.57169098245676e-07 2.57275938042434e-07 2.57280452181013e-07 2.57168291095261e-07 0.0914750699783512 0.000115965451802635 0.000126090501441368 0.000126139059796606 0.000126207301983071 0.000126275491956345 0.000126329203768056 0.000126358304086684 0.000126355963029962 0.000126317856103039 19.1783883980353 -0.978664834606986 -0.232877059385462 -0.233237126234329 -0.233746329660068 -0.234255144961051 -0.234652727503607 -0.234860125362074 -0.234823978507148 -0.234511407819073 +AB1_EBV chr6:1412516-4412516 6:2912091:T:C chr6_2912091_T_C chr6 2912091 T C 0.0014968 0.849767 0.39658 0.0321336 0.000467799155561597 0.0178107769162849 0.000999898076620487 -1 NA NA NA 0.000467799155561597 0.0178107769162849 0.000999898076620487 -1 NA NA NA 2.14748023589044e-09 0.000129030020977694 0.000108607514244686 0.000108695992067434 0.000108820817330923 0.000108945579309375 0.000109043388481492 0.00010909516101071 0.000109088003495295 0.000109013906271378 1.7706840523719e-09 0.000105696396775546 4.49708995093949e-05 4.50550210557385e-05 4.5173821756935e-05 4.52927095848752e-05 4.53860323694479e-05 4.54355104110357e-05 4.5428780052821e-05 4.53582133617496e-05 4.23059122786113e-05 0.0102300083056357 0.00513146611005017 0.0051381716493634 0.00514763567977716 0.0051570999330498 0.00516452473746057 0.00516846065280203 0.00516792812105516 0.00516231993799184 0.336333289791154 1.28525610863321 0.90674179140902 0.907595813479623 0.908799794774941 0.910002132328979 0.910944034613751 
0.911442419834467 0.91137369825334 0.910660619838302 +AB1_EBV chr6:1412516-4412516 6:2912215:A:C chr6_2912215_A_C chr6 2912215 A C 0.000455488 0.61666 0.755233 0.414205 0.000112004643456391 0.0113904316010689 0.000333457603954423 -1 NA NA NA 0.000112004643456391 0.0113904316010689 0.000333457603954423 -1 NA NA NA 3.24538856818094e-10 1.7385629796308e-05 3.95513804269452e-05 3.95409324142499e-05 3.95261701037205e-05 3.95113845208808e-05 3.94997674441308e-05 3.94936005032933e-05 3.94944279729676e-05 3.95031985094307e-05 1.94930595906053e-10 1.09430535165028e-05 1.25985989515895e-05 1.26084517781145e-05 1.26223360989094e-05 1.263619241875e-05 1.26470466233653e-05 1.26528047819961e-05 1.26520552817327e-05 1.2643909074835e-05 1.72577400195345e-05 0.00396527667424546 0.00376819718924355 0.00377022680744795 0.00377308699655655 0.00377594195077436 0.00377817858479035 0.0037793647338638 0.00377920899661583 0.00377752828442837 -1.55331188807753 -0.719144998187052 -0.103398208696467 -0.103622709454589 -0.103939870307167 -0.104257506731746 -0.104507043474584 -0.104639472108447 -0.104621631762101 -0.104433191173421 +AB1_EBV chr6:1412516-4412516 6:2912223:A:G chr6_2912223_A_G chr6 2912223 A G 0.000327161 -0.974896 0.897826 0.277549 -0.000143823315763218 0.0139369815289686 0.000341294746581511 -1 NA NA NA -0.000143823315763218 0.0139369815289686 0.000341294746581511 -1 NA NA NA 4.15651617156049e-10 1.81159684822268e-05 4.04369439533612e-05 4.04273931742459e-05 4.0413883888316e-05 4.0400344441517e-05 4.03897002915185e-05 4.03840447014201e-05 4.03847944480235e-05 4.0392817733862e-05 -3.94185719873914e-10 -1.42532764371269e-05 -1.61503880112751e-05 -1.6163647332254e-05 -1.61823187080979e-05 -1.62009487904163e-05 -1.62155396124606e-05 -1.62232739594452e-05 -1.62222517278753e-05 -1.62112769985464e-05 2.64761296927553e-05 0.00492195420313575 0.00460091996559752 0.00460353106509021 0.00460720952302008 0.00461088108504921 0.00461375733109938 0.00461528214251335 0.00461508042144129 
0.00461291664214722 -1.305869710077 -0.677995203493559 -0.0812549842375656 -0.0814515043997215 -0.0817294705755023 -0.0820080421202665 -0.0822270186896716 -0.0823433444879065 -0.0823278906719001 -0.0821628444267333 +AB1_EBV chr6:1412516-4412516 6:2912235:C:A chr6_2912235_C_A chr6 2912235 C A 0.000894445 0.0390669 0.502805 0.938068 1.64477341850827e-05 0.00582199394394285 0.000275236633743181 -1 NA NA NA 1.64477341850827e-05 0.00582199394394285 0.000275236633743181 -1 NA NA NA 2.35335244481712e-10 1.2109466439747e-05 3.2946910297286e-05 3.29319569945197e-05 3.29108465588616e-05 3.28897269237278e-05 3.28731475226994e-05 3.28643438621222e-05 3.2865502937009e-05 3.28779796697682e-05 8.25512316693382e-12 1.36304292983158e-06 1.88139085922139e-06 1.88260803746861e-06 1.88432243632932e-06 1.88603061134145e-06 1.88736476632332e-06 1.88806645059385e-06 1.88796128625787e-06 1.88693855259211e-06 7.64087884500099e-06 0.0016974102170611 0.00196684251598381 0.00196745566043838 0.00196831827899032 0.00196917766387594 0.00196984963868014 0.00197020521959275 0.00197015760253611 0.00196965166777194 -1.87470609089502 -1.08080149229204 -0.28610131851516 -0.286515584914988 -0.287100571028592 -0.287685995378918 -0.28814568808312 -0.288389821438491 -0.288357665025673 -0.288011711991038 +AB1_EBV chr6:1412516-4412516 6:2912261:G:A chr6_2912261_G_A chr6 2912261 G A 0.0199092 -0.220904 0.106552 0.0381531 -2.97706349596775e-05 0.00212370411917391 0.000423874556068071 -1 NA NA NA -2.97706349596775e-05 0.00212370411917391 0.000423874556068071 -1 NA NA NA 1.98958362816542e-09 2.7132197094122e-05 4.96130716799048e-05 4.96100894958993e-05 4.96057801356657e-05 4.96014037623309e-05 4.95979392558906e-05 4.95961163167247e-05 4.95964347585059e-05 4.95991841555843e-05 -4.3309240151892e-10 -3.52624448591988e-06 -3.26979385529758e-06 -3.2728905431541e-06 -3.27724678792719e-06 -3.28159386611135e-06 -3.28500081535703e-06 -3.28681127681543e-06 -3.28658270906846e-06 -3.28403752762494e-06 1.07857201849781e-05 
0.000856007676657065 0.000685441550967017 0.0006859315780635 0.000686621517047654 0.000687310284091608 0.000687850150940657 0.000688136832127493 0.000688099988781492 0.00068769557453296 0.259963503034496 -0.274067882084955 0.123255535306186 0.123235121837409 0.123204504827758 0.123172782159143 0.123147457733531 0.123134412284021 0.123137721535353 0.123159550394981 +AB1_EBV chr6:1412516-4412516 6:2912516:CCA:C chr6_2912516_CCA_C chr6 2912516 CCA C 0.379568 0.196413 0.0298788 4.90908e-11 0.0705334503762503 0.0943211787286286 0.367291808726716 1 0.367106415849495 0 1 0.0705334503762503 0.0943211787286286 0.367291808726716 1 0.367106415849495 0 1 0.367106415849495 1.35826756661766e-05 3.49703766940184e-05 3.49566673877352e-05 3.49372826355214e-05 3.49179144495213e-05 3.4902779017062e-05 3.48948777422873e-05 3.48962391873188e-05 3.4908116627254e-05 0.0705310811656966 2.04829896721797e-07 2.69918706751321e-07 2.70097089742913e-07 2.703467265568e-07 2.70597474615109e-07 2.70798048480558e-07 2.70912405770814e-07 2.7091621239132e-07 2.70793992636531e-07 0.0943204169456985 0.000117874520944068 0.000127207470188182 0.000127257707273328 0.000127328309993585 0.00012739886603438 0.000127454449709537 0.000127484572709491 0.000127482165069385 0.000127442752777842 19.2932004436988 -0.965993856777881 -0.226497482700605 -0.226849888657321 -0.227348327734282 -0.227846347311053 -0.228235373839683 -0.228438069440508 -0.228402166017004 -0.228095464439732 +AB1_EBV chr6:1412516-4412516 6:2912529:A:G chr6_2912529_A_G chr6 2912529 A G 0.000327219 -0.97546 0.897843 0.277281 -0.000143953079553094 0.0139419926770435 0.0003413887961905 -1 NA NA NA -0.000143953079553094 0.0139419926770435 0.0003413887961905 -1 NA NA NA 4.15918375867138e-10 1.81252741493716e-05 4.04475101763738e-05 4.04379681637341e-05 4.0424471223517e-05 4.0410944096783e-05 4.04003095978191e-05 4.03946591265783e-05 4.03954082031063e-05 4.0403424239079e-05 -3.94667582594136e-10 -1.42693628376224e-05 -1.61645419108553e-05 
-1.61778173275889e-05 -1.6196511395159e-05 -1.62151641464748e-05 -1.62297727421942e-05 -1.62375165200616e-05 -1.62364930492056e-05 -1.62255049563498e-05 2.64930551234239e-05 0.00492466914772154 0.00460244397516612 0.00460505719958363 0.00460873865501266 0.0046124132135329 0.00461529181034602 0.00461681786950121 0.00461661598421526 0.00461445043652855 -1.30522813156292 -0.67748166330158 -0.0809937171452315 -0.0811899588040905 -0.0814675321615681 -0.0817457110643343 -0.0819643796238738 -0.0820805419244524 -0.0820651095746605 -0.0819002949561405 +AB1_EBV chr6:1412516-4412516 6:2912588:C:T chr6_2912588_C_T chr6 2912588 C T 0.0254686 0.216313 0.0955807 0.0236265 1.47824004154632e-05 0.00146431229272919 0.000337611425283768 -1 NA NA NA 1.47824004154632e-05 0.00146431229272919 0.000337611425283768 -1 NA NA NA 2.72685699254245e-09 1.78248894962867e-05 4.00108805539477e-05 4.00017784606194e-05 3.99888907079696e-05 3.99759826507461e-05 3.99658550555137e-05 3.99605097334193e-05 3.99613013955271e-05 3.99690615313357e-05 5.70381630748495e-10 1.46467314122321e-06 1.65960336828124e-06 1.66106000252363e-06 1.66310974647536e-06 1.66515656502215e-06 1.66676273894097e-06 1.66761930038806e-06 1.66751779919677e-06 1.66632737178103e-06 1.19846816026332e-05 0.000515215638913325 0.000483618358084242 0.00048389907275914 0.000484294387774508 0.000484689129467184 0.000484998689925372 0.00048516333488638 0.000485142766776561 0.000484911717134194 0.575187779444314 -0.694193222496162 -0.0918473746636161 -0.0920351938219852 -0.0923011738682922 -0.0925675127810841 -0.0927763619162958 -0.092886408524647 -0.0928697090375366 -0.092709141603013 +AB1_EBV chr6:1412516-4412516 6:2912610:T:A chr6_2912610_T_A chr6 2912610 T A 0.000546427 0.109603 0.608951 0.857163 -1.7075059345342e-06 0.00689201795132011 0.000271185931488116 -1 NA NA NA -1.7075059345342e-06 0.00689201795132011 0.000271185931488116 -1 NA NA NA 2.37990235015101e-10 1.17752178586987e-05 3.24833629428342e-05 3.24680806295094e-05 
3.24465064204478e-05 3.24249247619617e-05 3.2407984976631e-05 3.23989929381462e-05 3.24001832167821e-05 3.24129409677569e-05 2.37142738719595e-11 -1.10061281359889e-07 -1.99916178590291e-07 -1.99847078568636e-07 -1.99750046290116e-07 -1.99653339751115e-07 -1.99578768359799e-07 -1.99542536537732e-07 -1.99555699429243e-07 -1.99624719921246e-07 9.40928980680266e-06 0.00197272591095902 0.00233243168230848 0.00233310172729173 0.0023340440487014 0.00233498253666501 0.00233571624093277 0.00233610464497459 0.00233605323933057 0.0023355018923268 -1.86348751723285 -1.10879184787886 -0.300270756823125 -0.300701636214049 -0.301310080139224 -0.301918943002603 -0.302396985543454 -0.302650778155976 -0.302617152121965 -0.302257079085101 +AB1_EBV chr6:1412516-4412516 6:2912748:A:G chr6_2912748_A_G chr6 2912748 A G 0.00233851 0.432498 0.310772 0.164016 5.15277704475579e-05 0.00489060027618745 0.000345015214635036 -1 NA NA NA 5.15277704475579e-05 0.00489060027618745 0.000345015214635036 -1 NA NA NA 5.99956963572397e-10 1.85725654097157e-05 4.08444675074e-05 4.08350760770347e-05 4.08218020645658e-05 4.08085011268683e-05 4.0798045921e-05 4.07924931202076e-05 4.07932351053201e-05 4.08011247074387e-05 2.52393731776312e-10 5.20198836606858e-06 5.77436514648892e-06 5.77908582942781e-06 5.7857384974689e-06 5.79237838581503e-06 5.79757978057528e-06 5.8003381874085e-06 5.79997622281808e-06 5.79606763775497e-06 1.27576077926897e-05 0.0017479247396613 0.00161168129135818 0.00161260430278 0.00161390516855485 0.00161520381422027 0.0016162212837495 0.00161676081838625 0.00161668971463515 0.00161592466322812 -0.938859235450003 -0.65310347551445 -0.071227427838604 -0.0714176886195066 -0.0716865540907756 -0.0719559321725098 -0.0721676417039498 -0.072280045985508 -0.072264968322628 -0.0721051873355378 +AB1_EBV chr6:1412516-4412516 6:2913008:C:T chr6_2913008_C_T chr6 2913008 C T 0.0024573 0.210031 0.306679 0.493435 3.92406011765618e-05 0.00439665175954045 0.000320463764927226 -1 NA NA NA 
3.92406011765618e-05 0.00439665175954045 0.000320463764927226 -1 NA NA NA 2.94217037452583e-10 1.61738961036959e-05 3.80830747887176e-05 3.80712201708452e-05 3.8054480217481e-05 3.80377114807026e-05 3.80245222278381e-05 3.80174874673335e-05 3.80183473959848e-05 3.80281706575051e-05 5.99168022206555e-11 3.7312241576727e-06 4.42780444533946e-06 4.43095386156343e-06 4.43539335038771e-06 4.43981828187131e-06 4.44327482738013e-06 4.4450921744079e-06 4.44481732150604e-06 4.44216283963094e-06 6.26270446596284e-06 0.00148867738379177 0.00146017212945563 0.00146088436225525 0.00146188805981231 0.00146288920935577 0.00146367244969592 0.00146408612234863 0.00146402790663259 0.00146343325414789 -1.65139944310006 -0.791390398419185 -0.141228853471726 -0.141500487863944 -0.14188403419878 -0.142268277828509 -0.142570554492937 -0.142731867937992 -0.142712360329771 -0.142487616557522 +AB1_EBV chr6:1412516-4412516 6:2913068:GGTTA:G chr6_2913068_GGTTA_G chr6 2913068 GGTTA G 0.00870822 -0.212726 0.162239 0.189793 -2.37283424156391e-05 0.00243803334876037 0.000332005245500255 -1 NA NA NA -2.37283424156391e-05 0.00243803334876037 0.000332005245500255 -1 NA NA NA 5.06606768772462e-10 1.77032100225915e-05 3.93273587257883e-05 3.93177261339309e-05 3.93041069449291e-05 3.9290448887226e-05 3.92796864739962e-05 3.92739177867756e-05 3.92745660964864e-05 3.9282510890253e-05 -1.0073654079631e-10 -2.44868290682411e-06 -2.6520195225503e-06 -2.65431910644349e-06 -2.65755833349504e-06 -2.660787970917e-06 -2.6633110343195e-06 -2.66463612161047e-06 -2.66443261042391e-06 -2.66249407251444e-06 5.74903715846843e-06 0.000868265459189036 0.000803893013679785 0.00080434590358937 0.000804984014857361 0.00080562066225268 0.000806118736528231 0.000806381510540053 0.00080634382910633 0.000805964947295852 -1.10798206205858 -0.701043009065389 -0.10907837817911 -0.109283644613869 -0.109573841275992 -0.10986489435211 -0.110094326242466 -0.110217489313817 -0.110204093492312 -0.110035430413266 +AB1_EBV 
chr6:1412516-4412516 6:2913328:C:T chr6_2913328_C_T chr6 2913328 C T 0.00154494 -0.519913 0.394744 0.187808 -0.000161875377546235 0.00962934039254081 0.00051506802660084 -1 NA NA NA -0.000161875377546235 0.00962934039254081 0.00051506802660084 -1 NA NA NA 5.4587505222394e-10 3.87066516912138e-05 5.95239253736136e-05 5.95343368544001e-05 5.95489780746768e-05 5.95635596947194e-05 5.95749601233526e-05 5.95809961611447e-05 5.95801979507378e-05 5.95716287271369e-05 -2.76619917891995e-10 -2.22515297726671e-05 -1.73788721996806e-05 -1.74002600883788e-05 -1.74304291144383e-05 -1.74605810644305e-05 -1.74842285353933e-05 -1.74967743770226e-05 -1.74951137467133e-05 -1.74773120275926e-05 1.49409272427542e-05 0.00426209489159326 0.00304332489050872 0.00304607056877729 0.00304994306445174 0.00305381280763353 0.00305684735233557 0.00305845712540929 0.00305824402669091 0.00305595981473945 -1.03332705323842 0.0812224741817866 0.305379533210553 0.305594127626087 0.305896277752671 0.306197619731943 0.306433525758578 0.306558548553959 0.306542039972419 0.306364597941301 +AB1_EBV chr6:1412516-4412516 6:2913533:T:C chr6_2913533_T_C chr6 2913533 T C 0.0012859 -0.294808 0.424332 0.487208 -6.72214334167609e-05 0.00655757133201166 0.000340136198118679 -1 NA NA NA -6.72214334167609e-05 0.00655757133201166 0.000340136198118679 -1 NA NA NA 2.97030235883492e-10 1.79362995702231e-05 4.03144028679426e-05 4.0304892078558e-05 4.02914423661257e-05 4.02779649470781e-05 4.02673714613803e-05 4.02617452136461e-05 4.02624971379401e-05 4.02704935005059e-05 -8.54411777701489e-11 -6.59200715981395e-06 -7.55700070228509e-06 -7.56326851622423e-06 -7.5720966468441e-06 -7.58090683326348e-06 -7.58780813207615e-06 -7.59146793888667e-06 -7.59098800441846e-06 -7.58580404177101e-06 8.76352213705461e-06 0.00230223142130359 0.00216660864196398 0.00216784043660966 0.00216957596391961 0.00217130839872426 0.00217266569311009 0.00217338540816428 0.00217329058718609 0.00217227022799917 -1.64188322277177 -0.687962421044489 
-0.0842900092695089 -0.0844862553502468 -0.0847637589382444 -0.0850418089214982 -0.0852603280145701 -0.0853763504676799 -0.0853607861709023 -0.0851958051434307 +AB1_EBV chr6:1412516-4412516 6:2913796:C:T chr6_2913796_C_T chr6 2913796 C T 0.00941746 0.335343 0.157435 0.0331681 4.55657828344948e-05 0.00318684629739834 0.000431444673078984 -1 NA NA NA 4.55657828344948e-05 0.00318684629739834 0.000431444673078984 -1 NA NA NA 2.30538610436447e-09 2.6805498999232e-05 5.059013531817e-05 5.05900962554694e-05 5.05899572315649e-05 5.05897803915044e-05 5.05896517792012e-05 5.0589656828871e-05 5.05898513725928e-05 5.05902415684262e-05 7.66614930776059e-10 5.10995658072454e-06 5.03852061861102e-06 5.04382160905973e-06 5.05128657355206e-06 5.05874533200358e-06 5.06460140973759e-06 5.06772529294742e-06 5.06735321256222e-06 5.06300559036585e-06 1.76272160940659e-05 0.00125143232076605 0.00103343253982729 0.00103422980064459 0.00103535302842997 0.00103647526981979 0.0010373559353431 0.00103782480336434 0.00103776684453523 0.00103710997247525 0.407286288005157 -0.286181936403061 0.142757797463833 0.142796722497035 0.142850225803715 0.142903234516531 0.142945217211903 0.142969026586909 0.142969760675168 0.142943868591193 +AB1_EBV chr6:1412516-4412516 6:2913933:G:A chr6_2913933_G_A chr6 2913933 G A 0.002884 -0.0552494 0.28875 0.848259 -2.61276804153581e-06 0.0032733607704216 0.000271474463967358 -1 NA NA NA -2.61276804153581e-06 0.0032733607704216 0.000271474463967358 -1 NA NA NA 2.39012529715526e-10 1.17947687710751e-05 3.2516816670443e-05 3.25015864228945e-05 3.24800841838334e-05 3.24585740548146e-05 3.24416903315117e-05 3.24327282233183e-05 3.24339151204045e-05 3.2446631739486e-05 -1.29384107884142e-11 -1.92414475066341e-07 -3.01585412168728e-07 -3.01865121378649e-07 -3.02253440974626e-07 -3.02639599248237e-07 -3.02942240776185e-07 -3.0310396405733e-07 -3.03086152715454e-07 -3.02864696739472e-07 4.48984746872096e-06 0.000937747091442122 0.00110769083204224 0.00110801250403537 
0.00110846483835581 0.00110891534026423 0.00110926755788719 0.00110945402956772 0.00110942937943025 0.00110916474980723 -1.85920118456897 -1.10713288072139 -0.299241414111716 -0.299670207306508 -0.300275749949801 -0.300881721012933 -0.301357493587462 -0.301610075005957 -0.301576591443828 -0.301218195436843 +AB1_EBV chr6:1412516-4412516 6:2914065:C:T chr6_2914065_C_T chr6 2914065 C T 0.068155 -0.0898974 0.0585507 0.124691 -4.55635943984205e-06 0.000739866387618746 0.000292226090615189 -1 NA NA NA -4.55635943984205e-06 0.000739866387618746 0.000292226090615189 -1 NA NA NA 7.9672360969625e-10 1.33992478416794e-05 3.49075399455961e-05 3.48931938639127e-05 3.48729310299871e-05 3.48526823296835e-05 3.4836842233479e-05 3.48285391892714e-05 3.48298863700927e-05 3.48421872513886e-05 -7.20626698041368e-11 -3.76640606337197e-07 -5.21489879796873e-07 -5.21762991004918e-07 -5.22146662381582e-07 -5.22532208711195e-07 -5.22840636116295e-07 -5.23016665719313e-07 -5.23022840888174e-07 -5.22834886216702e-07 3.03066573362912e-06 0.000226971358502165 0.000248648293365666 0.000248740065882767 0.000248869189393097 0.00024899818088449 0.000249099682591987 0.000249154498042909 0.000249149679517198 0.000249076993648462 -0.655209330404995 -0.979590415247006 -0.228295955476589 -0.228667316499166 -0.229191944030024 -0.229716250733307 -0.230126316235066 -0.230340975980472 -0.230305407785422 -0.229985904820172 +AB1_EBV chr6:1412516-4412516 6:2914194:A:T chr6_2914194_A_T chr6 2914194 A T 0.00109866 0.110877 0.477429 0.816353 2.7396613039195e-05 0.00576889745077796 0.000283171893704526 -1 NA NA NA 2.7396613039195e-05 0.00576889745077796 0.000283171893704526 -1 NA NA NA 2.41209623129335e-10 1.27864463223858e-05 3.38532563572749e-05 3.38386411599051e-05 3.38180096087545e-05 3.37973688864156e-05 3.37811670182675e-05 3.37725691856805e-05 3.37737148124725e-05 3.37859275631042e-05 2.65158009872724e-11 2.34098095477853e-06 3.12591812671548e-06 3.12766098049386e-06 3.13011841781008e-06 
3.13256955050937e-06 3.13448944903574e-06 3.13551052109538e-06 3.13538478299653e-06 3.13395373995902e-06 7.5232896894886e-06 0.00173938842606516 0.00194239563817294 0.0019430608545397 0.00194399730246343 0.00194493075990924 0.00194566117493293 0.00194604829464393 0.00194599767055386 0.00194544938818315 -1.85005079976932 -1.02640326055915 -0.258963611281714 -0.259355729380369 -0.259909367859147 -0.260463396969321 -0.260898369481019 -0.261129207875246 -0.261098398065117 -0.260770463217731 +AB1_EBV chr6:1412516-4412516 6:2914212:A:G chr6_2914212_A_G chr6 2914212 A G 0.000394946 -0.38577 0.705803 0.584676 -7.44339347826138e-05 0.00955359514397322 0.000306979642194949 -1 NA NA NA -7.44339347826138e-05 0.00955359514397322 0.000306979642194949 -1 NA NA NA 2.72783652698342e-10 1.48189846981526e-05 3.65695054388094e-05 3.65567584070366e-05 3.6538753489373e-05 3.65207283449848e-05 3.65065684534308e-05 3.6499044020748e-05 3.65000288380522e-05 3.65106793034488e-05 -1.04259352605892e-10 -6.69676778652577e-06 -8.44708051792153e-06 -8.45288940020747e-06 -8.46106900742361e-06 -8.46922407475e-06 -8.47560366041431e-06 -8.47897779681471e-06 -8.47851789772401e-06 -8.47370038147977e-06 1.31352569982239e-05 0.00309901695153985 0.00319013108056283 0.00319155667504451 0.00319356441400493 0.00319556700346377 0.00319713453525078 0.00319796460227068 0.0031978534339785 0.00319667259720857 -1.72703816066681 -0.878879880785989 -0.181784097253166 -0.182093030823458 -0.182529420330056 -0.182966353574209 -0.183309625847713 -0.18349204928215 -0.183468179071459 -0.183210033274174 +AB1_EBV chr6:1412516-4412516 6:2914375:A:C chr6_2914375_A_C chr6 2914375 A C 0.449913 0.073551 0.029878 0.0138277 1.32216141683604e-05 0.000761528659714504 0.000536628417467266 -1 NA NA NA 1.32216141683604e-05 0.000761528659714504 0.000536628417467266 -1 NA NA NA 3.83151076045243e-09 4.30887802086399e-05 6.16439638151432e-05 6.1662884637203e-05 6.16893492695819e-05 6.17155657828099e-05 6.17358050208852e-05 
6.17460236951692e-05 6.17435083053162e-05 6.17265813132754e-05 2.67335670903368e-10 1.95755214840349e-06 1.40105574913152e-06 1.40306874183386e-06 1.40590161913239e-06 1.40872631846429e-06 1.41093060082874e-06 1.41207967234115e-06 1.41188033327313e-06 1.4101516492809e-06 4.68962630555799e-06 0.000351002342680615 0.000238095951014486 0.000238338134818822 0.000238679108788393 0.000239019238319394 0.000239284959287019 0.000239424093383826 0.000239401448933722 0.000239195231478321 0.915297298109937 0.188473653824918 0.340376508638551 0.340723095858899 0.341208437665329 0.341689827993623 0.342062243003785 0.342251461519015 0.342207611579621 0.34189981886268 +AB1_EBV chr6:1412516-4412516 6:2914401:G:A chr6_2914401_G_A chr6 2914401 G A 0.0503909 0.0677083 0.0674605 0.315536 3.37903989382671e-05 0.00184438530310166 0.000575749305072093 -1 NA NA NA 3.37903989382671e-05 0.00184438530310166 0.000575749305072093 -1 NA NA NA 3.50376911289106e-10 4.96758289149142e-05 6.56915862278989e-05 6.57172605597296e-05 6.57532207183963e-05 6.57887312841917e-05 6.58158891851144e-05 6.58290990600566e-05 6.58245890958145e-05 6.58000155287485e-05 2.09034212952765e-11 5.36761997901145e-06 3.53441819355873e-06 3.53981002719482e-06 3.54739911063546e-06 3.55495242620422e-06 3.5608180386453e-06 3.56382125348346e-06 3.56316872122301e-06 3.55837028488939e-06 1.67453037152077e-06 0.000883963392274344 0.000570211614939072 0.000570825147685962 0.000571688993428737 0.000572549442921174 0.00057321911261408 0.000573564973504713 0.000573497202384529 0.000572960109854784 -1.47670769626281 0.330729485650394 0.40397204815025 0.404402500327367 0.405005797061682 0.405602213754799 0.406059458283852 0.406283857214265 0.406212233246936 0.405805239605961 +AB1_EBV chr6:1412516-4412516 6:2914502:C:G chr6_2914502_C_G chr6 2914502 C G 0.00245766 0.209088 0.306687 0.495389 3.9047537953121e-05 0.00438973837463217 0.000320070210425683 -1 NA NA NA 3.9047537953121e-05 0.00438973837463217 0.000320070210425683 -1 NA NA NA 
2.93611457820829e-10 1.6136632346308e-05 3.80386339173337e-05 3.80267475798636e-05 3.80099629270098e-05 3.79931496362746e-05 3.79799255720913e-05 3.79728724915218e-05 3.7973735146357e-05 3.79835850669857e-05 5.95215988605005e-11 3.70917890764339e-06 4.40649934645662e-06 4.40962790391042e-06 4.41403798711645e-06 4.41843356693725e-06 4.42186713149864e-06 4.42367232750498e-06 4.42339916298949e-06 4.42076209746486e-06 6.24753246060531e-06 0.00148490108672485 0.00145806488870796 0.0014587742047661 0.00145977378640281 0.00146077082315774 0.001461550840378 0.00146196280796287 0.0014619048298717 0.00146131262253826 -1.65345983946009 -0.793697000984798 -0.142396480346853 -0.142669312665948 -0.143054549454361 -0.143440482085462 -0.143744082152327 -0.143906095325838 -0.143886489351122 -0.143660740246291 +AB1_EBV chr6:1412516-4412516 6:2914521:C:A chr6_2914521_C_A chr6 2914521 C A 0.20614 0.0703608 0.0371003 0.0578935 -6.93534828254422e-06 0.000613286361002598 0.000359322953560004 -1 NA NA NA -6.93534828254422e-06 0.000613286361002598 0.000359322953560004 -1 NA NA NA 1.43860220089813e-09 2.00500835682009e-05 4.24379611194419e-05 4.24317697338114e-05 4.24229792199924e-05 4.24141112696858e-05 4.2407050279112e-05 4.24031542841006e-05 4.24033531754856e-05 4.24082265653458e-05 1.00425433816151e-10 -7.24534959424452e-07 -7.73748294791053e-07 -7.74509417674381e-07 -7.75580896950382e-07 -7.76648408012684e-07 -7.7748073561931e-07 -7.77914685709095e-07 -7.77840636185839e-07 -7.77190673610843e-07 2.99064346024811e-06 0.000225325439176917 0.000201202997636832 0.00020133605363049 0.000201523476548863 0.000201710396361092 0.000201856448338797 0.000201933123478497 0.000201921237009441 0.0002018088618075 -0.0642899333933222 -0.576555667676539 -0.0329555345288934 -0.0330617406250537 -0.0332126789418901 -0.0333652329989018 -0.0334871992913355 -0.0335553653514253 -0.0335537863048128 -0.0334724685389816 +AB1_EBV chr6:1412516-4412516 6:2914554:C:T chr6_2914554_C_T chr6 2914554 C T 0.135995 
0.038435 0.0429496 0.370848 6.47053539247435e-05 0.00223777734319599 0.00118259558244027 -1 NA NA NA 6.47053539247435e-05 0.00223777734319599 0.00118259558244027 -1 NA NA NA 3.18689749020099e-10 0.000179061771950524 0.000125039542732018 0.000125179499792941 0.000125376975572424 0.000125573802098581 0.000125726856413785 0.000125805389205695 0.000125788588193547 0.000125663596335013 1.05757319394502e-11 1.69376568691927e-05 5.92538130863974e-06 5.93861193914095e-06 5.95730092452391e-06 5.97597060943443e-06 5.99054552494914e-06 5.99811035205237e-06 5.99669074573156e-06 5.98507507534674e-06 9.6175766833845e-07 0.00137701733722103 0.000620409140951185 0.000621347699584995 0.00062267242076679 0.000623995031064763 0.000625027784623004 0.000625565655334832 0.000625469940626079 0.000624654936818488 -1.57149910479072 1.61294185159444 1.04763122349573 1.04878959712318 1.05042214635872 1.05204729742902 1.05330991974445 1.05395806450647 1.0538213965256 1.05279363140955 +AB1_EBV chr6:1412516-4412516 6:2914711:C:CCT chr6_2914711_C_CCT chr6 2914711 C CCT 0.320268 0.107084 0.0314276 0.000656009 1.00794454196126e-05 0.000672864308448527 0.000452688558775338 -1 NA NA NA 1.00794454196126e-05 0.000672864308448527 0.000452688558775338 -1 NA NA NA 6.11914659554838e-08 3.08908269105225e-05 5.27123673074265e-05 5.27172540270212e-05 5.27239006708949e-05 5.27303941178295e-05 5.27353567648797e-05 5.27378404526941e-05 5.27372196452797e-05 5.27331031379703e-05 6.3388208695703e-09 1.27285056200579e-06 1.09552090789256e-06 1.09683247897076e-06 1.0986735500374e-06 1.10050887668888e-06 1.10194389643366e-06 1.10269929246762e-06 1.10258691219298e-06 1.10149012205341e-06 2.67517074212337e-05 0.000282191907234753 0.000215119636993929 0.0002153036373749 0.000215562279839644 0.000215820285181988 0.000216022181983816 0.000216128707906801 0.000216113399346476 0.000215959815439381 3.68604585289092 -0.144329713316701 0.18385129614376 0.18398369439256 0.184166018796946 0.184345674945186 0.184484309070744 
0.184555114723677 0.18454023165196 0.184428566659044 +AB1_EBV chr6:1412516-4412516 6:2914809:ATT:A chr6_2914809_ATT_A chr6 2914809 ATT A 0.000447573 -0.408113 0.842956 0.628283 -7.51047265041005e-05 0.0109514034494035 0.00029793787915644 -1 NA NA NA -7.51047265041005e-05 0.0109514034494035 0.00029793787915644 -1 NA NA NA 2.64208065930658e-10 1.40086248601729e-05 3.5542574882971e-05 3.55291586107761e-05 3.55102126508247e-05 3.54912513681538e-05 3.54763622448605e-05 3.54684571175488e-05 3.54695050079257e-05 3.54807223963272e-05 -1.07011080440611e-10 -6.57193498994277e-06 -8.54707725731676e-06 -8.5527244378763e-06 -8.5606772789815e-06 -8.56860768545149e-06 -8.57481477742161e-06 -8.57810429112542e-06 -8.57767155549499e-06 -8.57300721940919e-06 1.50551291235237e-05 0.00345722524180414 0.0036686736517724 0.00367018759650938 0.00367231949345491 0.00367444568572225 0.0036761100317613 0.00367699193386601 0.00367687539704228 0.0036756240603886 -1.75898023945295 -0.935115788042269 -0.210267535254518 -0.210605379803196 -0.211082521692463 -0.211560127054884 -0.211935205428116 -0.212134348712334 -0.212107916286927 -0.211825316827286 +AB1_EBV chr6:1412516-4412516 6:2914984:C:T chr6_2914984_C_T chr6 2914984 C T 0.000287449 0.354267 0.999662 0.723049 2.703077715997e-05 0.0114938828249797 0.00027396398143209 -1 NA NA NA 2.703077715997e-05 0.0114938828249797 0.00027396398143209 -1 NA NA NA 2.49377474437249e-10 1.20122914886385e-05 3.28004061135531e-05 3.27853154081789e-05 3.2764010103102e-05 3.27426964355368e-05 3.27259666666453e-05 3.27170867158878e-05 3.27182643369172e-05 3.2730867781985e-05 8.56525709482312e-11 2.27070858295486e-06 3.08874315563543e-06 3.09055516420125e-06 3.09310137612001e-06 3.09563882057952e-06 3.09762571652866e-06 3.09868271811578e-06 3.09855490771735e-06 3.09708106554615e-06 1.65152603369177e-05 0.00333543589553106 0.00388477309447274 0.00388594889741285 0.00388760268835554 0.00388925012559928 0.00389053841161184 0.00389122064397984 0.00389113073047462 
0.00389016315885473 -1.81674945056399 -1.08885857313042 -0.290557908812972 -0.290978394291082 -0.291572197093555 -0.29216642522216 -0.292632977426566 -0.292880647345178 -0.292847765353393 -0.292496233211677 +AB1_EBV chr6:1412516-4412516 6:2915098:G:A chr6_2915098_G_A chr6 2915098 G A 0.00527826 0.0543171 0.215615 0.801105 -9.76874223221042e-06 0.00254577990784939 0.000278829707978434 -1 NA NA NA -9.76874223221042e-06 0.00254577990784939 0.000278829707978434 -1 NA NA NA 2.42262873180657e-10 1.23875645887906e-05 3.33603046810113e-05 3.33456713272467e-05 3.33250067920336e-05 3.330432922802e-05 3.32880930729057e-05 3.32794671508043e-05 3.32805938368856e-05 3.32928003399584e-05 1.295652743051e-11 -8.03849430033876e-07 -1.11801393012677e-06 -1.11877089860112e-06 -1.11983380012676e-06 -1.12089149424497e-06 -1.12171656441473e-06 -1.12214929538475e-06 -1.12208221811762e-06 -1.12144755768726e-06 3.41909754540966e-06 0.000751860547074559 0.000858931641245195 0.000859218031145905 0.000859620868089426 0.000860022209225979 0.000860336005987524 0.000860501937197444 0.000860479407270481 0.00086024269987228 -1.84569377129065 -1.05809587573848 -0.273632095064505 -0.274031139810482 -0.274594787229997 -0.275158957447462 -0.27560206020752 -0.275837513584008 -0.275806770292984 -0.275473667121246 +AB1_EBV chr6:1412516-4412516 6:2915224:C:T chr6_2915224_C_T chr6 2915224 C T 0.0682216 -0.0937519 0.0584923 0.108977 -5.20821122150971e-06 0.000759432459797402 0.000297929467887204 -1 NA NA NA -5.20821122150971e-06 0.000759432459797402 0.000297929467887204 -1 NA NA NA 8.86204646290237e-10 1.38755065518203e-05 3.55598635899948e-05 3.55458691901148e-05 3.55261005201324e-05 3.55063481147054e-05 3.5490904371254e-05 3.54828262789226e-05 3.54841786182658e-05 3.54962335818677e-05 -8.34908632156713e-11 -4.3910943201162e-07 -5.94966695873379e-07 -5.95295798613142e-07 -5.9575829500644e-07 -5.96222632316608e-07 -5.96593086246553e-07 -5.96802610781167e-07 -5.96805807681173e-07 -5.96573372116411e-07 
3.29034782342141e-06 0.000237055757649994 0.000254735053338896 0.000254834640236507 0.000254974779840339 0.000255114813479599 0.000255225044981329 0.000255284621984149 0.000255279476045557 0.000255200670056554 -0.548769259026239 -0.944663822205111 -0.209781231050728 -0.21013515614902 -0.210635204852288 -0.211134852203649 -0.211525379115496 -0.211729305677665 -0.211694305328359 -0.211388240157156 +AB1_EBV chr6:1412516-4412516 6:2915401:C:T chr6_2915401_C_T chr6 2915401 C T 0.0200603 0.111735 0.106574 0.294442 2.65891628161913e-05 0.0020077713393796 0.00040294679285513 -1 NA NA NA 2.65891628161913e-05 0.0020077713393796 0.00040294679285513 -1 NA NA NA 4.07700965734278e-10 2.44724106421007e-05 4.73314047000324e-05 4.73276541540497e-05 4.73223445037227e-05 4.7316985754982e-05 4.73127293219974e-05 4.73104211616356e-05 4.73106421248228e-05 4.73137418639691e-05 4.51133110511089e-11 2.97815421029617e-06 2.94157430655294e-06 2.94440721491119e-06 2.94840324770975e-06 2.95239262933605e-06 2.95551581206691e-06 2.95716627297094e-06 2.95693551488462e-06 2.95456849415166e-06 3.08439022087468e-06 0.000781076959890063 0.000652341106383485 0.000652801889347646 0.000653451763617018 0.000654100709838075 0.000654609010407683 0.000654877985192742 0.000654841122017456 0.000654456890573819 -1.32518318215438 -0.377242602623381 0.0761752166918708 0.0761356706107117 0.0760797265014599 0.0760229850739513 0.075977550276229 0.0759524734576429 0.0759540325158889 0.0759859442442865 +AB1_EBV chr6:1412516-4412516 6:2915430:G:T chr6_2915430_G_T chr6 2915430 G T 0.246598 0.100933 0.0338728 0.0028847 1.43993571296611e-06 0.000398239121275289 0.000277947681035617 -1 NA NA NA 1.43993571296611e-06 0.000398239121275289 0.000277947681035617 -1 NA NA NA 1.95359444118897e-08 1.22955320476787e-05 3.32598606505559e-05 3.32449896815547e-05 3.32239748206454e-05 3.32029577224263e-05 3.31864839176995e-05 3.31777877703217e-05 3.317905715224e-05 3.31916490750607e-05 1.94451230905899e-09 1.15787296945157e-07 
1.64947440322177e-07 1.6504072388084e-07 1.6516985957693e-07 1.6529933656796e-07 1.65403292068897e-07 1.65463735006879e-07 1.65468712156058e-07 1.65410804132149e-07 1.46776813293119e-05 0.000116878731773104 0.000134349021559498 0.000134392183985858 0.000134452796106265 0.000134513232529988 0.000134560645405559 0.000134586041806141 0.000134583378036163 0.000134548827726025 2.54429419021292 -1.06555304117893 -0.276647521581845 -0.277055038984506 -0.277631108644985 -0.278207392851923 -0.278659145756337 -0.278897509364558 -0.27886236153174 -0.278516524325726 +AB1_EBV chr6:1412516-4412516 6:2915652:C:G chr6_2915652_C_G chr6 2915652 C G 0.00124761 -0.701166 0.415776 0.091717 -0.00027199989545806 0.0132006112124618 0.000678245680799772 -1 NA NA NA -0.00027199989545806 0.0132006112124618 0.000678245680799772 -1 NA NA NA 9.39384588742169e-10 6.37939353453445e-05 7.671516910991e-05 7.67489388243181e-05 7.67965239366717e-05 7.68440271393261e-05 7.68812340125626e-05 7.69009327205877e-05 7.6898261707839e-05 7.68701825591455e-05 -6.42735476372314e-10 -4.60272210722758e-05 -2.81010998919438e-05 -2.81430624371292e-05 -2.82022796605134e-05 -2.82614948535306e-05 -2.83079517080839e-05 -2.8332590368358e-05 -2.83292911595377e-05 -2.82942615712114e-05 2.44607892199364e-05 0.00655992902413388 0.00403496960382416 0.00403931933825822 0.004045455705724 0.00405158954622103 0.00405640032779591 0.00405895155185838 0.00405861086096073 0.00405498528629517 -0.490492192682643 0.580869139139011 0.559100655149265 0.559580451515449 0.560256520781342 0.560931393109285 0.561459987811208 0.561739887129169 0.56170204193343 0.561303223541298 +AB1_EBV chr6:1412516-4412516 6:2915708:C:T chr6_2915708_C_T chr6 2915708 C T 0.268358 0.0578417 0.0337544 0.0866024 -1.55092260036492e-06 0.000399053682974076 0.000279038690448874 -1 NA NA NA -1.55092260036492e-06 0.000399053682974076 0.000279038690448874 -1 NA NA NA 9.99918434915411e-10 1.243560802191e-05 3.3379819889901e-05 3.33653505888417e-05 3.33449084962539e-05 
3.33244451596598e-05 3.33083654268511e-05 3.32998042428375e-05 3.33008838108988e-05 3.33129225685633e-05 5.67792470097502e-11 -1.31052851840977e-07 -1.77031336788425e-07 -1.77166611653612e-07 -1.77355750962205e-07 -1.77543235607044e-07 -1.77688395240212e-07 -1.77762765137038e-07 -1.77747460064138e-07 -1.77630972318274e-07 2.08244294300018e-06 0.000118345449793908 0.000134579726341672 0.000134625602728603 0.000134690086743743 0.000134754291173147 0.000134804427965216 0.000134830835004099 0.000134827025019115 0.000134788913447476 -0.428043450143346 -1.05422501733844 -0.273047283166974 -0.273441154479705 -0.273997765178134 -0.27455513627753 -0.274993248416976 -0.275226599939566 -0.275197292241188 -0.274869447893877 +AB1_EBV chr6:1412516-4412516 6:2915966:T:A chr6_2915966_T_A chr6 2915966 T A 0.00245882 0.210171 0.306692 0.493166 3.92701027237077e-05 0.00439785149583809 0.000320520194884821 -1 NA NA NA 3.92701027237077e-05 0.00439785149583809 0.000320520194884821 -1 NA NA NA 2.94292097139502e-10 1.61795668167075e-05 3.80894059344709e-05 3.80775558887321e-05 3.80608224057336e-05 3.80440601186295e-05 3.80308758920353e-05 3.80238437443115e-05 3.80247031979213e-05 3.80345224615323e-05 5.99684729433109e-11 3.73475040204798e-06 4.43104049842842e-06 4.43419305617215e-06 4.43863698689777e-06 4.44306635199132e-06 4.44652636209089e-06 4.44834552482554e-06 4.44807037697052e-06 4.44541319581013e-06 6.26486940101891e-06 0.00148932064238078 0.00146053952541834 0.00146125220886662 0.00146225654360647 0.00146325832977999 0.0014640420683366 0.00146445600294546 0.00146439774651902 0.00146380270883261 -1.65114435891219 -0.791039850900761 -0.141062621632935 -0.141334084193934 -0.141717387334125 -0.142101387972814 -0.142403474591471 -0.142564688408347 -0.142545197074177 -0.1423206015918 +AB1_EBV chr6:1412516-4412516 6:2916048:T:C chr6_2916048_T_C chr6 2916048 T C 0.0131298 0.210555 0.133103 0.113674 1.16850326021304e-05 0.00172479358807696 0.000297227997039795 -1 NA NA NA 
1.16850326021304e-05 0.00172479358807696 0.000297227997039795 -1 NA NA NA 7.32208667558176e-10 1.41088836751237e-05 3.54387090129078e-05 3.54260275712218e-05 3.54080972950051e-05 3.53901497406694e-05 3.5376061220974e-05 3.53685893673649e-05 3.53695947911179e-05 3.53802223669006e-05 1.4525943007589e-10 1.06636629333676e-06 1.32361564115894e-06 1.32468806841809e-06 1.32619359643205e-06 1.32769517608615e-06 1.32887230721187e-06 1.32949842177912e-06 1.32941971911649e-06 1.32853811916089e-06 6.44078982806224e-06 0.000550740041603564 0.000577003314918781 0.000577256969646812 0.00057761377192192 0.000577969673755596 0.000578248449890955 0.000578396376971511 0.000578377155963118 0.000578167815871085 -0.739651622406083 -0.927984342542101 -0.213194107964821 -0.213512316356463 -0.213962325976297 -0.214412827325076 -0.214766473181057 -0.214953998071122 -0.214928682878758 -0.214661860910844 +AB1_EBV chr6:1412516-4412516 6:2916260:G:GT chr6_2916260_G_GT chr6 2916260 G GT 0.341792 0.130362 0.0314309 3.36008e-05 4.16354152320501e-06 0.000467766307919146 0.000321953196178781 -1 NA NA NA 4.16354152320501e-06 0.000467766307919146 0.000321953196178781 -1 NA NA NA 8.59497209082973e-07 1.63936356890018e-05 3.81301738872002e-05 3.81196184125329e-05 3.81046546470492e-05 3.80896300407893e-05 3.80777742469636e-05 3.80713916328134e-05 3.80720403650775e-05 3.80806788282489e-05 1.08143809015176e-07 3.95760299744966e-07 4.56128958390794e-07 4.56516363030521e-07 4.57059793071343e-07 4.57600210214348e-07 4.58020975083182e-07 4.58239807917504e-07 4.58201176240885e-07 4.57870130496292e-07 0.000120149912259846 0.000154870439427054 0.000149886260903151 0.000149965833929052 0.000150077714409609 0.000150189190849025 0.000150276261048877 0.0001503219935621 0.000150314966550464 0.000150248004591374 6.32838569594526 -0.777895797641916 -0.13999287140045 -0.140230039891527 -0.140566413222216 -0.140904285152593 -0.141171069056003 -0.141314994010152 -0.141301065694943 -0.141107798588142 +AB1_EBV 
chr6:1412516-4412516 6:2916310:C:T chr6_2916310_C_T chr6 2916310 C T 0.017997 0.104869 0.112272 0.350269 -2.06608058363164e-06 0.00127991421863132 0.00027246801160119 -1 NA NA NA -2.06608058363164e-06 0.00127991421863132 0.00027246801160119 -1 NA NA NA 3.49143165505881e-10 1.18679125238671e-05 3.2632254549013e-05 3.26169189667467e-05 3.25952710097641e-05 3.2573613882791e-05 3.25566114476792e-05 3.25475803654816e-05 3.25487625120573e-05 3.25615472502304e-05 3.45132787067508e-11 -1.58827418440601e-07 -2.38338489475163e-07 -2.38361809245648e-07 -2.38395479473152e-07 -2.38427362863984e-07 -2.38449401383151e-07 -2.38456508572611e-07 -2.38445317959044e-07 -2.3841330949699e-07 2.77649313724686e-06 0.000367990900120619 0.000432980305550763 0.000433105021681782 0.000433280448304889 0.000433455130915465 0.000433591631112812 0.00043366377644883 0.000433653963752813 0.00043355098836745 -1.48023510627835 -1.10095065776515 -0.295697603057446 -0.296127968007364 -0.296735640306601 -0.297343782289945 -0.297821363172618 -0.298075088353595 -0.298041879874098 -0.297682774748979 +AB1_EBV chr6:1412516-4412516 6:2916347:C:T chr6_2916347_C_T chr6 2916347 C T 0.00234823 -0.472653 0.319524 0.139076 -7.30572554286086e-05 0.00577508239019442 0.000388321169953576 -1 NA NA NA -7.30572554286086e-05 0.00577508239019442 0.000388321169953576 -1 NA NA NA 6.85520808868472e-10 2.2868798622248e-05 4.57069624728122e-05 4.57020544071268e-05 4.56950926405767e-05 4.56880889393967e-05 4.56825623082732e-05 4.56796131516313e-05 4.56799871004238e-05 4.56841289950174e-05 -3.16876086277397e-10 -7.94803449252908e-06 -8.11205163551567e-06 -8.11973214656119e-06 -8.130556928672e-06 -8.14136422670553e-06 -8.14983138284861e-06 -8.15431809887712e-06 -8.15371694660398e-06 -8.14733269420916e-06 1.46564883619148e-05 0.00220059709444493 0.00188328457825256 0.001884574271071 0.00188639223184881 0.00188820763332014 0.00188963024641197 0.00189038432858177 0.00189028374688367 0.00188921187334649 -0.805538306419943 
-0.445015513483716 0.0412518323613069 0.041184142630815 0.0410880529833104 0.040991275198154 0.0409148284635545 0.0408739783371201 0.0408790532144785 0.0409361160965389 +AB1_EBV chr6:1412516-4412516 6:2916373:AG:A chr6_2916373_AG_A chr6 2916373 AG A 0.000633458 0.378783 0.6089 0.533891 6.80076325399599e-05 0.00837578208513209 0.000310595076911557 -1 NA NA NA 6.80076325399599e-05 0.00837578208513209 0.000310595076911557 -1 NA NA NA 2.82344029787875e-10 1.52314386571413e-05 3.69681347368441e-05 3.6955920892769e-05 3.69386700840338e-05 3.69214017424662e-05 3.69078399003114e-05 3.69006400664021e-05 3.69015986878916e-05 3.6911824259152e-05 1.03225036954902e-10 6.28247035175782e-06 7.6961275257194e-06 7.70176953384914e-06 7.7097180876155e-06 7.71764860272493e-06 7.72385925292066e-06 7.72715284259484e-06 7.7267227077645e-06 7.72206040997617e-06 1.1830194013809e-05 0.00276098092003172 0.00279124475241787 0.00279255308801732 0.00279439618701084 0.00279623524135786 0.00279767548721564 0.00279843899835864 0.00279833846850423 0.00279725601392684 -1.69259086954052 -0.851427364906821 -0.170942487387372 -0.171233233235637 -0.171643885040182 -0.17205497689485 -0.172377835971572 -0.172549221446217 -0.172526354760116 -0.172282894380725 +AB1_EBV chr6:1412516-4412516 6:2917032:A:AGT chr6_2917032_A_AGT chr6 2917032 A AGT 0.0680982 -0.0965194 0.0586032 0.0995589 -5.67228412404672e-06 0.00077565669488866 0.00030211835616889 -1 NA NA NA -5.67228412404672e-06 0.00077565669488866 0.00030211835616889 -1 NA NA NA 9.52600995304864e-10 1.42308031348723e-05 3.6038247010799e-05 3.60245211065824e-05 3.60051298055076e-05 3.59857562531494e-05 3.59706146822702e-05 3.59627073189676e-05 3.59640618618359e-05 3.59759265813746e-05 -9.23290858444292e-11 -4.84690774651301e-07 -6.47133510036717e-07 -6.47503854785806e-07 -6.48024417118163e-07 -6.48546824271423e-07 -6.48963039774495e-07 -6.49197352402227e-07 -6.49198512506053e-07 -6.48933509414694e-07 3.48472222779747e-06 0.000245061586619285 
0.000259819386500546 0.000259924962621006 0.000260073545200983 0.00026022203960888 0.000260338956286304 0.00026040217319824 0.000260396762721103 0.000260313248205561 -0.476521027518269 -0.919380139346068 -0.196418014179931 -0.196759260029226 -0.197241434204558 -0.197723152303706 -0.198099481564319 -0.198295624588887 -0.198261071517732 -0.197964826024948 +AB1_EBV chr6:1412516-4412516 6:2917176:C:T chr6_2917176_C_T chr6 2917176 C T 0.00910822 -0.09442 0.158339 0.550964 -5.41421170898064e-06 0.00183684381811164 0.0002756075401944 -1 NA NA NA -5.41421170898064e-06 0.00183684381811164 0.0002756075401944 -1 NA NA NA 2.84232871122056e-10 1.21041071443067e-05 3.29944702128207e-05 3.29793530034869e-05 3.29580109574804e-05 3.29366660533178e-05 3.29199227122373e-05 3.29110558340144e-05 3.29122790564568e-05 3.29249636459084e-05 -2.75059829382995e-11 -4.25657770042768e-07 -6.22568345957953e-07 -6.22850021454831e-07 -6.23246251132018e-07 -6.23644423514365e-07 -6.23962855072759e-07 -6.24144638057899e-07 -6.24151420349237e-07 -6.23958477415872e-07 3.10160324945875e-06 0.000534191677338166 0.000620696835564093 0.000620886366756455 0.000621152978432888 0.000621418734081884 0.000621626868351387 0.000621737656185905 0.000621724366873825 0.000621570005169982 -1.68592328980415 -1.08124416098852 -0.284658827520475 -0.285077409368548 -0.285668501050224 -0.28625984591446 -0.286723799812775 -0.286969473406826 -0.286935418001979 -0.286583691244332 +AB1_EBV chr6:1412516-4412516 6:2917233:T:C chr6_2917233_T_C chr6 2917233 T C 0.24434 0.0999066 0.0339502 0.00325327 1.17275027333658e-06 0.000394264827547286 0.000275700504050569 -1 NA NA NA 1.17275027333658e-06 0.000394264827547286 0.000275700504050569 -1 NA NA NA 1.75632579654595e-08 1.21147943333276e-05 3.30022108794099e-05 3.29872041986018e-05 3.29660016367499e-05 3.29447963476298e-05 3.29281712886146e-05 3.29193868298062e-05 3.29206481604014e-05 3.293332313767e-05 1.73093484856806e-09 9.27937305938899e-08 1.34512791773872e-07 
1.34587916993572e-07 1.34691492371402e-07 1.34795501405842e-07 1.34879543304851e-07 1.34929456975357e-07 1.34935875093499e-07 1.34893029975727e-07 1.37970324369143e-05 0.000114686143418646 0.00013313386124762 0.000133175189798825 0.000133233233124206 0.000133291095415736 0.00013333646639927 0.000133360733104092 0.000133358107471219 0.000133324921510813 2.43784722260616 -1.08036161148688 -0.284424250069105 -0.284839373749135 -0.28542608014791 -0.286013030071064 -0.286473266322151 -0.286716368748884 -0.286681165173086 -0.286329828223755 +AB1_EBV chr6:1412516-4412516 6:2917240:G:T chr6_2917240_G_T chr6 2917240 G T 0.019097 -0.159466 0.109105 0.143855 -1.06007158704344e-05 0.0014462718648323 0.000302294232483735 -1 NA NA NA -1.06007158704344e-05 0.0014462718648323 0.000302294232483735 -1 NA NA NA 6.26153740838257e-10 1.44071633015345e-05 3.60294952601571e-05 3.60183204861313e-05 3.60025374024845e-05 3.59867629945237e-05 3.5974414300767e-05 3.59679229569778e-05 3.59689346483901e-05 3.5978469812833e-05 -9.45614227094562e-11 -9.42774140466601e-07 -1.20310496862918e-06 -1.2042899547144e-06 -1.20596013986471e-06 -1.20763224967048e-06 -1.20895012347198e-06 -1.20966170492439e-06 -1.20959718085394e-06 -1.20865084641601e-06 4.64312917459456e-06 0.000463145208134987 0.000483564555405223 0.000483806510448037 0.000484147448334549 0.000484488181541861 0.000484755810549468 0.000484898864214593 0.000484882652700948 0.00048468546985797 -0.896121227373675 -0.907063455021076 -0.196660889852437 -0.196931397061284 -0.19731343773209 -0.197695176587837 -0.19799385596227 -0.198150606034225 -0.198125590256057 -0.1978941359327 +AB1_EBV chr6:1412516-4412516 6:2917276:C:T chr6_2917276_C_T chr6 2917276 C T 0.322012 0.110478 0.0313368 0.000422698 1.16203311784586e-05 0.00072476971171035 0.000487556916038523 -1 NA NA NA 1.16203311784586e-05 0.00072476971171035 0.000487556916038523 -1 NA NA NA 9.07661742481001e-08 3.54577585259985e-05 5.64822031250835e-05 5.64917294193233e-05 5.65048911449434e-05 
5.65178814771113e-05 5.65279262925007e-05 5.65330884422946e-05 5.65320761040156e-05 5.65240852900257e-05 9.70184583297669e-09 1.55745503383001e-06 1.25118072636648e-06 1.25276882573555e-06 1.25499911161245e-06 1.25722326398891e-06 1.25896298292562e-06 1.25987939423479e-06 1.25974414810072e-06 1.25841584583106e-06 3.35268268988337e-05 0.00031533030047395 0.000229694983463847 0.000229904152209295 0.000230198240225988 0.000230491662996154 0.00023072132284593 0.000230842536310908 0.000230825180163132 0.000230650554226152 4.08032480416118 -0.00644690209912024 0.2529267939592 0.253135137015608 0.253424346235863 0.253710721570567 0.253932958830646 0.254047984566061 0.254026965973055 0.253852000911881 +AB1_EBV chr6:1412516-4412516 6:2917284:G:A chr6_2917284_G_A chr6 2917284 G A 0.000185896 2.17343 1.33618 0.103821 0.000613554969808555 0.0347133916275768 0.000548510584581052 -1 NA NA NA 0.000613554969808555 0.0347133916275768 0.000548510584581052 -1 NA NA NA 8.4773204979451e-10 4.37818021334355e-05 6.30578137241499e-05 6.30722778698699e-05 6.30926174150932e-05 6.31128835806283e-05 6.3128733630544e-05 6.31371227967198e-05 6.31360029299488e-05 6.31240802836088e-05 1.79370456202886e-09 8.949699191577e-05 6.52179650543132e-05 6.53014400084777e-05 6.54191687976645e-05 6.55368291330057e-05 6.56291037498186e-05 6.56780492507771e-05 6.567155176552e-05 6.56020764286465e-05 7.26163392126802e-05 0.0158929736566894 0.0108758088609039 0.0108860145849345 0.010900406902664 0.0109147885991464 0.0109260658449788 0.0109320473330933 0.0109312538622067 0.01092276412053 -0.593152553870173 0.20442926523208 0.363053178106279 0.36332222809547 0.363700907419618 0.364078573063729 0.364374204594942 0.364530795146633 0.364509946503933 0.364287483000271 +AB1_EBV chr6:1412516-4412516 6:2917286:G:A chr6_2917286_G_A chr6 2917286 G A 0.00245976 0.211607 0.306676 0.490192 3.95662131838167e-05 0.00440843832881892 0.000321126231617619 -1 NA NA NA 3.95662131838167e-05 0.00440843832881892 0.000321126231617619 
-1 NA NA NA 2.95218797816824e-10 1.62372798357836e-05 3.81578001835832e-05 3.8145998770834e-05 3.81293338499226e-05 3.81126398975393e-05 3.80995090421955e-05 3.80925049378021e-05 3.80933601060031e-05 3.81031383111853e-05 6.0571039331081e-11 3.76880877656163e-06 4.46368651768963e-06 4.4668710239593e-06 4.47136000792566e-06 4.4758343419471e-06 4.47932955082859e-06 4.48116730343495e-06 4.48088951852954e-06 4.47820557190098e-06 6.28796818169363e-06 0.0014951406154105 0.00146376129947197 0.00146447844632928 0.00146548908090669 0.00146649716193881 0.00146728583268409 0.00146770237436583 0.00146764374925418 0.00146704496098117 -1.64800039177406 -0.787479166502865 -0.139268607899495 -0.139538237731458 -0.139918953887551 -0.140300369876369 -0.140600431855955 -0.140760576327229 -0.140741238231277 -0.14051818561178 +AB1_EBV chr6:1412516-4412516 6:2917318:G:A chr6_2917318_G_A chr6 2917318 G A 0.058009 -0.131377 0.0635492 0.0387035 -7.99509123567877e-06 0.00090682772847784 0.000319083731836756 -1 NA NA NA -7.99509123567877e-06 0.00090682772847784 0.000319083731836756 -1 NA NA NA 1.72502156829369e-09 1.62653333570556e-05 3.78964433213557e-05 3.78853134403692e-05 3.78695978162394e-05 3.7853849576543e-05 3.78414495824049e-05 3.7834809455084e-05 3.78355596771822e-05 3.78447033793833e-05 -2.16350082937035e-10 -7.84421219436232e-07 -8.9882425130246e-07 -8.99544960274532e-07 -9.00561224799436e-07 -9.01574155145299e-07 -9.02364627045751e-07 -9.02778153844284e-07 -9.02710464096011e-07 -9.02095829651832e-07 5.82552481850122e-06 0.000310416825672205 0.000300705351122607 0.000300858991487216 0.000301075549948497 0.000301291560771109 0.000301460471977938 0.000301549460046424 0.000301536383244006 0.000301407317389571 0.117277672031676 -0.785752934333665 -0.146141541716107 -0.146395579645013 -0.146754235364545 -0.147113672002027 -0.147396776220942 -0.147548554389052 -0.147531837128308 -0.147323801769661 +AB1_EBV chr6:1412516-4412516 6:2917351:C:T chr6_2917351_C_T chr6 2917351 C T 0.0551008 
-0.0252723 0.0655198 0.699705 8.76563114378706e-06 0.000953081915894028 0.000324133599466037 -1 NA NA NA 8.76563114378706e-06 0.000953081915894028 0.000324133599466037 -1 NA NA NA 2.55308349576141e-10 1.64770820887932e-05 3.8498544760935e-05 3.84882018246528e-05 3.84736412993685e-05 3.84590928995726e-05 3.8447696457833e-05 3.84416897867099e-05 3.84425830315233e-05 3.84513011942451e-05 -6.77317053600951e-12 8.35503496407967e-07 9.88349177806006e-07 9.89186716492214e-07 9.90372368094158e-07 9.91559209796093e-07 9.92492417400822e-07 9.92991882622889e-07 9.92935723200553e-07 9.92246925136894e-07 1.11796892423137e-06 0.000324638038118916 0.000316223270509299 0.000316393706294589 0.000316634430626803 0.000316875070328934 0.000317063943593495 0.000317164552651642 0.00031715224106143 0.000317011488610749 -1.79324513204221 -0.772818538324604 -0.130378363475097 -0.130607360246403 -0.130929491861886 -0.131251198503196 -0.131503043786358 -0.131635576107759 -0.131615451449086 -0.131422298142715 +AB1_EBV chr6:1412516-4412516 6:2917734:C:G chr6_2917734_C_G chr6 2917734 C G 0.00245994 0.211399 0.306657 0.490594 3.9524191170592e-05 0.00440673010071969 0.000321045681277887 -1 NA NA NA 3.9524191170592e-05 0.00440673010071969 0.000321045681277887 -1 NA NA NA 2.95091129039371e-10 1.62296994164451e-05 3.81486982592076e-05 3.81368904147737e-05 3.81202164335897e-05 3.81035134521812e-05 3.80903755433228e-05 3.80833677277856e-05 3.80842234498353e-05 3.80940070589232e-05 6.04839396679464e-11 3.76407889292354e-06 4.45904038663407e-06 4.46222044321167e-06 4.46670315578475e-06 4.47117123112336e-06 4.47466154078533e-06 4.47649670406267e-06 4.47621928120657e-06 4.47353905092034e-06 6.28435419419683e-06 0.00149427406313861 0.00146323196856338 0.00146394847452663 0.00146495820496947 0.00146596538275929 0.00146675334579134 0.00146716951316541 0.00146711094031488 0.00146651268904095 -1.64843294008008 -0.787946128299184 -0.13950717013481 -0.139777042428051 -0.140158100645596 -0.140539858357503 
-0.140840188064173 -0.141000474072818 -0.140981116047954 -0.140757860005393 +AB1_EBV chr6:1412516-4412516 6:2917881:C:A chr6_2917881_C_A chr6 2917881 C A 0.00116755 0.370974 0.425921 0.383759 3.53736518553954e-05 0.00545300435877033 0.000294806223373634 -1 NA NA NA 3.53736518553954e-05 0.00545300435877033 0.000294806223373634 -1 NA NA NA 3.40527758013535e-10 1.37669358643734e-05 3.51818012726602e-05 3.51682166763565e-05 3.51490289628559e-05 3.51298263665128e-05 3.51147499283518e-05 3.51067488721605e-05 3.51078174978935e-05 3.51191886372793e-05 1.23616403503735e-10 3.09563226436544e-06 4.02551039150873e-06 4.02818215912796e-06 4.03194076257432e-06 4.03568836199183e-06 4.03862275252425e-06 4.04018062501683e-06 4.03998251792172e-06 4.03778840396079e-06 1.02563779922254e-05 0.00170919184224034 0.00182823169391561 0.00182896711066571 0.00183000229100878 0.00183103458991319 0.0018318426997936 0.00183227111349744 0.00183221507839199 0.00183160844974472 -1.50522051711543 -0.952519223989972 -0.220469866027951 -0.22081636908141 -0.221305864754931 -0.221795829164183 -0.222180559793347 -0.222384730765843 -0.222357403332269 -0.222067168807617 +AB1_EBV chr6:1412516-4412516 6:2917970:G:A chr6_2917970_G_A chr6 2917970 G A 0.062377 -0.00141246 0.0610677 0.981547 1.27354036804579e-05 0.00105882371419752 0.00037421893669809 -1 NA NA NA 1.27354036804579e-05 0.00105882371419752 0.00037421893669809 -1 NA NA NA 2.35118899459539e-10 2.16104077010826e-05 4.41025022891234e-05 4.40971333598514e-05 4.40895829848684e-05 4.40820023943699e-05 4.40760067470158e-05 4.40727569685085e-05 4.40730346409258e-05 4.40773046136013e-05 -7.95686349561399e-13 1.37289483938082e-06 1.41555781180205e-06 1.41693236704312e-06 1.41887466937707e-06 1.42081459579131e-06 1.42233290866819e-06 1.42313298367922e-06 1.42301415005418e-06 1.42185015034832e-06 9.26739669098075e-07 0.000398779291250974 0.000345973236210372 0.000346207904069368 0.000346539213308086 0.000346870131954123 0.00034712928751056 0.000347266191582495 
0.000347246721450257 0.000347049466693444 -1.87562581937529 -0.50161395275396 0.00551771645585486 0.00543566869900669 0.00532068394824137 0.00520523735131473 0.00511374182841173 0.00506371743170675 0.0050669063026163 0.00513018064624138 +AB1_EBV chr6:1412516-4412516 6:2917981:C:T chr6_2917981_C_T chr6 2917981 C T 0.0106991 -0.179925 0.144395 0.212743 -1.46565807176174e-05 0.00193638244665908 0.000304691406547875 -1 NA NA NA -1.46565807176174e-05 0.00193638244665908 0.000304691406547875 -1 NA NA NA 4.83699583433394e-10 1.47995421524578e-05 3.62857206053768e-05 3.62729810870845e-05 3.62550074572633e-05 3.62370233763348e-05 3.6222902803972e-05 3.62154049104344e-05 3.62163926891588e-05 3.62270121787056e-05 -8.29883274842750e-11 -1.36433698015565e-06 -1.65744443378711e-06 -1.6586210465356e-06 -1.66028304758396e-06 -1.66194249736079e-06 -1.66324242346387e-06 -1.66393134904925e-06 -1.66383901756174e-06 -1.66285693379194e-06 4.90772202982209e-06 0.000633019966572649 0.000646002662206916 0.000646290010074167 0.000646695172156275 0.00064709951297063 0.000647416170251992 0.000647583981357366 0.000647561655030434 0.000647323077262763 -1.15425314209433 -0.880192744763693 -0.189574513510048 -0.189885967007717 -0.190325348593937 -0.190765011444355 -0.191110234953281 -0.191293539992217 -0.191269376694 -0.191009801374269 +AB1_EBV chr6:1412516-4412516 6:2918284:C:A chr6_2918284_C_A chr6 2918284 C A 0.000833908 -0.556691 0.523047 0.287183 -7.44030003889521e-05 0.00777112881458192 0.00032953541860814 -1 NA NA NA -7.44030003889521e-05 0.00777112881458192 0.00032953541860814 -1 NA NA NA 4.06846382975239e-10 1.69954040133585e-05 3.91104286633995e-05 3.90998019339285e-05 3.90847734770854e-05 3.90697107981359e-05 3.90578611687294e-05 3.90515458445335e-05 3.90523375456124e-05 3.90612055195211e-05 -2.20524022350319e-10 -7.18967998108677e-06 -8.37893111831845e-06 -8.38551510128346e-06 -8.39478151687654e-06 -8.40401980457779e-06 -8.41124403165043e-06 -8.41505667879456e-06 -8.41451703711257e-06 
-8.40903459522916e-06 1.51066998856523e-05 0.00268035395968752 0.0025741739876467 0.00257554281723802 0.00257747058093055 0.00257939380192304 0.00258089918016679 0.00258169547515111 0.00258158646111776 0.00258044903598945 -1.32728148392639 -0.741846034061399 -0.114609656474713 -0.114841707112959 -0.115169891099532 -0.115498845938077 -0.1157576615097 -0.115895656525368 -0.115878494926097 -0.115685046477238 diff --git a/tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.bgz b/tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.bgz new file mode 100644 index 0000000000000000000000000000000000000000..2f298d48bc31d5441209715a24abefbdc1779c86 GIT binary patch literal 35105 zcmV)ZK&!tWiwFb&00000{{{d;LjnLMUA4W-&LzunH8%eIE?tO-wc^GZO&FYcM0k2Y zi?l63wggeO;kP#`GAnC;M*`e{`v-{b(~DWTFSRQ&E-P02$G`vepa0|Q-~ax<{`0^7 zA7B61*T4Vcpa1stPygG$|Lgz$umAYh|NZY@|Msu{_~-w)|F6IP%fI~nU;eK9Z~OY+ z|N6gt{m*~@kH3EX6?FumAJcKmFtDpZ@XNKmP6S|MbuQ z%Rm3qZ-f2Izy9C9{oDWf@BjQC|IgRI{Qa+g`|aOtuKBI|>mPr+|GZb-fB(}z_MhLb zzp;0V`PbUz{?EVu%fJ0^f7SJut-rAT;`+5+q@^Al&Z~w|~|Eh2Q>hG^E|J@(|d;RTy z`0xLx`-aCKlxnW=gX{0F_=8oKMEv2;fB4HR`E3Kg{rR^Ad~M+C&tHG}l8cHA|B_4F zEb~h)u`YH0(n~Ee{{BPdMq!;Dih@}yZzwHb7E-NRjeC=H5M~H=dw%J{q~X;Fy%&AS zRo12<@QnU%y!Y=`=tXqHif*LD-+zdI;R?wt53PtwJ7BOxP({l=AOm1$dAT3Zvg*dV znFmy`Is#3;mrID4G4yk#BqEpzdcBudWwWHTDx#bB|CC>Ld1|O{8J za6#eeP-bV<0ll3e<%KxA*{sFe*x4Fx*2WaDSvPt;^ji8Upv|uOD)?WJpq516sTx zy}y)ph$D5a()OTcrnQ^)4K^fdL+;jH0>8hO1+|nhQnQE!rL-{^?N#+ao>5tB z>1VG@qFkgX_!*;uus{6q4}X4*3c`N-<8Oa{jS9lP{`mFh+o%wx^Yk^*^UXp;Qos7L zG|#{PP#sf4t^27fy?@nnUjju>D7}HWziYiWXNf_isq4YFa?ygSi^h8L86_}Q1^=6G z{`)1opTqK0^R8GAc3g0Y1YAqga6pSP;k|gCgata=ifWLs1C~{kGbHkWw*{J~&HP;% z>W$Vk{RW~vY4>APXV3ewu=~j?9J6A)FNIp$ebtv;3TUT`v0?i69}>6iEbDgG{o@4= z=&EvC#DejFC6$&%$GrA{t>Uc>v~?U1DEDf+e=7%MsG>?k^j%5XJPs0kS6Z+oit)Vq zzRyxFZt84Z4=kc3#hjIF+a|>56|Ir???UVWL$&QTsGZ3JG8LVP@{R+RSy$b6n)-k# zUDoMHAr5GSccDuqU(hT}pjlS$WYVgy?Y5~25{#!PkZHX?8%NYVT4(b{`V zAyWJ{>KV5g89+t!zWeN;FNB?`l}y!*RPjD3sNriHlk!G51LQ_rBdx4Gv93yXMh3my 
ze#)G*{NXQu_|u0uY5DChzy0aMoV0xX*>PVyhSlvVB*vdiwDDD^&r^>#>9zv`lNC2l)|b^gcw6`GbJtkpC1fb9qk!)<3D zkV36ejr$>dKvxS(yS*oU!Hy0y;68~TFevw{grnRJcz9;<86oK^?%)BV0xpWQI+(De#cmhSzkcZbC1s|B-rw}SU)={V=n?c~iSA!RZHqVee3=JpAY!2+iAJ$;rCAR?qV+Se8x*h%X4(z(` zPbKacP+8}2@FeVZ5byV}-3>26+##iv>hghaEZ}JklXaHfn2N?7A>60w!EVpZTWOhh9s#YD4r$Ps;qg~{7@$#vl@P$v_xjkrZJYfg&|MR!z&0TK_CQA|l9 zxglcSNczu3Dgl0vR08}Ssoeh1id00pMSMPocvf+O0(Y6DNXqS(-cM-AINr3%&iAOu zG-TXvH%uR=F0IS%2*d0Cq~9O)U$c>2^c>@drj%}@F=m|Jar;M4dcmR#c^m%wtU6#p zt&(yF!3Vs5^hy_a@qiSGEbC~q1KNF4yt_XySDNGBy}5v17;b# zUFqw{T3|Hp2M-!P{@V53Q?iyO*uWgvH!6xifNikeS9wXIS6o4zi^(bRpF7 zs}T2Dv!N8r8)(s3(47@YGE57;q0GKNgx!(jfW(`*1uZ;V#T`A~ft~kH z`58fe4@iRi9*_jN1Cl#sl`GKH+hX4T#rvdMImz1vds12Ms-@B*jP+fEI`6mNGeFC< zB{lEROul;uziJ;|_u@NLzfVTBC$B4lJNkq49n`t#e#H{KGmN-h^vna^4>)R4M>t@~ z_~G|Ka6r|H`_iiDI3SC6IC(p`XHSjW^5yNPHec|L^~&RZ(hi8YfAs;o^J{kH{lb)# zfZTzT>KAW+&ykSU$MMliEbb`xPV!pffQ`-okSX?n_m9Zpejb`K!87nT|X18GwM%CGLZN zfN;Ivu;0JF0qVAE(smasZomW)xAnXu^#Ro2B@$d)m`aE?i+2X)-lC(+-8_&#V;>6g z-K!|bcdw!#$Ey%l+wz#^lip8NQq}I~gT(^Be&luM(QdPOA0}@$PC7Ftkwd-R!u^=u zuAAS#LXiI59r%S?*)^B2Z!f>|@hk}5<|Xc^6OR|*J?B`(?X1NER#UlsSROF_KAf~~ zkLG}$_tWEMk2ijUw9MQ8ka{t=WZhY{w0gmi+aFeyw)g5g@~L7Sx55Q=ThKefdq0zR zFcKH;$12ekC!6@AE}3&m%S>dlDI4|s+qzRf01tX6Ba{(&2XzO%bHa%`b)jQbHk$Vv zfNQtqL2m;Y?w*$Zi3(G^Z(khm4$iQ#SIV{T?ZvyqY#_Bgt~na+Y((&`;M`Br!frRP zOBs}>Rsy>XKil$-_3OdzhZ@nh3mFgQcQ&N&bb#rD-HF)SJC%}iZH9T=$-v}+{Pt>Y zFGda)aEgJhxq>a+KN`zYiFMAm-Z^vXeN@=?25z53=Rv>y#tq#^5%0`(+D3|ZIVn(R zEJ(46Zmiru_?;fjxN_O>cYp-E922 zt?UAew?CvMem~-4Hmm?|(!=ef3~)ai*?p(Yq#p1tOR9Bc$gvQp%sV~j87MoTN8hJG zhn+2JlCyUdiuYpJT|m6^`o}t1B`uWqNSb{S+)_r7!fw*&uy-r0iN z)40>gFIsmJFYnNACs-FMai5d-|K%?lc^6VU+kM$NUSpi@RK#s*KcY~rJ8+Ncw8Db! 
zT5wwDokj{hSorPV?UdJ4XJMXs8)}>@??8@sCO>!ZaTjde?jlar6K334iS+yc>|K`u z-qAsBPw~bO>37Z2F)6;vxU(ms_xE=H9xu5AHM>6mL|1g+X?`c+?@$rkP~0DDa?dOF zw>w30yMmEjxi74oBO|1CU~@Y%Qh=#<#)d%51EjgFl;1~Z8q+)Q?0!37y3)5lV>Vj7 z{_w{SvvC8z{qe(W+`!i#m)XFb1{eb+MUQni?|U&{`|Y@`lkoPF@2WcTP6xl?Xtmrw zP%BeLp6r8G_EgR9`0w|IWHp{ovYNbvk(izXJjb-Zk|Xq|{xL zV^eTIZQKP5)5??dVCS7v3u}3?Qt`V~V;y@yqdX~- zAGm~e=k$X2@>T8rq1T-ofywPPd#=2}?I9;4Z_nAcAG6V$tq^8+eokLBs)a?;!e6ws zG4EnM_#3?=Lle^KbvejX3VK-+Crp*Z0E8<4vk)p%bh zRB2;BA0h}uL+F%tizjY@s6)$&fv~Y6hz?pQ#oJ?k+uf{~FRo(w0z~f^5-gj6Ic7bx8LW7q0K4GPD?n0@+3?s<3r#@+&rYW7 z=9IEuNYmPb?$!IkjABHJI{atsgi-$Rr;qGL8|Ak@ePlPDReo|a6w2Iv|B%1gy;$Cl;vs1Lr4K@J_jeW{NZigfc-#l|gYp^JW;*yIxS}t__v814`u-5X}SUg(Rz~i3sa_SBfDE zi1mRN*btYd6!JJ5o#9CDR(HeWY?N45T3E^ zZlIK##EXt?V}$#>*^Q!u0#a6GcB4$wqICy9c3@@E#5<`UC}=tFYx6_x()O+d&2r^( zpYuQ_Yaumf`E%RhpiZ<_i_*M6D$-FyvOJ)x1J&Iv@8zKpC2nRL^8|D)qCF+cAA=pxAzpHeqTfR z6&vYgny8Rug=_u&hx8Y&fLcNjT{~3qE4|A+vL(#)0Yd^B0LcCP`<0;?;uRF%B?rGI z9Zn4~7$%e+urp0WEc9iH8C)ttOzC;RG(sA#66bP=CWawTKbJ#u@(I;u{eljM zFl7@j>@sU`s6>TRZ!R;|#F8d>Q=-o54tg+BHiaAASt<>wj9M5)6k4EZll@>}VWEU^ zezGDIRTDs>!&4v3CELnMst=Zmmg1Ze4;Ga!q(lZD44op{4dXHcP;;+>QuE;!Asf|} z$d%Z&$@#O>oy!jts+tL{?iD zSINjG6`HQ?-Nm1=5|I8LkwE%;L;~p(k(g-bGIwT_{*r4M?=csup=YqL{r+_Sx}EHrI)p?ZrsOwXMrn>)?xnpq=OtvDNSe8AKY^DJ ztvzK`*#!??LUSRkl}`zVmr$kXNX3hxJHpLro_Ik-6PeQ6%NUTB#D*BYybQl0+ff#dojr-Etu)&`g+-To(e5auXxOG({)mf`VkV_25q(gl zxN&OJ;V;_GOhY)fKky?i+KPUftS2KosF-Ij*44X17RAM-xU*JqbxOIUw3+J%4|G0S zQ{1C#V`Wp8G;6`;V`!O%n`CJ}#BeQxq%}Q5-*sdQ41{d+uqq7gOi}P$8(Lls_3#f& z7-GS|+0QLFiSQ&RIbF1p2B5L>50MN3jSa~sM=0&urX5wSwlGz1rz5O|u`xPOl1M(} z$Yzx!OPsc!T%kDs$3`kfKl(C>`U zhQ5qYg%j)bL%WAfM`TPTwkm=C`e7HL>a0q!u6Q4Oi$uda1IN{?X$6=ru&DlhsPc<; zvI`jsI=a(Npc3UUC&j!^@gp-M)JSyr0mHhz72D-a2HNB(@`9@jlAn7@?dyI`-7#V zmoZ!-4weY0kY`m%g{F%gLz6ilnMgUtEHjRKg{-T`Y80R6r0))!5b4Q0-Qz zC*2>&8!4)(y{ji->Qd}!yYUUB6wRT6ogE!6@pyr>+z7IQZdImuAZBI{Bl@{cG+XEr zP5hMA@b3*L4gcP7((u!8f~m=Gj`uW8&No&ftD~@63;mTBtb(l$)m7g-Ed=x9QFlBk 
zZtI?;ySaPWchbc#+sLN9%*#Vt6nWSim6fT|Zl~?kgDElPH2DBQr+9_-DcE{wlm~{c zi3jv<#gxl%td4uR7Rnj*GBrFix>Kaz!ijs9YfP|O@18X^y$nbmyI~-ew5dt2LTp@B zRg;|U;9Q?yby-HLM;R(YAFx2nhE5FsfNfP>=ELCHAjB}upBBBlS?hvys1r5K9uKHM z2{UykL>Fjbu2y9$<+Co2q)=(SNb{4n)d(!^o_|(RFulUw)s4fY1IKR?Z}rnq0v9#b z6!~0uK+qY{34NG+aPe^Ku+Tn57?(s;=koOzQWnL620k_{wV*_%Jp@v?M2VvKq%bO7 zM7ki!MU?32E0v+I%fsGCKzlNt+T2hO(VgA?Ff2+6h0J7Ze&^6K+FDwb*ifZ0#6^>v ztRWQLcRwqgq(my2+^rkxwGf&PqUmDi?k4Y z`PI|2#HwkcJ1#<-3lLEeb{dkcnU)>-u&Qb}d5rTya6w`EG5B=AdKSu~V9fo6Opx3{ zibmj6WCU{c zAe`4ok710COZ!~wR%Q+loj}DKK}I;tS1)_JA~C0qW~~R`q?AJ>sNF_lrs{=^>5agK zf`~9N$Euy0nObAq2-;p{uG5|EQ=o#K^BGN^O34m+^K%hGZAlCJkj3x{Ok-up+nDB2E)hLM zkOMkdF+aP*>j-BGt=mzj?IoNguxaDMyV5H}(-N{n7`p=Hv`7(pEtjdU@Dy#NLX1xb zyLNf1x0XMJBE7VmbDBujwq1IehR$HszJe4j$qpo{fz=bT>fzLoGFi^OmWEfmd&urKEA7r{yaa1QrkRC8 zD-{nkEv;inbzfLr7!qXl&)A8ie@|m1{d*cC=^u@)n!?rP8`U}F-@xwBubtTuF(u2Y z36XuD%p&J(YY%;zjbK?cO^Hdg{THpRzhWm`^sb`XxCh%-#8jZff`xuOfNImiG+Lb| z)@qe%HN07WTvhEfQw9SsnB7wmOFtn-Rb;xQM0vd{Lsz?frhb5?G@+>+CVEvWv|QWp zu*rFag=L^)NLPzZZkKphXI8SGJnRpN64@r^z4TWhBH5|Opxl8 zdJw8ecBM6jqrqyZPLjqskw>~&tEA+Jyv*aOJ=vD&3t6jFJBqCl-P+5rn23dSh-Sby zgeWt3DR%#>Nni;ksOSS_c1-t)*|S%M#5h8}jm>n*YqVyY{(ti{TEtj?_|q{O3;69% z$7n3z>rcyQM1(C1DM<$&o`y}<$y8V?_bJ}Jc$S=p=OtHEdZ!kxp2*P_htZ)X|B0sY zi+1u0G;~|hD13TQ%v3}T?Orb%T-!;nRX{ml2?TDHng{c0GpfD1Ur?koQ1jD2ph?wEp<2=E@{csM)^y;NK?AilE!*b z7!cKE%6?I%Cq#jz$g|OaXho(jVT+BXN6j%G{;{^eE+G?IAE*l=MfK4NTRSv7SS5To z8m5xr9-eBI-#8keGNeYdV}o!?nTMrz#0!>WkD-6^8`JV;!&3-+(lLg(rC@(hJTj*A zv(=h5Sd#Fe+s_u z5WQLi`1Cp!@ZohV;PE*)ed@6t|^DUCCos%ik=ZO!8 zTdQ@#JefEXAyukuS@tqa7yES})70w2;{esdwe{KOqWs;tmK} zIg}yo7c`sP;Pf~iP{9af^}~}GkfYrv_GVWgmf9&vK8LVfoPbPYv=Dlk4_=~VLq^YV zXC{C6<&HFMvsUj9G*n#O+Ts18&9KPq9Z@VaEQd*H5*;t?_)P$Z#nnzL{ zRbFs@fJoCIMUMe+NvFl){^`3iV3E_?``8bc43kMBoXFfI*-{}RQ#s}%LX#Xera#sL zYLpCpI@wCyM?JvAbR(Ud@;H(X$eN@;+<>q+@_TiNj53n}vm98P(_LNtMVoP1n} zGI}*kuU`k7NiDO=Cp(BwarKJ#k zQPO3khJ0s42Tdqhvdz^7tF%~EehRXKdTWci<>v-L5k!O=AN@hq%oc`MnNxteWL&=9YYV>#MzxNQXU*>{gvN(td(E%PJ&VhhNGE8~K_{6>JkVF1WW@DCBwB 
z9`S;VQQ^e7c77#FbaQ!w<3hE=a%DfJx3wg*&108UDW;)AZSmd^)eX~JwDX_APBp3N zLfaR*Tjn%X^iygb)*wnk+xCT_ovH{XB_AjvQ&YBhsKsiRiU_vkPZ^G~?_LMXzIz=k zJGl-Ut2cIorMi09+{fOBVB);IN>depMJ?{&)2m!)cAE1ocUf{WMv3tLec1Brwz4Zm zO`+05hhMZ>*}6NIPi}9ESxGk|WwL4ynC&vX4=<6!GASeEm3<71Qxq7z; z)9HT|N9~YBRW-9TQi@OWimG;rq&WL&a?uG?%skZX-7lhDL)vyz%6#5NF+6rtO801i zghszDC@D2cJoWUT#6%fs@wgYQ7H-2ke!5F^>@p_i*iCK*Y0+{${P8w-V>r@wUu>Ie zN4mK_OEjT~Y9{(;j7Ca)HySDN-Dsr5V>BW{IIkqh!m4h0Zvw0o%_c$N;?UKt>7V(D zgok-2s`SJd(`;vjt%rmtyz9+^l%=*?fxAq-(VAnh#E6fz zZJk0ZUv=3BC96o)V|!+Ya+gBsYR=2WZf8TYG&{dHrG!t$BJg+vlg4m08}Jtrv7HW*CoPlEItl!t@6F-Lu*_48`4(!i=sI zwq*5+Sai`@-DR7G%xr~Q&~^^hatbLZ@lCrIZL$|87C5csH zWj?aLmSr`mb2+6o-QrnYqtO36^+ju6yv#{0eO5+((MD4_&~8@ZE>q9mOsxb z)I>pLey}>NQ!xcQ)3eetCgEvKyoSbI8uYGywKELmB`N9hv8Q#3nK|*MP;9k-%ntA6 z1T0e>xw9pHK}L0Bb=5AZ;mN?X1`v_T6+ZA{(;`Qh|A`)Wu_Gl{*<;5qyj(<)0ga=o z!^;IL_V-HD1BR9(tMSAhye3P~jUHOW3yz$t1~lHww`zz;>k&y;N?L|g#s2J@8PTZ- zxAZ5{fRB`_wpGc)(r~t9<`7}FASm1=3Sq&FoeRNeDPbA_RGD4GqZ}xd034xyi}tia{F}H$_B7_|yWS=Dv=FaRXE=O3L|AF4Wo?nwC<(6BGY+ zJK=(x`*fio`>CurMh2DBJoPvmX`IOk(w22F>!_L*^y31gsl^xzmqRsAZTCq|;pu9_ zm0G5v2yMQXYjTy|s=Z#&TUw#5FIFKoBVp`O3g$H(Oc|Xh?1tVSdU=YQ(%VXRqwWvA zhByFZH`(^0I$e&c@cjk`MZ!lu%{HZFP){Abmr(oIH!U@NseZp`Nu$|z#E;h6Ek*2GpDDmQOfAr=8dgXZiRK#-^ng=dYn}#gD#|5RQBe-MSxc#_`0_hR(!hcFg{JNbYN=<31S|>^&+FKGPY&{gs*6P&G zBv1a*TRaFK$KX`}tQdvkG8`K-t#=hI;!7)UfetOztQ64$(ycQzH0(nHFqYfHJ_!#m z&nX_J>S>8n6v$)?E0&uvY9cD(>H*p99k}4FBTmGV5uH3M%$?+(ytF_f8z$xvFPJn% zwKewifz>$E9_G=xJ&M&hLp6jiZp*@?7+uz~lpXigqV%ds)(z-asA347ZONBbu26S% zx_Y9^t?-&rzEF6O8f}O@1=U z7x2L-U%)HMqhPOIzL1*!ct-2+eATP-p?;0Na%BLmz0LD5s(iF$1oku(6uQys%Jlz= z`t>Vzip$bH+mj;|KL*1tLkqQ>o!n16uMq&vVm-|-8?-`HAfLd3mzy$LoFR`3z{^d7 z-3sZ#yQhetLHA61K<)4j1ZbO(K41zhCaKcz<=Qg3alQE-KX^$^r;>m@dYjAFP$Op4 zRI9$8g2qTEqM-JA6&gxU)}xuT!HMo7N0s(PNpe`E-?sHoV2*L(MZ;ierAGPKA#~Kw zTiri*u;?0s;dV7_Dz1h=7xP1o2^ytgOjmt0v@9_tCNK7*?QA*H0#U~5{rFpA#K$SA zxdRVNicgbFclp80r_T=MyIswa2*41N{yf+s_^Xo#)2@+Yki>EC)-+O@_med#uUPN7xW&qFsaM z1ON+0sSo4~!1DCEG;&~-^)b+5p#aLBbDTMz%7Rk 
zo?@7Xxnpv2`0BNXkA2kaC*L0-gkmssnVuoRMn@!psK*j%$|`0Fdh1X z<>)Uy*wEh1r}zKCm|Sv91ihs{W{KBp5 z@@!y)SpCTZ=oOhMT~g%RSW-oJI5lV|k6DG$y7G#8#biobT4^|nIk>8X=>S4cHMeqx zXK18~y{=)=`E&RdRCgw zOgk@?S3K0HtUz@u{ea#vob-C+I|O3UGn-LEVK11KGkHkt1)VawVay)mku}<*ZtdF> zPiH2LW?8la6;pa3sw%f|u$o4eR&=A5F^8g9{!`tSgj z(=X2KZ=QyWS%A@>o<^#Sf%0#jMrMWMG@~p}WAb*vZ=Ob%H)Frf@-$e|-b*<6cp53% zl(s#5aeEqLN^5<}$I}Rnhz#vdMpN_`ljT(~n;b_=Y71_IA_`}iYHuNKfu^bJhPVtPNj>Zr!A8GRCXaMRix^>@{ zqmj(=6hByTTB#FE{ctp#kaBxpW5~7%2b3n%4bwG=>~% ztq)YrX)(srb25&GGy~Z?FFK9}%@ysIucINZHIvj|M?=HL6xi!%On0vBq{ebIGPBYw z{b#I2OMEM}YD;`8wQ5VerPhSi;TK$qynV4=vJnHJQ!bdAWyyR&r>M)IA)pXMz zE1paCDoay6{;RCUuhB)AMR_r`>?9o#B{TU>RG|k< zNg=D{Bt;NYfn|B}2zHI~)iCtb<$WEFuvUsaJ+ZTL^siei?cLKo(QRI+lP1WR9HBz; z>1fl-6dgT39lQk;iw?N?(DLn|DLKL}ZrFl)Zo!MlS`mt{dR6AVdX``iR3|}&KIJ2KkZ@5h}3e}H2sqvyrALY#hC!{K%EqK8E@rao~@?ob#}|D#ITpfa4RO0c4>0i(nQ=V?DCaD%Au&XHXAcrJHExhPPn za=*^679BQ8Qa%oT5v= z^n!B|Mikl2ZzAel;n~HiTz;PpWxK7|@mPa7+gHP9vQ+;oBk~J&8g>@6V#;Xtbf~z) zJ(v+5N7*7NU}Z?-?NCYd49oG+XEO`47j@XEt18o9G^D!Q>UdsjZC7Pj<&(P^lr5z9=ggQj)5a(C ztha_1R&XoyOdi{%R0v`S7{c15jDSQ8k%(&YtQ%SAt_;eilm^sCbaw>e{lSVpBnUn| zCjk>v6{o?~fJ#+jW|OUI9HZ;z8*eKZ&3DOFpfhwHQt?1BtuNN%+qDSEE}hZ*TdqSxGDCC-*E=5);%G1&jGwEj zw8w{p`2s#9%op%TmH=3g$L(aD8yOVQV(1z0Us)4k zq;+}yeqPG2+6p7@V^cOY=)>TEvO7E0GqHcT#FT%{%N(UTg+7J@&#ah{3!GgXEM|@(9dh7$v*(NH^=j7(qNoyA}I%Y2AFa}0L%-p8? 
ze>av)kHn}}MOJK8jzO!bEmjgwYl>deuTjWe608qsWI83x@v(FQ1aXQygV^+{prdof zK78+tmi5NYi|brDTn)Vdk7l$j2wqq9G#qSaZhM8T5WCq~c23BVlS({0d|Ga6fK z?&AZ_fSnnAZclGp*BCCZXr9rHT}`(%8`0-?Wkgl?5_)|>ic#9@X^|S5AK<1#W7)sr z3S=kESIV=y5H%v&%uO06#3Pv+$xZqsXmHOFAbM6cwH+(2To5_Dc2Tcgaper-?_{cN zX+e&#pml-122~qfJglV0ZtV+`ZDGj3PgxL?&X1W(3;3A1w17Qx38g-_Q&YDd5=J=a zORtvIGyTjh9TB2QeV1*6&2~i-jI!q*D13=K4#)bx5bya#`xN08S`{9Z`h}}CsbV$r zb04+vhFat2i40ST;Q?L5+S4j_X2h>XY6P^jB;J)(obkyXAD4H|_%N-g_to2% zI#h!%IF_f|7@3hl{2stX&VthK(!kFI$6)^eiS$I|GgYaf26u zdhCE|!kjtxUw2%2Q1tA0$V5UIx1xK_f z;2#%jm6S@l$CqNs!VASV*U& zREz4I-Vj(TK<-JC4 zc<`JdTGW-2>f?u_jUFmAN zg!XcgH*k$WlWLk5`*|qIHbq&&umcCb=#z^W)yj9JYZ~VC7FdNj3MT!%hqE2#jGBOSw@z(X$6lu2Bq_hQVO zrq%T6Am;s=W+=77r^9t24H`r!W(%pA+|H5qsfAc2gpq~edYCSSyR>i)1=cjXF{6!9 z`4dHyiCsUN*arA)VjEyLu_3z_Gqk;y3bf6jqAwrq>^0$BE(x2WCYC2Y3tO39l&RwN zm_JJvSQ;}a=Svr6l{q2n3;~|;D-{-Qv~{=tTWxN`7v%M0Xbad3;s9Cj&bM;BmPu zq{T%^MbrUxW~OCFJi(4ym?Wm9Np=xsz;sT{_o^(KT0lfoa zeZ~{31vJY;ho$wUeQ6e|FvA-sBiUIIobmQE(_zM}SdA$CiZ~YpL#l;=u-;HIU>zZ{ zJ6N!!;n^dtvY~KG%P~f|p$buRgl&;Q1Iy4^E~$L>@?pxAwj_Savb27dX9xH!&kk_P zv&x>kRP0>-Qpcj_z!Z2yS0Z!fhZk{wHC;PV;RqRG-pW;#h*vWRHdT5M}3ZXS569K`iLGXt4cN9vmuT22W*)_ zCrjD(f<5|o`u?7!rE0Xg-TLEdMN3#JW_V`iG*-AF=jnbvk9hQG3Z^?(3tLKc`|2Or z)f*RgF%|bj9Y(As$}~M1FH5#9rM+0`hx>{-YiERao-GM?mBrc016t*XSe5Cy+G~C& z)2y>W8&PtztazGiriIZUA_egi8@NIlp(-QU{a~f3!A9&`<5QRQ$j*Eu{#-q!y8+?$ z9$ZJ+H<1iEuMou;p<#I<7uU$P()rHLc58|tvRlWVU>bIYr!;}>j%%Vr9rM`=kvN*jstu+Gts-6wau>zs(uJY|)3$_(0~ z@lZ78C`_5)`FI%ZI#Ty0Rqvkc&PbAqPY@xy>k%(VT6S8mV|wi=&@PuIPOY*Rv(#Hc z&KTQSF>~?!83d)5d6#$fP!Ep|a7fWdS3>!MWH8o3@emwLFy&^ac1u;;OHct11_CIXI-oaTd}Iz_D2^&H!RqL3*{6a4_Q$>sDM zu&p%bm%D4FxO!%)YuA*5Pqoz?mP2i}x6&Evf+|t9eA`e%V?7UMFAXEK3|hlY&36M6 zX;$`ITN+f|(V@nWcu|8T+M9eyA7P`vpO%&Wxr16pcR6>8*sfr%xX;!!@26q0ZRaFs z(9^;~q;kk0q0)`BZrq^ZRlyG? 
zCA1mY+z)205j1749yW+qbgIZIG>%K`(>w(C^`w*zpOc34-QPN6*VKhG_)E{&HEX7L z9z_l*ll3kpyRz4rS;|lgB6aUK%1Pwh!LtsQ)iOEmO#mqy&zz&EfS-y%4E{;h7~qqv zF~CdKsG0LQK}X9P@9knWx42Of^`+OOqd={p$($d6myK>d(Qnbq(hU(n5?bFM`d_z` zU2wpSWNE6Mq0!QeAUeiSm!I7V-0POZo2lnP^B2|Rq@JlRdrglA>WpJ8`ZpY zdxjD%FbJZ;)K10LD%_`{EYk1QIpKKcqgz~;xvN1V8YtVjPy~cFWi|C@;6c?4FZDKC zO*0F$kP;-LLJvsT^fp_=4j2Ln9nBFxU}9)4GvxJvB@y%$hrcU3)M`3H^4+sh?&fWK zT4P>8sjQ+;O!;=cMo(o+Q)08lwIhn=>RpeVtac>bBZPQ{5okw2%d)y3DA5N?F>%xOwr#AmQD6dE16!dZN0rpmL0g3-GL2&E19?Wcn8zbo zTr=%Wr`A+IWh+9zXF@IXdnVLEkMeN0>^E)FGmc6nd>MJn-0!a+c6Fu%*gm22OR7*q z3RPQkM;O!ISaX*pY6%~GxEjB1E4_LYC1V6er|)7$Dl1Gor+Me`DmrvIg5A$}g8|1- zwpTX}D8rk8N1QOdLzFV3EU}+=UhSQYHurkE&jt(~k(zwQzb%kpdb#+=<*h|^_eU<^ z2d~MKP}H@0S-&5+d?vVXbUJtY*$-S6Gfl?bI{ih{s>KQQO+OznjgggY`4A~E(zre2 z+jI?m+NjSYK8NR{5;_L2#)T+-nB@!gvXtqih!-6_G0QOaQ`M0yPZKS6)+-ph=Fnth zgr_`0<55nkyL9A}!;mQNMQS;z(V4oXt~naee87Z?6{R&EW;C}Dk`JhlQVtyOD^;77?I|wo6O@g;*pc@;%dxQgDs682_5fQ4K~E2ni0?& z5xQ`Qs03s_HNO7zr%let_Lz6w!PR+eU0L^!shEiq_vh*+W1tuFqPo+K(qZzzW!V6_j^m zqPC*lK%6@F%pI=>64MCLl{b<>ZCVD=UO>b ztdpQ%y3RbC#7E8}J4yrjkgX=?LFPnoZz_W;gWZvZgNZ=eJ4^e`WHiG^PWEHvH13wo zK){d7L8GA`p6H}EBGSvesfahd5sk1>G_;SLMbN3OQvQ)!m`rHG(yKmFAsw!YcJ=;D zWN1(sOx8!rqBAXN+WbglU?hW0jV3nBIPi=R?G1RCqh2zaoxV4rF81ig4O>Y(LqHh)? 
zLcNQoZf85vg`;rlzOx%zLoF;nDt@sq!*%hNK%GEM~xHGUB{O&kWQaZEpW5%Te{IwlTn` z+QtBn+Qv+ZSlp7>I*aXI>$ot4#dIWP8AYhh*t)zE333G4Z03sVDS%|GL8pa``dygu zt9G(0Ey|lfiVv0bM<2aIGB**L2TXqMFiqzzl83q+;L8({(E4 z9r>;dSR$ujex@9BF-oUZ^y6;NNEh`$wc>QNvk+RN3qpHThZplrh9UAWz*roC*O3-| zq7oLPTCEwW^{(u$sBUxQ+0u2|ZR!+N1{*z<%y`7X=4#cE0iw|kCWEZkq3@++V$GKjb={7~U=%WB^EFi2o zBiRoaB_&}aZE`?{OU&@YHxo)@8#A;+-#sOJSS@h`d<G3^A{)b-Ot)W@oV$Y~Gy}FAjSst~NeKwjd!muGN zzi2ox$Gob)sApvqyqAW+gVLsBBJ&x&FsM&{*?NsddG}4YpdfR4T_>RMsMPL1gH8Q3hQ!Y>aM3AWSK% zSP4<6NQt61?G!SiU&K&ku^>Z7*QhZ0UNr|@zo-o5jM`jN$ z`2AeQU$;|t59!I%X1w|eSv<|dDV=)qoYUM8>5l|1FDbZWNTz`AGSd>1poiB1rCd}k zD&9C`(gEIw+$7n?hMJgUyx^q8#PLy~oiZv@@(7#T<%ULL=8NbVG(Xdq5i!$ew?OA` z6kHjs$)s+urM#?1KkWg_6Vvw;V{8U%!%TpmPQn9@$aRVDNigx{*NkW@tr<9Z7WQhP zC|yLB1%}L={B#~y#nR`??djBbJ6VT#CnHIkMg}!kdUInl%fM&6&4HF= zgIU47MiaJ`Akw)Wl4&?R~y~T=J)Zg)MO>6NbWCK;P4*mom4GWni8HaetOuZX~N88 z%{7_X1_?EzdPeS^501o=V3oRI>Jq{O4ntrjvhET}To#r>hId%;^(Z*gw^7K}ecbDGTiCR$ z_)cETgxDe`S@JyLjNw~(m)dTk*AaDYlZN}dA0mzH=w~W|GAI>l!@I_dS+>L|?44@~ zn6F_Nldo|_HwhJ4LK)eL7IaZtYqi2pd7~Vo$q^fJ5mr{9$qiLeK z3$edyE5C*}n^99|d{`Tq0#gw)e-+CcmhejRtkZ?k&`b*j8i!BOkQq=hH|^vgtc9$a zF{lU2jtVBaPYlwQXw9sNH(rn)Aswuv?bTN1D%E0dQd1-mPRX8FXN{hp2I(2*W!!v- zA9lqE82h9k@q&c5N|0c`Xu1~w_R|;p!>$O)%1&v`7n>dw5W0KZ2b}KCnM%lyr6epy z$iMYPjZH~&CHeRuY(c8mNV$G=$wyf@M@?SvfP!j>BC_)-Xp|827I^jZ()V!CDm2lEa#)7mE)p_fW_nS_bgN-^YW zOqP~#JXFw@EX))s)s0vo$WT-~i^dC7OMlG4YGQ1{ZC|G{)u=EUx4@{b!>utC`v($A zgKeAXp*n+QAXHey&zKR|?_3Vp?_3Vp>vE`b4`^}i9+wf#oK9t}tnCer`&Q}WgvQhI zJZdWxv^k$an&Rp^bQP)pQn~yW?Bo}{(#6dT{q(+8N`z~)n(o4hU+soTh#{s<(2KGHaYhIlPI zBmSaAv-0Jwyl6)zH{bgMbyXxI^>c0JL}}^0*XqSlHB6(W^UaLX!lYTP-q>Xf2mxcY zxYQ#sJoHfUsLU0mq?~ZH-rl$o9@upga&D6$0s>6oL9=Uye?Ti_s z-dtA&0iPyqpFj-~?&U0Z^qONNF^t>p84$P-#Hd(P@l)9hIPk4a4Gw&3Q-cFbf;;w2V2de({(yr3rC z6!BDB1~hgmvMDc^!cp-!RInd)IjNeNDLO-;*6>$v6G`Y06;cH=ZChA3Vv{_9u^nI~bMmDPInXxjcbd?CHhxV11mzPkenY!mw z*OdV`o6#ogo7uwzFt>@O5IpHfjTyT^?Z9q7n-nsI7^f{BY)uY9Yj1bMG!;_lN~1mN z(jq(>b8bHvP;)p1tqx`_UNiLStTRh7%kI`m@&>gEGb>a#%e=B<1U0y3w#g;M8TZ>3 
z?Q$?ym^)mIQ5(xZdNs<(T}pMC2~TaxUVIKIw-8u~((_@DJ2K$H$jI&on;~hM(ucL~ z+Fs&H5I-|LT9F;!y^ieo?sa6x@j9B*NFx_8(ui`Y=Vno0=2?94at~kA7uoz2m8~d- zb)xHO7bSEOkQn)b@4mvX+sVeXSIq2{b;kFXT~RWE&)v4metwC!C}ti~en8S;?u_F} zh*!6#K|PLShWPj)Xm#P|%9u)|iP|?X%IcBXTv)jx&D-kZ#V1}X+75N{iU?(6l-;~{ zQdFhe!%NcWkN+2^$*~4<-O%jxKwg3Kek1VvbvJ zupwev(Ryp}j8taMh!*WU>k{rgzi2&B8B3N_$H6k9$dO__n93aCxe_dt#YL>uTl+A| zOB=zV>wm4ZgJ+H$Xic!AwCqT;p&R-_%EioU;p+zs@BG= zkxj^xd{t=;CVK>am%L)zj7#&V&Yj{=Se2wHw!c0%O{uE8@yxm-WwbwHS3agB>&b3* zEyn>ORhp)v*4V{Xg(n+cxfMBP$bv|^i1)Htn06ZP2|aCZucoE#L==CG*|v<<*g+o)zQ!EN^7WC_v0hrV!1^b ztfNkWgoP-Y0Uzo+W#wqCwtU&>Wjz{`BK`3?G_5pO)MGsIG6{?5l98ZU#sgPIP(lJo z?fsDZWy9PrRvIlZnP+Y&><8Z6rvH~sr^hp zS?DaUDouXuFa#>oSi`FFYYa_<=dfke-rH^#oeD1()2A_I=46*O5FQZ+Op1h8*;0I2 zU`1>7q_kgfdfBClXG%ORxQg=ti*i6CCa)BjO?K zHKVz+y@l$=+!YtIvi5do#%`K7tB8cgJ3z^-wRNO4M2s6s4Rc8VkMhov1Pa4b$TbKk z!$K6MJ&-shvwSNc+YF~`VVUSp$$U|pfB5648PVo%fBZBf+Wh+ClNgm=*&NlG&@9)C zjOj1AN&}8ByLOpf(CgQ>dsj26Iyu7``nlDGk)1^f{XbPuf7wQByq#g}^kYCQE69M& z82J6TqDHwBJ|n*lhzhEDw;VSGr6tlKm3}-0WakXa3F@hmAWJn;bQ)j|nC8)19wi?j zX7sx;?FDOkF#{@R=wY~ejmWO0aSwn6rKiUrvwTG!xYWG0)FRb)dIFbPMo5>iQSpG7 zqjJcba#oIzsk^u|@_=MD*#b`<(0#RboA4;5>j1==BUswNYy4Imu`#=NspXZxazZ4rZ8ZdiJ!@c($v23yJ%|P z_+2!$kA9S9ua`%y%6Ez{?Nl#DPMV7Z8qBhIsuPn7afi3-YL+=utfI4H8XKCw^EiIp zR(7RjVGSD{y62v)q}4OeSp5`B!=qhh*6+&!4fhz?!4$;15|s{CgdJ2iyZ6uXd z*;SGwnKX6J(kx1+InG2zlZIflGMXcAMZJjpF6u?(BkD!&daES%K)q>z{hH>ovdk~NV5Xte zD1`9b^+~ncjDk_R?9`1fyJ4-h^YchGo2 z3onJj(vP4`0v^*$u%Zxrn_y-fybl)-}RE6Uay&{(g_4j!i*5XvSgr{>$EJl=z~axs%M9-*U}ZFRan%|mnRW!7V=_`@f6M=ZZw&{G|H=mO)u6^U{PR8FzfC(Q%#kzV?}c;BzvX?pEj zXU4g#j3}?3=Fw_pE1!qcqtz?Svby>Khs7uEBhYuiiS$#4>UhAagxYWc^8srHRcN{D zE~dogWi35~c#n8NdbPCIBhMF?*Nk;=tK?P&vrAe)Q){=Gwy65$7p8lxvqc{v9b67_ z()R9^IAE7f9=TcefI~x5m4GuI(AjRolc1fI!?qU8?306`OwBRExi2`0*cnl(XUjs0 z6|4H#rARuaLSV98j=8ZlUoYBbu={g3q?qcIj#{2RGfycSZP4-~_d5#4)eTzZ!}{_x zAH~HseV7|Ez3HuLp?sR$JmJAkxbPWHFP>nhiH+c6pDjBkWOwEi?Jj1dWOP_q&EpA4 zYnI0dTTGD!ohB@G*;N-(%vaWb@-Y+_@$8h)7-0a4#|T;HOb#6+#G77LC|_%0{yR-+ 
z;Tut#cAe89nFuw9&oYZ1S2<)kAT^)iWhI z+oAS8;zaeZf>+42PYxUIz-S?k8o+ll`LEf?u12WRvrOttT?@r1QSn1EbbkLc$_zLy zsU;T4^z7Z!wZM@HZ_72Pe!%R8vIAjxswdgmZ8HIqQ95i4@lJ^hIFdqLsT$kR@S+w0 zk7%rn%HpT%$PFmFf(JyT!P13$p7((0>A0+BA5b!EHPmjuCN5(-Mo&;2Z=p+Ey6aVI|pNl`pnb zOHyA5YGLN%cMOpn^e*%W6fPn;qH{(nG5aBeqk|=2lw(5?Gj=DuxmQ7)&ZZN4rW!Hb zb|y!!T!|#5Ox>_Yzj7viv}0y{Zm5Pk3Rkt~!U#!iP?&y`{G*$Gll-HbZplBB zSRH{W*rG>Bl_iYhS&f-=JR2f=8Jg(JdQI0#$8NKS0gs1WJ_$oT=3|lVM(P%4QZFb;w^vNpZ1k zJu;+5fjX_Ia>TuiG6GELEdKz@EPQ^7k8ZFC%?Xr-`sR=42r09@X=R0S^Y(KX{Ef-FRCq( z2$kZa1k^3Tk+opyYd59}Y)-Uh^qE{FYGU0PBeQkD)p#0^Qv?xp!-#m*(@P>Ino}7@ zsPubvd5{HE_WL>w%>26IC%81_q)B}*m#WZ`T=m7ERX2bJY(1-<;ge;F3|~c8f%Qz7 zo~#m{nJE&QGvH2qwUr29YLV5{>P^@J5I$4?s%5DG%CuK#_b|D4sS|LYmSb1iGE{>h zD_n;zQ1t>3Z)acdp3ucwqL=?;eM!-7&7sZ!e z6OgTNd`3X4e(4pB43CP|r)uk0o1+wyw8@9<)u>QyjF!GXpyy1Mp`UIMzdF4~3v;F| zK46P;k#<_oo?SD*z-F?-0V7(Ho>*0+0m~&_(!tnosjtxVj3}V!kHD8GGDUL|2l|Xc zi)o^mx32npn_&uO%t*ZM16rhwmlN3!h92#A31*xvU2ODIW@oA% z(nf(z763@Mc8;j;06jjLOE4EuGe+FaV{MU*zMPyv7a^VdKB{AU;a zxX_P(e%$EmLSO%U{qxU%zU*)sv%lR_Ev!R(Ics&MbGZ+!iqe_#LogInbR43seuYVZGkTYvdsJ=V;D1sS`494yeY z+)B(3SPU7<2tJTLW?9ywd7zt*fNIWrxrawqs%z?n)!Oep!UrTK75r(X!yH~y<_q(1 z@1}K5oNT^uu-IXdG>e-5ep`Owu!*3JRSzQ{nC(>cl+_D6%OlLZbTOEPbysG0_X9EV zo|Jp$1DP`%Dl7TI=o}Ek`M6wTAT50V^8DhF6(}nabg%bjvx&i2(N*CWL;#bXGd`RL zx_Nqe=|z!bP5Y$uz^1VSjp@E&8o9(9`9QGCYAoCELb%VkQuVBiiD`sdD04x$rQl`n)5%&?~Fh8i&7*@(4E zC#>`&rOxY*@FSG1D4;_9uFMGwvl_;`XIqgz>8#UOedDk)z}h5BVE>};JfbkJ=^3$V zkK%V9p|UDQIr6d&-+e@|D0y|IKQJw;SUrt+VW(AgD4=<0siCS8k$7pt!~q!MODhWz zq0xNo;ATaPY;fDRun2HK4}KU9R}>i(+60{}v$H${RlHFdx?(bHyl+OwXk4Me%_K#3 z6v?<*0%nd%_=9FJ?wx7RrP&oeWg?%Ch@>pdyYuTCM@3H?eH5Jd`nF>{fT(U0ne+8+ z2Vu+#4~7DLp|tRtHpk?NV9g+MsPqd(8dw(U)`iA{7GeX}g@nu11-k1-R)b{cu&xW6 z;mBsO^LI?gwEp_%uMcNqT0j2z@!@Jr>+7Gd|J*jev?km`%=)rpW-|F@N6}65#iPuv zst_~JO$RjI&8DWNuFc9PeHwDw{Bq9!%q|+QQb}IaT-JgHu0ibdzE3gRY!9GEIBr-S zj*vB~=Yc7M!5qeii8(2c@azXNCig!Ie;~>_3(;NgN;LPGJ*an2x6Z1@T;H;f$Y7L> znhyT=8~EbEva*img!(iJepq+O5s@D98;(T;QZz6g=rDLCXod$yt{)RM!3S0{3K{Ht 
zK4?0dh2A6a!U&C3q5S=zvW8`Z@-j&{G7wBV|MU+d;U4nniEvn+tXoxXBNab?$8nfFY;#oHg10!bcquyiTEg*ru*9}NZ`P6(cxRskq`&&P+MLME8RQ~miqj5g_ zEz)P!vUhiMl*3KyX>cr}#=60&8%?#3IIzADqKGLQkBAcynPQNMN%}v=bz#pi{g0rZs2QtjEhUWfWZgLLUj}38l+qf0S;<=bL%0gJ1{9S1= z-k#m@?8sr8xz9vvAr4Mtu;dn4Yw_KOd28fm?!7)leSPBq+Th~`=Iw7Rt+Jj@1NhQu z_qJV|52f`$BTDn`Kol}kg34b?XgJ}miVteNzCP|i<=jE4h{`VBR!10{3WaPgQ?$3Y z0-d&Qk|M5q-qGVofeK9AaELYs6k{(;T4!62cQQkUFb9p*#!ct*zBAEj|4bx>lN-?S~ za%Sp8f#`|SX~IsFMoXhr+@!q%9xb!Jk!6)}IG1-)q4LZELH&-C5#-NKMvy-{89_cy zM)qytY`g;d?;Wk7Z;qp<>gziWS`#1Z6mU zRB~*c-nAg!6=4~HrroDFBBs-$XGZ-7EgP1P0UwYFv!KDL2U@xsP+2@sV`K6HKUlMw ztR6C7C{!n!8%Et%C*7mbc|D82`*4kQIIcVM z)pQa1xa;(?h>A+h%$=JEm4}79cTYaBT`8kySUqq`Rnszm9y=TLH(WI_FC@dHX*fS% zD~V}AAFxK1riiE)ZkEd;Bld8Hx^fs}O!`J9BV~G%v;eS2*rpnESXl86R^Wk1XpDFG z;z3gf+1Db#_y3!x<_=#NUIT=lAg4HPkyV*-p&Yq|vpwz$y_+&bA%4q@$ndink>O`E zBExM)XqL7X1E|d`z%tfrC^(9{FKMz<Zh zSe4oSRK+@T8$6_fI710*Um?(>P&@pqlI#dI_CAIk;c|?tlegJy*Cg)d&&BNz|2O93e_>8Nty5B`ZP$ zTv-Jkqn0^UWplX>JSOPFbYEL zV=!4|KOdy7nhp$0e4zl+P>Sb+Hdith<&BL}oPl;U5%N9fUPUq2hWP2RRHA~&9CTS< z$Ro=VRzRJ|YBtLUT~@Br$my{1i8B{dF;x-9N^&j|+_0rmFOoR}(cL%d7>N=)an`N9Y9Ej@T zt%t46o2VRr7-!rwvL+uUa}F)9f65D{mxo#Q<=C9C z3q+(vzi^YeaGn{~{n2Dq?lay`Lv|fzAiKI8wTAYBF4t^@WHkN4;Z`A{Le71J1BJVN3KV+4mAtz<;FZGr7d zG*qT4h-kE~Xr(N|yr&hG7fJ`LQ<}b3HLZNO2JPCbGX{!^bgv8bRM>bbqHmP+L^w(p z31m>tDE^k4k-@+I`Rm`;W@PZkKR^C`ZAJ#a{`vZM|7}23u2)qCWPaHpM4jcQL1Dk^ z9$?H&Pbm64>2R7Q&N%8Z`qflqTg_DQ=SbyGZ8V3E_hvLijh%)MrI9t67I3{Z zp;3p2Um2lq9_>~WC~LdF?owxrdA5Ii1?O;pnD9H&;?W#25d{M0hw+edE5w-b)MGnx z6PaTa+^=IUb4^Z~H+7XtWj?-T4@Q$0Qz)V=?qZAV%nv9@nBOdA=B%2yiY>C}2}Gcy zy&smaPMfrPuGMs~lN09=(3pl8=BB#8?1;9`fe(BP__8Bnuzwmfr$LLDXj32%XXPQ% z7#Xx``1(4LHVz+$&6xv^T>qo10j{?aiSp?#aFBBmqY2}Few~DylO6)6f5%jm_0JJW zS^pf7l=T&naDM=s%&6AsFS%EoX`;?F>gyXvgbkUtAr?Qqg0b*yPRX9?c8z)GnABar zSUh%H9Zf{smzn6nUMs}p1E=wF)HB^~Rg`I_zZyXMbl^@9FawfiU zRGGs)t6N(&xhOId$nXST>Jb$-N!rKSkkccr>`r#zsLF1ipf2soMJrhu@G|cYs#2Pz z@JZ8cijHQT`|_~nKAhdm@DZW_?FwmXeC)$9SeM;}EIu$9Ydky6UpQu+B`>|OVr#_Z 
zP(83s;yw_NFLY}UGug}wS;jC75bvJd1D8s*K7!aLR~Y23Jd>j$!%*(6>rwBfVmmUY zZX^LA)zqCY^_i*+to^w2r4jICRrp0_oN6KG{0gifGiJu;F1J)*ibJm7eO=f&QuTQM zUTtME|N4|}#AN>X6mG<1e%-PSD!Tb~CTzB};+8r5vP0v@{PJU124m67K1yoxsSaqO zz3sXK&P^+hy4A!_hyKs)#AC#vO~L0pZ}x>FQ4^VsEPKxIrrs^arLuWoRYY`mIq|@> zCS_4?Vdd0O7_g6?2fB4d56z!SY#Y7hB{}e}q{+k(twt2lCpow*G^ z%2i=9n(9(j>5#)@nil&iPw9{gn zXI^DeD9Ol>r{!fIpmauW?Q$+l0HsW{Z1>CuDbkFi(93+Pl!3A5BfO1e-OgMyo&jj9 z#~?P_SOfFvura&bb!ej|BAw%LA$M5|<8#^u2!--4cY)WcO_p{|v2m@U%vFljW?m># z*)3~V-AI8cE1_=Wv^2!vcRY?X^z~J3q^W=W=f^*fwMbKc{pahSGcEu(fFvwp zbD*nN+^nc)=?e$4!<=b^+v{GUs2Y!Mt9;VkQ)JDQ`P9TOIfy^CQyorPXI4a`^4564 zy4$k?x0=Y0kayV_I96{NgK4ZNka(fBO>My;9_5#hK~u8K(E}?jLE4=M^&>pDh?wQ6 zdEvyU20*>AjihIeU7-OE1!Et`6@9BvEYou{mY$E$(>Hic0nwueD!VOPXqyKn#$!@W ziw9OtpwjwPSU9_jZbhK$fwKk)RRexNOksfYue>e~Z(xwOrO89G>S?y+naR2CYlM}| zgfx%T1*^>5p+VPO0F5d}PEINtd|<+>uz|wECMU})BIf>_YN|JBWfSp0pIl3a0A5&y zh)KsC8%H&=8qozia}MQ`f#gcRdcA;QP0PjD6Mt4M93xmwo)5W20s1D$A5E7}(iv09 zs{jiLY?d?Z@+hE9kT@C^_o@k>X)A!d5j&e*U00R^%0#CPtba=(noRra-@pESBb8*@ zkAHvs`-UpXw6A}^{#{AAQ2Xx>H$P3eLvHisho{+{^hbIKL8pa%WH2}1m2New?!Fxa zOgJV{`!o2t_VLHI>LW%|`(WhMlj`&F^i6%+V-y;0DBnWvqr+%m5-l+N5tBAd8A_FD zo|V>Rl8Z-kkrtdq8P?yO^tBAayraPH7<_J7+GTMDO7Q8P%Yi^QF z-kvF99hwa`jqyNMGGXejdf?W-Jd$`3G?@{m)O)eoOfm4%S02vxj(r{k_jp%TPt;Tj za|#;ZGs7;(OqVZV^v(DWf_0(D6r7LL3$ih>1Hm|Rs}NAI9E&u&kwbGjaKRfHEF(vw z=R(_BQ8&cjF&+;4Da(-#`zgzj4!i5Fs99wuOucy)fZV{+!&WTI!!oTc+S8Hm(*vv- zi0E|qt+psC+kiCSXL;wJ+sO|N3Lo)3>is2IlE@A6}70M%t#VSVQ} zqMIf386uro1Rmb*tsYe%{evdjhKjsQ!AU1nqjCgaDwZcZeXfGbNi{NfJL2Qh6dBf0 zOng#>qh^~PJ}BI6G&ysnH>y(O@V%QK$}i8Fv01y4xQ@P@)8-ZtR(CaHBepOi8`XRu z+oX0=_j9=`0&auQU0{)W>~9|6o1{BvovX zFQkQJ1?ICdCv2FqsLQD6i0roR@$vbDjHKK8mk+E-I0EUHp*rDo!A$h`gIZwEgyD*K zzWqS<#u2RYIfZQBb&Ptubr>8w6CB@t0JK1Q1S0FDQ%)!&Z~4Y|A8?DvVQ2ASf~UwV zI|W`BopyM$)o)u0LXNiSuEM)Rcp*ob=JS3pl{<4p8{9FLGE&{1m|J4EKR%kpib5pK zd0Nr!Y(+JLkvCJ6Sg^uP(uZ`kx*yiGN_VGSO)ltl7pW9*wJ)H@yyfg@)XUgj%C!>D zwHv>=>ylozMR>cVFxSIf3PZ%^TI>&Dg=oc6m8Et|n+ zI2&;jsM$)7(0QG8`Bb|XcsA9@y2ip+wAiuz!w!oHs~jMrem(4e 
zXeS;`myA;z-Cb@+&>`=Pf*uzmCs#e_*?PQ*oQR?IuyP*eon^M_!+szk!-gfv2X0+; zY(>y7w3Jzf&J&%Q&>mt<0}}2 zE74I$87e2-TZ0!)p{*+saWhn^w?Lus8`3-0Fwri(RvpDcu*Delvn8|0vZ~axC26_8 z`npwVIM8l5T^M`sGb?S{hTCAqiw!7MP^hk(F~Y6eQ2wlwPom3kHESDdk{%R1s`F-* z73w1HM9P_t=$ZED59Z#=4io*kmXQV@W^YmP)P#0U$j;E91M9PCEU!!k%0^Ugy7O#K z!SHx}M zHS;hk2v|l_53~TMH+}ag#8)7i42Ry_=#(XtP1chziAk1~qbENuPg$Z9?T^kZrec`U z@Y1E5n1jx-j+V3uf>U7L%CR%m?{}<{hYIe2b@x#(4f76FM_WF$jS>pzC$7mu9m=Ly z5I^Xo+twB|@zBAPZ-Mp{&pxyRjP39I*ugz)1m%@^@k6a5$*L*Ud)KM3M06(RZd)_P zG{!r_?5=niW6pl9B<#67&B{YTv$?|!=60$0r1jH0pXS~Z8=Bn$Gp5Oia!m7OK3T~k zZUM+bIMuX>vudKsAdD60tMzp-e)bM2EjLoYq|gSTW*isVdvC@e^F&2KF4`^N!c@@_ zbn~)nRojdjk@B;egK19W)o}|JX&=2=9ew* z;tFAc6}Hy+Brs;AovYEb7uI;VnX4b4gA={I{H1R1BNuG zM$wnr{j6a4W)-X48~1Y~X*t;*k+Lgu z|D8-}(fz0DY|;Iv>TJ>ds?Jh63+FRq=N1H}qsx%=6u^+>qHRBtyP zJD+o3W?{0h$oaKE`{%Z*NN3b!DE4vLYeb+Lxx4f61Kf;+b&YIZd$Fx^*uK*DY-gGw zJxd=erJ8hAaz3m;qrmQj^24%tXLH(~5NBfnHPU4GFih@r_?=e4QEuaEb>!(l%-0;O z@$&7oJr8ex;cx|5gDSsI>B6o-uAcveIoJ~p*$ZnT46|79D_GukMCHtM5^=98q-U4TXi8?R#GLCPr&EVEf0~0eQ!1P}8gsTS?Q06hlhZ z*{pHJisnX+=u~@o{Hh@RLp$ZcM0m;Cn@?x`W<1l~5UzMbl+grpTLT}6cAB<{^bK%~ zyD8Pr0^VQFwj+)4?3CH zI<}YE3-q};dq{vo&-Ubm*?pN zjFqJ;!iY}Hfl!~-=m020&-i*%+> z-?B~|jGp8)hw;n=WK7rih^}W!Aurq6FspZE3$foHcK615Y;Q+T_fLZY9pU?QRrx)A zgWJb9?Mm-kI8M%(5Ibw*S?RWU3R|5>KTv~A&LnP`$fgk%{1!*1@3KwDW<^?yHn?j9VPBO`dGRIct1kT^feNSG|wO{X~X~awR=_ zi_i=&SZ!<+Tj62&I461wI=MtEB35MDTE?AonUgW)wvimeJ7{JDbmO-SM(d|hKE>8g zqkM|3XOxejqbI+3pj`fi!~N_qgL&T?nS@7Za@zXTnVN5+Zjh{~+ror8VEePdrlCKM z_#fM-53Om|)F@6VhYqhs8{sa$y2f(Lzr9KV+!03}zTINgIh`MHm?$A^t_o`&4zSy! 
zE!qA+cDbipT>erX4zEpSEqNyU=Wuu zKaU`4GIr|H^CiO#l9{#YUAf7FuBlAL4bP4sxZ6E{D33@E%uCa_EO;J~)uv&r*6M|b zeG10&**jb&#ZKkjPsl{N55c@=AK77u9!_~iW}o?ZtYqK1SNAm+>NF^NM*yB^c5vGm zGq39gs_vM99>lbamZnY{0-J4(cK5s68#WGxcanD2U13933beB`B*qbp@Ge8`S+-4E z=Smm10GV##s9s6OoMhEpZ4+zjJiW}uYA5KP7s;xl+|rBZMe^Fz=x8>(=&3kc43)WG z*;8RN$}pdsuuJV{qv-Ehg#6Va|8o7m!vjK%(TS1HJSF4Gr-o$FOzoO8yp|Gr9 z{U$b((3+3IIt}cjb}IZs6FL*C;lo6{kYO6HwvV2c9bo}WGk@UD7;Xp9aT^+o!rpaQ z<8D7$bx3Vy?wSOuL+RNWsO>3aC0%(Z0>317Z^)Jh5l)u)-PDBco7C z)l%*lw3~%`tZgjw=`%@9Au;|)HI&7Uc4o_-byY?$dB#;>fwJ~Y&UU>#%%n-11~%49 z_2$%2rL%%jU8+H57&}kbe@nC5*gKjVg*Uy*p{p9=Gw2ELu86Cx#N5nty3)`33uI{~ zM@LNDveHV6+BvhlSCv5D!IAYSP0{E`pnj`9;^?1Eg`^PV6Si2f8*tHWCpmGO-Cs}`h*rtE1psdlxEBJRS+ zvh9heLi7C?sm#5`pKJwJUT@YMFY~z7TrCmA$di7lIs2GiSqE0C^uUSzowF48(mBi` zL8@lt`A|d&5m4~1ddy+;RI*G_w;Xg1W7UpWmUv4?m%@~*;>O2Xn`ln1qG@Ry&Fy$Y zhn<4Dyl=1xD~v1h2}R7x!UG`l7NYw(A#kv2i>C$;s|d}gHP1@y{0FN79)-_#!>ui< zdr+~tVUrhTN>MNL^ma;S?+3}sQQi})CIm(W)CW1*3v9Q(FQoVCb~1m<-SGH% zidT93JjJU#o+(}txF!)AgLUSYG6LJyma|>GeN;v|uoJK?KMsDlHD;TsjQ6eCkqI%U zSg?Kx9{$)?JXZVTd$@PL-05$0Q(0XIa>A=7Bz5 zmK;UDFnw$3%)ZqWj_e96bN2Z27%EF2d5mk~fk;I#cUJQYgF6Zi^7m@&AZu7@r+L*l zSNw0{DeJ0YqhrlZ+L^<2^J>rSR=&y!ZfWJY^3{WO}V%JtJ|o+{Tfnum&Wd?Nwh`{kCo z(-SooqYW@93{V!s&-fzC4Q7SOr@g1P5<&SK{V|^HK%*X4o8k;X0A$6a} zjY&GE8N*k$)%*K4K~+!rqd_B5?$Pr?^MEUe#@o_1<`p4#zE;QDeP1R}@4Vr*){YjO z{_LqA?IX5_PYS}e1UH#ATE*HKs>rH)IP^C%h%uRWrpuoG_atf3Ij{PSU&Y?0MCH{CsZb4!)t#-ojhwPmVEcDB!)XMFka~4vq#=sPp~2nVT8&4DOeFpmJYv* z8Qbmdm~;9_KWx-01v8)MVqHGdN)w(4W;N5SxL>9MWmfo(?ZDf4U&skKGK81GM&?jI zpPK>h@58q1`bf2$BeT99wAD3Au-CHc4o$33JFf#5sj74Qian7DqG6WWKHV$8*nwpp z6?c=GwG2C^9Cdf3nVCwqhU!{1t7kIRgzoqys&ln3lwhIRVVJPZaCQmf)Z#Qs!Y^*I#tKkbq2ihHiVYV|C33VO{>f z-eH!r?NK*$jBH&Hq&}D8>zCXiJ76KUU5(12sgwdscO8Wti`q zo*3y>GbBImHp&Kr7I%Ea$(l_^jt}CtAEIoe^ajb$A3u--}I zHU3Bj$t`bbN_rV$8v-!zrD>Zpm3D&M5f(c~xPUlYP9_)_(x>NsqXA7hJJG7<(oWDC z={eTWMc@{ls?a47B~csSA%(LApCsC18`rbZ{Y;{mLEtH*N!a;Pr|p@keeait%(G2V^WJ!i?iB`M>h4|k z64B$C)Ap<2>JM$i!((TJj$(G~);6kP3zoG#30DN6LokMsJ>X?SbZBSzxW{zhg}wK! 
z)!84Awy(|cyN;q`^HP?jveVnYAS-gR!>*uaq1(>Z9)7MYHzhpp=G*T&CbH8tk?@)S zeq-xY#h!IM+WIbA3K3Baw7o+!gdC@5nH=xR?o#eV{>u}=A=>7F_tA?7(pSKq+0Uzw zHlEVc+FW{ztG z$VYE0L1Vn)Oh|?y;S;RzYph2E>Wq9k)EARSX6KAOS={iQ6f|lIm%JosK`#$DbWI@) zoHMD}0&ZsMh0+FI6*tSVbmKGfbi?1aR8sKYN{&wdlGvF3C9yI6BsO3{6J!iHk;G4%hm0UFw zB7%xXOqJ{?o^Qd*7Z#^sUG|E$k`ZR5DCU!dNO~FFE8X>8PL8q7=Xq07@|<~&SERNu zH_JYV{2{V{tgvSo;Q2zPHpJ-}7Nxpv(u|tjd%>4#HBWD6Sr&KIl079Gw{-bk%7U*R8xLJ96A>?v6SgV^ySan!H8_ zorQLH!%Sur^VVXe(u}#)&nM||zRF@FSRw0yt9T+J`AjBP*-U^_?*w#}u!A-Oa~JCJ z{1<6>lxMy$cL0N%?(E2FFx{${tlT4k(6%JeM!rw~Zricv`)x^0B(R-Ar&b(2 z-7#6VFQq!gRx7hs$N2b-D#HLte$`S2fYCMKyrQ20$782?@ZJl|S+`r)|2La9_`VtJ zH|$4qU&Z$NAI*Ig+v|Tc_f>3{R-%2UIDXp^k-76(gKir-vTGfvI<)noef?EjMMgW$ z87W{l6pgolI?(rlx2QaZS_oYTRw3#-H z?|kS7?yJEzxBAQ~AIm9N?JsQ}7}dFhw>j_1Kqz(wg}-|ScR*_-_^~Hw5t!gLi}afZ z?d%crO^e;tmFNT3(|)G zO6JHg`AfqhbA$TGmnu-^1u*K}ch;E*^6i!nSmsQ3WboCIZ(cKeVzO_?lTq3!yOp{u z1*3Mn0*c=DEGyN%+~|@qEt_@KFueQJ7;QbXGOfH`yToYZgetHvGr3uGDgj(`xA$6U z%wu}aF&oYu*VjJG^UWwb2_?;C>p1SQg|yB8?eTSc4jdF;t)@YdTs%_>?Rnhx@{eR@xtSvNp001A02m}BC Z000301^_}s0stET0{{R300000002@G{aye7 literal 0 HcmV?d00001 diff --git a/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv.bgz b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv.bgz new file mode 100644 index 0000000000000000000000000000000000000000..148f0f22fa564b24d7c9319c85a5fa24e6cd27f1 GIT binary patch literal 1338 zcmV-A1;zRwiwFb&00000{{{d;LjnLD1$|acZ`()=Jx~6N1(L&0-CQ?Kf=!dabr-v* zL7J#p1Fap{Zj0StznGDowyD#DA@W&6iEkd?Xt%jt?$Ty>Sgu#;Zkz75<38>-Rg<*G)X*5AtGm(6nbZw0r@|AzFiUf-8-Ea!;7m6riweXte^FZc58Wp%B(CBb;8+vn1Cnj z+zBi}$E*PqLYPV26Levi!6>qUn#YJx7)C&ZiKpdiw^^^s*W1nVcC|}?T+HXEmw0lf z_Gj(BazM(ooTc0;vW^T)po4G}t}R0*P*w#a%bZ)!U=ai%**JI) zIJ&uF29f6^+?-Emi}{NrAoU{31}YQrF=#BP0-AtD#sr-M1EPU@BmSp!?4G_$;Yi7< z1nR}GW&LcV0jV6IGAn0^Ito&3JV1vaNcCxuC3^@tH~?d9+QSJQ0~JV`UY;Y*Xt03A&qOuJuhRQ&o zD6^D1A#NYGu(bIZS)UB zOlJTNNZoJdA_Os$=Slb&Buu#jFb`3P+@=wYMI#4WP0igx2?_PU2j?d~j+AV35vnaY 
zSc8@W(wsp$(JVAfpp70vpp1Gj1*k^g0jTvqB48D*+ohI|AkXyh=KNAl{y;3#oAbrA z%ui?U=k*P!C&=N-5)%R)qb3K*pycZ8{GtmUU0pC$T)1Ti3!w4@H4mUp0A(+IA~CC~ z1Bu!;6Dt$vy3rx364mxZUGxa@+ySQZ*ZI}S>$m0V^yYkieKTF0|6NZ1dV4)T-M=)k z6GF}tA{(QGNje}W&PpDY(QwAlFjlClRK)-$2PN4PA@Cy;zZM;tAk8g1OaYZAsEq-2 zOaMMi5b78pGjeSl2r&y8M1YHFgGj^;g~C8VjzP=SZrD5wtL5E+k(a4J>{TIxCwaUk zOw<{oY8KD6z8eH6BLOwrfiS^F#XH^pkwxpl#&qFMGq)f{qnIGiK*~@%$E=aIkDr!zLm9t1p44!0^=@);_J)2#0`+7j zYCe(i+E^fwOyL6do3DUhmhOFzTGB6`e1DIWMLYhC1jddh&~eVA_^9u-i None: """Test finemapping results (SuSie) from source.""" + hl.init(sc=spark.sparkContext, log="/dev/null", idempotent=True) assert isinstance( FinnGenFinemapping.from_finngen_susie_finemapping( spark=spark, - finngen_finemapping_df="tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz", - finngen_finemapping_summaries="tests/gentropy/data_samples/finngen_credset_summary_sample.tsv", - finngen_release_prefix="FINNGEN_R10", + finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, + finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, + finngen_release_prefix="FINNGEN_R11", ), StudyLocus, ) diff --git a/tests/gentropy/datasource/finngen/test_finngen_study_index.py b/tests/gentropy/datasource/finngen/test_finngen_study_index.py index 6fc4665dc..5b2be30c4 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_study_index.py +++ b/tests/gentropy/datasource/finngen/test_finngen_study_index.py @@ -3,6 +3,7 @@ from __future__ import annotations from pyspark.sql import SparkSession +from pyspark.sql import types as t from gentropy.dataset.study_index import StudyIndex from gentropy.datasource.finngen.study_index import FinnGenStudyIndex @@ -11,3 +12,75 @@ def test_finngen_study_index_from_source(spark: SparkSession) -> None: """Test study index from source.""" assert isinstance(FinnGenStudyIndex.from_source(spark), StudyIndex) + + +def test_finngen_study_index_add_efos(spark: SparkSession) -> None: + """Test finngen study index add efo 
ids.""" + study_index_table_data = [ + ( + "AB1_1", + "Actinomycosis", + "FINNGEN_R11", + "gwas", + ), + ( + "AB1_2", + "Some unknown trait", + "FINNGEN_R11", + "gwas", + ), + ( + "AB1_1", + "Some unknown trait", + "FINNGEN_R11", + "gwas", + ), + ( + "AB1_1", + "Bleeding", + "FINNGEN_R11", + "gwas", + ), + ] + study_index_df = spark.createDataFrame( + data=study_index_table_data, + schema=t.StructType( + [ + t.StructField("studyId", t.StringType(), nullable=False), + t.StructField("traitFromSource", t.StringType(), nullable=False), + t.StructField("projectId", t.StringType(), nullable=False), + t.StructField("studyType", t.StringType(), nullable=False), + ] + ), + ) + + curation_table_data = [ + ("FinnGen r11", "Actinomycosis", "http://www.ebi.ac.uk/efo/EFO_0007128"), + ("FinnGen r11", "bleeding", "http://purl.obolibrary.org/obo/MP_0001914"), + ("FinnGen r11", "Bruxism", "http://purl.obolibrary.org/obo/MONDO_0002443"), + ( + "PheWAS 2024", + "20161#Pack years of smoking", + "http://www.ebi.ac.uk/efo/EFO_0005671", + ), + ] + curation_df = spark.createDataFrame( + data=curation_table_data, + schema=t.StructType( + [ + t.StructField("STUDY", t.StringType(), nullable=False), + t.StructField("PROPERTY_VALUE", t.StringType(), nullable=False), + t.StructField("SEMANTIC_TAG", t.StringType(), nullable=False), + ] + ), + ) + + study_index = StudyIndex(_df=study_index_df, _schema=study_index_df.schema) + assert isinstance( + FinnGenStudyIndex.join_efo_mapping( + study_index, + finngen_release_prefix="FINNGEN_R11_", + efo_curation_mapping=curation_df, + ), + StudyIndex, + ) From 2612aa7275f485e46ff2d8d2458debe5e180f2b8 Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:30:46 +0100 Subject: [PATCH 018/188] fix: using h4 instead of log2(h4/h3) (#740) --- src/gentropy/method/l2g/feature_factory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index f037b57b4..1158c6067 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -1,4 +1,5 @@ """Collection of methods that extract features from the gentropy datasets to be fed in L2G.""" + from __future__ import annotations from functools import reduce @@ -59,7 +60,7 @@ def _get_max_coloc_per_credible_set( colocalisation_df = colocalisation.df.select( f.col("leftStudyLocusId").alias("studyLocusId"), "rightStudyLocusId", - f.coalesce("log2h4h3", "clpp").alias("score"), + f.coalesce("h4", "clpp").alias("score"), ColocalisationFactory._add_colocalisation_metric(), ) From 93a6e60b86bd43650e3c0d9a64f5c4798084d747 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 3 Sep 2024 11:44:38 +0100 Subject: [PATCH 019/188] fix: adding carma_tau parameter to susie_finemapper (#743) * fix: adding carma_tau parameter to susie_finemapper * fix: changing default * fix: defaults --- src/gentropy/config.py | 1 + src/gentropy/method/carma.py | 15 +++++++++++---- src/gentropy/susie_finemapper.py | 10 +++++++++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 2d0cf5b8e..22bb3c55c 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -451,6 +451,7 @@ class FinemapperConfig(StepConfig): sum_pips: float = MISSING susie_est_tausq: bool = MISSING run_carma: bool = MISSING + carma_tau: float = MISSING run_sumstat_imputation: bool = MISSING carma_time_limit: int = MISSING imputed_r2_threshold: float = MISSING diff --git a/src/gentropy/method/carma.py b/src/gentropy/method/carma.py index af8816706..b46f23a56 100644 --- a/src/gentropy/method/carma.py +++ b/src/gentropy/method/carma.py @@ -1,4 +1,5 @@ """CARMA outlier detection method.""" + from __future__ import annotations import concurrent.futures @@ -18,7 +19,10 @@ class CARMA: @staticmethod def 
time_limited_CARMA_spike_slab_noEM( - z: np.ndarray, ld: np.ndarray, sec_threshold: float = 600 + z: np.ndarray, + ld: np.ndarray, + sec_threshold: float = 600, + tau: float = 0.04, ) -> dict[str, Any]: """The wrapper for the CARMA_spike_slab_noEM function that runs the function in a separate thread and terminates it if it takes too long. @@ -26,6 +30,7 @@ def time_limited_CARMA_spike_slab_noEM( z (np.ndarray): Numeric vector representing z-scores. ld (np.ndarray): Numeric matrix representing the linkage disequilibrium (LD) matrix. sec_threshold (float): The time threshold in seconds. + tau (float): Tuning parameter controlling the level of shrinkage of the LD matrix Returns: dict[str, Any]: A dictionary containing the following results: @@ -38,7 +43,9 @@ def time_limited_CARMA_spike_slab_noEM( try: # Execute CARMA.CARMA_spike_slab_noEM with a timeout with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(CARMA.CARMA_spike_slab_noEM, z=z, ld=ld) + future = executor.submit( + CARMA.CARMA_spike_slab_noEM, z=z, ld=ld, tau=tau + ) result = future.result(timeout=sec_threshold) except concurrent.futures.TimeoutError: # If execution exceeds the timeout, return None @@ -71,7 +78,7 @@ def CARMA_spike_slab_noEM( all_inner_iter (int): The number of inner iterations in each CARMA iteration. epsilon_threshold (float): Threshold for convergence in CARMA iterations. num_causal (int): Maximal number of causal variants to be selected in the final model. - tau (float): Tuning parameter controlling the degree of sparsity in the Spike-and-Slab prior. + tau (float): Tuning parameter controlling the level of shrinkage of the LD matrix. outlier_switch (bool): Whether to consider outlier detection in the analysis. outlier_BF_index (float): Bayes Factor threshold for identifying outliers. @@ -604,7 +611,7 @@ def _MCS_modified( # noqa: C901 num_causal (int): Maximal number of causal variants to be selected in the final model. 
outlier_switch (bool): Whether to consider outlier detection in the analysis. input_conditional_S_list (list[int] | None): The conditional set. Defaults to None. - tau (float): Tuning parameter controlling the degree of sparsity in the Spike-and-Slab prior. + tau (float): Tuning parameter controlling the level of shrinkage of the LD matrix. epsilon (float): Threshold for convergence in CARMA iterations. inner_all_iter (int): The number of inner iterations in each CARMA iteration. outlier_BF_index (float | None): Bayes Factor threshold for identifying outliers. Defaults to None. diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 5ff36a764..f2997eb9f 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -58,6 +58,7 @@ def __init__( run_carma: bool = False, run_sumstat_imputation: bool = False, carma_time_limit: int = 600, + carma_tau: float = 0.15, imputed_r2_threshold: float = 0.9, ld_score_threshold: float = 5, ) -> None: @@ -79,6 +80,7 @@ def __init__( run_carma (bool): run CARMA, default is False run_sumstat_imputation (bool): run summary statistics imputation, default is False carma_time_limit (int): CARMA time limit, default is 600 seconds + carma_tau (float): CARMA tau, shrinkage parameter imputed_r2_threshold (float): imputed R2 threshold, default is 0.9 ld_score_threshold (float): LD score threshold ofr imputation, default is 5 """ @@ -113,6 +115,7 @@ def __init__( susie_est_tausq=susie_est_tausq, run_carma=run_carma, run_sumstat_imputation=run_sumstat_imputation, + carma_tau=carma_tau, carma_time_limit=carma_time_limit, imputed_r2_threshold=imputed_r2_threshold, ld_score_threshold=ld_score_threshold, @@ -654,6 +657,7 @@ def susie_finemapper_from_prepared_dataframes( run_carma: bool = False, run_sumstat_imputation: bool = False, carma_time_limit: int = 600, + carma_tau: float = 0.04, imputed_r2_threshold: float = 0.8, ld_score_threshold: float = 4, sum_pips: float = 0.99, @@ -677,6 +681,7 
@@ def susie_finemapper_from_prepared_dataframes( run_carma (bool): run CARMA, default is False run_sumstat_imputation (bool): run summary statistics imputation, default is False carma_time_limit (int): CARMA time limit, default is 600 seconds + carma_tau (float): CARMA tau, shrinkage parameter imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 ld_score_threshold (float): LD score threshold ofr imputation, default is 4 sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) @@ -721,7 +726,7 @@ def susie_finemapper_from_prepared_dataframes( if run_carma: carma_output = CARMA.time_limited_CARMA_spike_slab_noEM( - z=z_to_fm, ld=ld_to_fm, sec_threshold=carma_time_limit + z=z_to_fm, ld=ld_to_fm, sec_threshold=carma_time_limit, tau=carma_tau ) if carma_output["Outliers"] != [] and carma_output["Outliers"] is not None: GWAS_df.drop(carma_output["Outliers"], inplace=True) @@ -1225,6 +1230,7 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( run_carma: bool = False, run_sumstat_imputation: bool = False, carma_time_limit: int = 600, + carma_tau: float = 0.04, imputed_r2_threshold: float = 0.9, ld_score_threshold: float = 5, sum_pips: float = 0.99, @@ -1245,6 +1251,7 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( run_carma (bool): run CARMA, default is False run_sumstat_imputation (bool): run summary statistics imputation, default is False carma_time_limit (int): CARMA time limit, default is 600 seconds + carma_tau (float): CARMA tau, shrinkage parameter imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 ld_score_threshold (float): LD score threshold ofr imputation, default is 4 sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) @@ -1443,6 +1450,7 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( run_carma=run_carma, run_sumstat_imputation=run_sumstat_imputation, 
carma_time_limit=carma_time_limit, + carma_tau=carma_tau, imputed_r2_threshold=imputed_r2_threshold, ld_score_threshold=ld_score_threshold, sum_pips=sum_pips, From bb8558c526b0532f2ec7d88990f570ed7496f10a Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 3 Sep 2024 13:16:09 +0100 Subject: [PATCH 020/188] feat: logic and airflow pipeline for validation (#730) * refactor: generalised validation logic * refactor: generalised validation logic * fix(airflow): fine-tuning DAG for data validation * fix(validation): study locus uniqueness fixed * feat: add invalid/valid dataset generation in validation steps/dag (#734) * fix: does not belong in this PR * refactor: to be moved to orchestration repo * docs: respective docs pages for the steps * docs: ammend docstrings * revert: maintain dag for now but it should be removed eventually * feat: validate study_locus dataset to produce valid or invalid df * feat: adjust DAG to parametrise dataset validation * fix: duplicated row * feat: increase abstraction of Dataset validation of rows * docs: increase clarity of what the function does * fix: error message * revert: unintended change * test: testing dataset filtering by quality flag --------- Co-authored-by: DSuveges * chore: pre-commit auto fixes [...] 
--------- Co-authored-by: David Ochoa Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- .../steps/study_locus_validation.md | 5 + docs/python_api/steps/study_validation.md | 5 + src/airflow/dags/data_validation.py | 96 +++++++++++++++++ src/gentropy/config.py | 42 ++++++++ src/gentropy/dataset/dataset.py | 79 ++++++++++++++ src/gentropy/dataset/study_index.py | 46 ++++---- src/gentropy/dataset/study_locus.py | 49 ++++++++- src/gentropy/study_locus_validation.py | 57 ++++++++++ src/gentropy/study_validation.py | 74 +++++++++++++ .../dataset/test_dataset_exclusion.py | 100 ++++++++++++++++++ 10 files changed, 530 insertions(+), 23 deletions(-) create mode 100644 docs/python_api/steps/study_locus_validation.md create mode 100644 docs/python_api/steps/study_validation.md create mode 100644 src/airflow/dags/data_validation.py create mode 100644 src/gentropy/study_locus_validation.py create mode 100644 src/gentropy/study_validation.py create mode 100644 tests/gentropy/dataset/test_dataset_exclusion.py diff --git a/docs/python_api/steps/study_locus_validation.md b/docs/python_api/steps/study_locus_validation.md new file mode 100644 index 000000000..350558806 --- /dev/null +++ b/docs/python_api/steps/study_locus_validation.md @@ -0,0 +1,5 @@ +--- +title: Study-Locus Validation +--- + +::: gentropy.study_locus_validation.StudyLocusValidationStep diff --git a/docs/python_api/steps/study_validation.md b/docs/python_api/steps/study_validation.md new file mode 100644 index 000000000..8e2b68099 --- /dev/null +++ b/docs/python_api/steps/study_validation.md @@ -0,0 +1,5 @@ +--- +title: Study Validation +--- + +::: gentropy.study_validation.StudyValidationStep diff --git a/src/airflow/dags/data_validation.py b/src/airflow/dags/data_validation.py new file mode 100644 index 000000000..875a169dc --- /dev/null +++ b/src/airflow/dags/data_validation.py @@ 
-0,0 +1,96 @@ +"""DAG to validate study locus and study index datasets.""" + +from __future__ import annotations + +from pathlib import Path + +import common_airflow as common + +from airflow.models.dag import DAG + +CLUSTER_NAME = "otg-validation" + +# Input datasets: +STUDY_INDICES = [ + "gs://gwas_catalog_data/study_index", + "gs://eqtl_catalogue_data/study_index", + "gs://finngen_data/r10/study_index", +] +STUDY_LOCI = [ + "gs://gwas_catalog_data/credible_set_datasets/gwas_catalog_PICSed_curated_associations", + "gs://gwas_catalog_data/credible_set_datasets/gwas_catalog_PICSed_summary_statistics", + "gs://eqtl_catalogue_data/credible_set_datasets/susie", + "gs://finngen_data/r10/credible_set_datasets/finngen_susie_processed", +] +TARGET_INDEX = "gs://genetics_etl_python_playground/releases/24.06/gene_index" +DISEASE_INDEX = "gs://open-targets-pre-data-releases/24.06/output/etl/parquet/diseases" + +# Output datasets: +VALIDATED_STUDY = "gs://ot-team/dsuveges/otg-data/validated_study_index" +INVALID_STUDY = f"{VALIDATED_STUDY}_invalid" +INVALID_STUDY_QC = [ + "UNRESOLVED_TARGET", + "UNRESOLVED_DISEASE", + "UNKNOWN_STUDY_TYPE", + "DUPLICATED_STUDY", + "NO_GENE_PROVIDED", +] + +VALIDATED_STUDY_LOCI = "gs://ot-team/dsuveges/otg-data/validated_credible_set" +INVALID_STUDY_LOCI = f"{VALIDATED_STUDY_LOCI}_invalid" +INVALID_STUDY_LOCUS_QC = [ + "DUPLICATED_STUDYLOCUS_ID", + "AMBIGUOUS_STUDY", + "FAILED_STUDY", + "MISSING_STUDY", + "NO_GENOMIC_LOCATION_FLAG", + "COMPOSITE_FLAG", + "INCONSISTENCY_FLAG", + "PALINDROMIC_ALLELE_FLAG", +] + +with DAG( + dag_id=Path(__file__).stem, + description="Open Targets Genetics — Study locus and study index validation", + default_args=common.shared_dag_args, + **common.shared_dag_kwargs, +) as dag: + # Definition of the study index validation step: + validate_studies = common.submit_step( + cluster_name=CLUSTER_NAME, + step_id="study_validation", + task_id="study_validation", + other_args=[ + f"step.study_index_path={STUDY_INDICES}", + 
f"step.target_index_path={TARGET_INDEX}", + f"step.disease_index_path={DISEASE_INDEX}", + f"step.valid_study_index_path={VALIDATED_STUDY}", + f"step.invalid_study_index_path={INVALID_STUDY_LOCI}", + f"step.invalid_qc_reasons={INVALID_STUDY_QC}", + ], + ) + + # Definition of the study locus validation step: + validate_study_loci = common.submit_step( + cluster_name=CLUSTER_NAME, + step_id="credible_set_validation", + task_id="credible_set_validation", + other_args=[ + f"step.study_index_path={VALIDATED_STUDY}", + f"step.study_locus_path={STUDY_LOCI}", + f"step.valid_study_locus_path={VALIDATED_STUDY_LOCI}", + f"step.invalid_study_locus_path={INVALID_STUDY_LOCI}", + f"step.invalid_qc_reasons={INVALID_STUDY_LOCUS_QC}", + ], + ) + + ( + common.create_cluster( + CLUSTER_NAME, + master_machine_type="n1-highmem-32", + ) + >> common.install_dependencies(CLUSTER_NAME) + >> validate_studies + >> validate_study_loci + # >> common.delete_cluster(CLUSTER_NAME) + ) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 22bb3c55c..ed5fe4c81 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -483,6 +483,38 @@ class CredibleSetQCConfig(StepConfig): _target_: str = "gentropy.credible_set_qc.CredibleSetQCStep" +@dataclass +class StudyValidationStepConfig(StepConfig): + """Configuration of the study index validation step. + + The study indices are read from multiple location, therefore we are expecting a list of paths. + """ + + study_index_path: list[str] = MISSING + target_index_path: str = MISSING + disease_index_path: str = MISSING + valid_study_index_path: str = MISSING + invalid_study_index_path: str = MISSING + invalid_qc_reasons: list[str] = MISSING + _target_: str = "gentropy.study_validation.StudyValidationStep" + + +@dataclass +class StudyLocusValidationStepConfig(StepConfig): + """Configuration of the study index validation step. + + The study locus datasets are read from multiple location, therefore we are expecting a list of paths. 
+ """ + + study_index_path: str = MISSING + study_locus_path: list[str] = MISSING + valid_study_locus_path: str = MISSING + invalid_study_locus_path: str = MISSING + invalid_qc_reasons: list[str] = MISSING + gwas_significance: float = WindowBasedClumpingStepConfig.gwas_significance + _target_: str = "gentropy.study_locus_validation.StudyLocusValidationStep" + + @dataclass class Config: """Application configuration.""" @@ -544,3 +576,13 @@ def register_config() -> None: cs.store( group="step", name="locus_breaker_clumping", node=LocusBreakerClumpingConfig ) + cs.store( + group="step", + name="credible_set_validation", + node=StudyLocusValidationStepConfig, + ) + cs.store( + group="step", + name="study_validation", + node=StudyValidationStepConfig, + ) diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index b79930c23..b31537a2a 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -4,11 +4,13 @@ from abc import ABC, abstractmethod from dataclasses import dataclass +from enum import Enum from functools import reduce from typing import TYPE_CHECKING, Any import pyspark.sql.functions as f from pyspark.sql.types import DoubleType +from pyspark.sql.window import Window from typing_extensions import Self from gentropy.common.schemas import flatten_schema @@ -74,6 +76,24 @@ def get_schema(cls: type[Self]) -> StructType: """ pass + @classmethod + def get_QC_column_name(cls: type[Self]) -> str | None: + """Abstract method to get the QC column name. Assumes None unless overriden by child classes. + + Returns: + str | None: Column name + """ + return None + + @classmethod + def get_QC_categories(cls: type[Self]) -> list[str]: + """Method to get the QC categories for this dataset. Returns empty list unless overriden by child classes. 
+ + Returns: + list[str]: Column name + """ + return [] + @classmethod def from_parquet( cls: type[Self], @@ -170,6 +190,46 @@ def validate_schema(self: Dataset) -> None: f"The following fields present differences in their datatypes: {fields_with_different_observed_datatype}." ) + def valid_rows(self: Self, invalid_flags: list[str], invalid: bool = False) -> Self: + """Filters `Dataset` according to a list of quality control flags. Only `Dataset` classes with a QC column can be validated. + + Args: + invalid_flags (list[str]): List of quality control flags to be excluded. + invalid (bool): If True returns the invalid rows, instead of the valids. Defaults to False. + + Returns: + Self: filtered dataset. + + Raises: + ValueError: If the Dataset does not contain a QC column. + """ + # If the invalid flags are not valid quality checks (enum) for this Dataset we raise an error: + for flag in invalid_flags: + if flag not in self.get_QC_categories(): + raise ValueError( + f"{flag} is not a valid QC flag for {type(self).__name__} ({self.get_QC_categories()})." + ) + + qc_column_name = self.get_QC_column_name() + # If Dataset (class) does not contain QC column we raise an error: + if not qc_column_name: + raise ValueError( + f"{type(self).__name__} objects do not contain a QC column to filter by." + ) + else: + column: str = qc_column_name + # If QC column (nullable) is not available in the dataframe we create an empty array: + qc = f.when(f.col(column).isNull(), f.array()).otherwise(f.col(column)) + + filterCondition = ~f.arrays_overlap( + f.array([f.lit(i) for i in invalid_flags]), qc + ) + # Returning the filtered dataset: + if invalid: + return self.filter(~filterCondition) + else: + return self.filter(filterCondition) + def drop_infinity_values(self: Self, *cols: str) -> Self: """Drop infinity values from Double typed column. 
@@ -260,3 +320,22 @@ def update_quality_flag( flag_condition, f.array_union(qc, f.array(f.lit(flag_text.value))), ).otherwise(qc) + + @staticmethod + def flag_duplicates(test_column: Column) -> Column: + """Return True for duplicated values in column. + + Args: + test_column (Column): Column to check for duplicates + + Returns: + Column: Column with a boolean flag for duplicates + """ + return ( + f.count(test_column).over( + Window.partitionBy(test_column).rowsBetween( + Window.unboundedPreceding, Window.unboundedFollowing + ) + ) + > 1 + ) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index e8c787ed6..f60b2135c 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -10,7 +10,6 @@ from typing import TYPE_CHECKING from pyspark.sql import functions as f -from pyspark.sql.window import Window from gentropy.assets import data from gentropy.common.schemas import parse_spark_schema @@ -109,6 +108,24 @@ def get_schema(cls: type[StudyIndex]) -> StructType: """ return parse_spark_schema("study_index.json") + @classmethod + def get_QC_column_name(cls: type[StudyIndex]) -> str: + """Return the name of the quality control column. + + Returns: + str: The name of the quality control column. + """ + return "qualityControls" + + @classmethod + def get_QC_categories(cls: type[StudyIndex]) -> list[str]: + """Return the quality control categories. + + Returns: + list[str]: The quality control categories. 
+ """ + return [member.value for member in StudyQualityCheck] + @classmethod def aggregate_and_map_ancestries( cls: type[StudyIndex], discovery_samples: Column @@ -197,7 +214,7 @@ def is_quality_flagged(self: StudyIndex) -> Column: if "qualityControls" not in self.df.columns: return f.lit(False) else: - return f.size(self.df.qualityControls) != 0 + return f.size(self.df["qualityControls"]) != 0 def has_summarystats(self: StudyIndex) -> Column: """Return a boolean column indicating if a study has harmonized summary statistics. @@ -213,30 +230,17 @@ def validate_unique_study_id(self: StudyIndex) -> StudyIndex: Returns: StudyIndex: with flagged duplicated studies. """ - validated_df = ( - self.df.withColumn( - "isDuplicated", - f.when( - f.count("studyType").over( - Window.partitionBy("studyId").rowsBetween( - Window.unboundedPreceding, Window.unboundedFollowing - ) - ) - > 1, - True, - ).otherwise(False), - ) - .withColumn( + return StudyIndex( + _df=self.df.withColumn( "qualityControls", - StudyIndex.update_quality_flag( + self.update_quality_flag( f.col("qualityControls"), - f.col("isDuplicated"), + self.flag_duplicates(f.col("studyId")), StudyQualityCheck.DUPLICATED_STUDY, ), - ) - .drop("isDuplicated") + ), + _schema=StudyIndex.get_schema(), ) - return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema()) def _normalise_disease( self: StudyIndex, diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 05e6514cc..47a73c665 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -8,7 +8,7 @@ import numpy as np import pyspark.sql.functions as f -from pyspark.sql.types import FloatType +from pyspark.sql.types import FloatType, StringType from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( @@ -46,6 +46,7 @@ class StudyLocusQualityCheck(Enum): NOT_QUALIFYING_LD_BLOCK (str): LD block does not contain variants at the required R^2 threshold 
FAILED_STUDY (str): Flagging study loci if the study has failed QC MISSING_STUDY (str): Flagging study loci if the study is not found in the study index as a reference + DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique. """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -63,6 +64,7 @@ class StudyLocusQualityCheck(Enum): ) FAILED_STUDY = "Study has failed quality controls" MISSING_STUDY = "Study not found in the study index" + DUPLICATED_STUDYLOCUS_ID = "Non-unique study locus identifier" class CredibleInterval(Enum): @@ -99,9 +101,16 @@ def validate_study(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: Returns: StudyLocus: Updated study locus with quality control flags. """ + # Quality controls is not a mandatory field in the study index schema, so we have to be ready to handle it: + qc_select_expression = ( + f.col("qualityControls") + if "qualityControls" in study_index.df.columns + else f.lit(None).cast(StringType()) + ) + study_flags = study_index.df.select( f.col("studyId").alias("study_studyId"), - f.col("qualityControls").alias("study_qualityControls"), + qc_select_expression.alias("study_qualityControls"), ) return StudyLocus( @@ -159,6 +168,24 @@ def validate_lead_pvalue(self: StudyLocus, pvalue_cutoff: float) -> StudyLocus: _schema=self.get_schema(), ) + def validate_unique_study_locus_id(self: StudyLocus) -> StudyLocus: + """Validating the uniqueness of study-locus identifiers and flagging duplicated studyloci. + + Returns: + StudyLocus: with flagged duplicated studies. 
+ """ + return StudyLocus( + _df=self.df.withColumn( + "qualityControls", + self.update_quality_flag( + f.col("qualityControls"), + self.flag_duplicates(f.col("studyLocusId")), + StudyLocusQualityCheck.DUPLICATED_STUDYLOCUS_ID, + ), + ), + _schema=StudyLocus.get_schema(), + ) + @staticmethod def _qc_subsignificant_associations( quality_controls_column: Column, @@ -375,6 +402,24 @@ def get_schema(cls: type[StudyLocus]) -> StructType: """ return parse_spark_schema("study_locus.json") + @classmethod + def get_QC_column_name(cls: type[StudyLocus]) -> str: + """Quality control column. + + Returns: + str: Name of the quality control column. + """ + return "qualityControls" + + @classmethod + def get_QC_categories(cls: type[StudyLocus]) -> list[str]: + """Quality control categories. + + Returns: + list[str]: List of quality control categories. + """ + return [member.value for member in StudyLocusQualityCheck] + def filter_by_study_type( self: StudyLocus, study_type: str, study_index: StudyIndex ) -> StudyLocus: diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py new file mode 100644 index 000000000..06995290c --- /dev/null +++ b/src/gentropy/study_locus_validation.py @@ -0,0 +1,57 @@ +"""Step to validate study locus dataset against study index.""" + +from __future__ import annotations + +from gentropy.common.session import Session +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import StudyLocus + + +class StudyLocusValidationStep: + """Study index validation step. + + This step reads and outputs a study index dataset with flagged studies + when target of disease validation fails. + """ + + def __init__( + self, + session: Session, + study_index_path: str, + study_locus_path: list[str], + gwas_significance: float, + valid_study_locus_path: str, + invalid_study_locus_path: str, + invalid_qc_reasons: list[str] = [], + ) -> None: + """Initialize step. 
+ + Args: + session (Session): Session object. + study_index_path (str): Path to study index file. + study_locus_path (list[str]): Path to study locus dataset. + gwas_significance (float): GWAS significance threshold. + valid_study_locus_path (str): Path to write the valid records. + invalid_study_locus_path (str): Path to write the output file. + invalid_qc_reasons (list[str]): List of invalid quality check reason names from `StudyLocusQualityCheck` (e.g. ['SUBSIGNIFICANT_FLAG']). + """ + # Reading datasets: + study_index = StudyIndex.from_parquet(session, study_index_path) + + # Running validation then writing output: + study_locus_with_qc = ( + StudyLocus.from_parquet(session, list(study_locus_path)) + .validate_lead_pvalue( + pvalue_cutoff=gwas_significance + ) # Flagging study locus with subsignificant p-values + .validate_study(study_index) # Flagging studies not in study index + .validate_unique_study_locus_id() # Flagging duplicated study locus ids + ).persist() # we will need this for 2 types of outputs + + study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.parquet( + invalid_study_locus_path + ) + + study_locus_with_qc.valid_rows( + invalid_qc_reasons, invalid=True + ).df.write.parquet(valid_study_locus_path) diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py new file mode 100644 index 000000000..5bfb83fe0 --- /dev/null +++ b/src/gentropy/study_validation.py @@ -0,0 +1,74 @@ +"""Step to validate study index against disease and target index.""" + +from __future__ import annotations + +from pyspark.sql import functions as f + +from gentropy.common.session import Session +from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.study_index import StudyIndex + + +class StudyValidationStep: + """Study index validation step. + + This step reads and outputs a study index dataset with flagged studies + when target of disease validation fails. 
+ """ + + def __init__( + self, + session: Session, + study_index_path: list[str], + target_index_path: str, + disease_index_path: str, + valid_study_index_path: str, + invalid_study_index_path: str, + invalid_qc_reasons: list[str] = [], + ) -> None: + """Initialize step. + + Args: + session (Session): Session object. + study_index_path (list[str]): Path to study index file. + target_index_path (str): Path to target index file. + disease_index_path (str): Path to disease index file. + valid_study_index_path (str): Path to write the valid records. + invalid_study_index_path (str): Path to write the output file. + invalid_qc_reasons (list[str]): List of invalid quality check reason names from `StudyQualityCheck` (e.g. ['DUPLICATED_STUDY']). + """ + # Reading datasets: + target_index = GeneIndex.from_parquet(session, target_index_path) + # Reading disease index and pre-process. + # This logic does not belong anywhere, but gentorpy has no disease dataset yet. + disease_index = ( + session.spark.read.parquet(disease_index_path) + .select( + f.col("id").alias("diseaseId"), + f.explode_outer( + f.when( + f.col("obsoleteTerms").isNotNull(), + f.array_union(f.array("id"), f.col("obsoleteTerms")), + ) + ).alias("efo"), + ) + .withColumn("efo", f.coalesce(f.col("efo"), f.col("diseaseId"))) + ) + study_index = StudyIndex.from_parquet(session, list(study_index_path)) + + # Running validation: + study_index_with_qc = ( + study_index.validate_disease(disease_index) + .validate_unique_study_id() # Flagging duplicated study ids + .validate_study_type() # Flagging non-supported study types. 
+ .validate_target(target_index) # Flagging QTL studies with invalid targets + .validate_disease(disease_index) # Flagging invalid EFOs + ).persist() # we will need this for 2 types of outputs + + study_index_with_qc.valid_rows( + invalid_qc_reasons, invalid=True + ).df.write.parquet(invalid_study_index_path) + + study_index_with_qc.valid_rows(invalid_qc_reasons).df.write.parquet( + valid_study_index_path + ) diff --git a/tests/gentropy/dataset/test_dataset_exclusion.py b/tests/gentropy/dataset/test_dataset_exclusion.py new file mode 100644 index 000000000..361398f34 --- /dev/null +++ b/tests/gentropy/dataset/test_dataset_exclusion.py @@ -0,0 +1,100 @@ +"""Test dataset validation/exclusion.""" + +from __future__ import annotations + +import pyspark.sql.functions as f +import pytest +from pyspark.sql import SparkSession + +from gentropy.dataset.study_index import StudyIndex, StudyQualityCheck + + +class TestDataExclusion: + """Testing Dataset exclusion. + + Calling `dataset.valid_rows` methods on a mock datasets to test if + the right rows are excluded. 
+ """ + + CORRECT_FILTER = ["The identifier of this study is not unique."] + INCORRECT_FILTER = ["Some mock flag."] + ALL_FILTERS = [member.value for member in StudyQualityCheck] + + DATASET = [ + # Good study no flag: + ("S1", None), + # Good study permissive flag: + ("S2", "This type of study is not supported."), + ("S2", "No valid disease identifier found."), + # Bad study: + ("S3", "The identifier of this study is not unique."), + ("S3", "This type of study is not supported."), + ] + + @pytest.fixture(autouse=True) + def _setup(self: TestDataExclusion, spark: SparkSession) -> None: + """Setup study the mock index for testing.""" + self.study_index = StudyIndex( + _df=( + spark.createDataFrame(self.DATASET, ["studyId", "flag"]) + .groupBy("studyId") + .agg(f.collect_list("flag").alias("qualityControls")) + .select( + "studyId", + "qualityControls", + f.lit("project1").alias("projectId"), + f.lit("gwas").alias("studyType"), + ) + ), + _schema=StudyIndex.get_schema(), + ) + + @pytest.mark.parametrize( + "filter_, expected", + [ + (CORRECT_FILTER, ["S1", "S2"]), + (ALL_FILTERS, ["S1"]), + ], + ) + def test_valid_rows( + self: TestDataExclusion, filter_: list[str], expected: list[str] + ) -> None: + """Test valid rows.""" + passing_studies = [ + study["studyId"] + for study in self.study_index.valid_rows( + filter_, invalid=False + ).df.collect() + ] + + assert passing_studies == expected + + @pytest.mark.parametrize( + "filter_, expected", + [ + (CORRECT_FILTER, ["S3"]), + (ALL_FILTERS, ["S2", "S3"]), + ], + ) + def test_invalid_rows( + self: TestDataExclusion, filter_: list[str], expected: list[str] + ) -> None: + """Test invalid rows.""" + failing_studies = [ + study["studyId"] + for study in self.study_index.valid_rows(filter_, invalid=True).df.collect() + ] + + assert failing_studies == expected + + def test_failing_quality_flag(self: TestDataExclusion) -> None: + """Test invalid quality flag.""" + with pytest.raises(ValueError): + self.study_index.valid_rows( 
+ self.INCORRECT_FILTER, invalid=True + ).df.collect() + + with pytest.raises(ValueError): + self.study_index.valid_rows( + self.INCORRECT_FILTER, invalid=False + ).df.collect() From 1a7b0d7cccc29a1d628ce0af93d8a210d17d53bb Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 3 Sep 2024 14:48:42 +0100 Subject: [PATCH 021/188] feat(variant index): improved data structure (#710) * feat(variant index): upating schema and logic * feat(variant index): vep transcript annotation improved with consequence score + transcript index * refactor: removed hardcoded schema definition from parser * fix(test): fixing mock variant index * fix: schema extraction fixed * fix: doctest issue resolved * fix: updating vep command in the DAG * fix: removing un-used sequence ontology terms * fix: adding example to sorter method + changing variable names --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/airflow/dags/variant_index.py | 8 + src/gentropy/assets/data/so_mappings.json | 43 ----- .../data/variant_consequence_to_score.tsv | 46 +++++ .../assets/schemas/variant_index.json | 38 +++- .../assets/schemas/vep_json_output.json | 42 +++++ src/gentropy/common/spark_helpers.py | 90 +++++++++- src/gentropy/datasource/ensembl/vep_parser.py | 169 ++++++++++-------- tests/gentropy/conftest.py | 7 +- tests/gentropy/data_samples/vep_sample.jsonl | 4 +- .../datasource/ensembl/test_vep_variants.py | 2 +- 10 files changed, 322 insertions(+), 127 deletions(-) delete mode 100644 src/gentropy/assets/data/so_mappings.json create mode 100644 src/gentropy/assets/data/variant_consequence_to_score.tsv diff --git a/src/airflow/dags/variant_index.py b/src/airflow/dags/variant_index.py index 9d0736632..98ba48198 100644 --- a/src/airflow/dags/variant_index.py +++ b/src/airflow/dags/variant_index.py @@ -242,10 +242,18 @@ def vep_annotation(pm: PathManager, **kwargs: Any) -> None: --dir_plugins {pm.cache_dir}/VEP_plugins \ --sift b \ --polyphen b \ + 
--fasta {pm.cache_dir}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz \ + --mane_select \ + --appris \ + --hgvsg \ + --pick_order mane_select,canonical \ + --per_gene \ --uniprot \ --check_existing \ --exclude_null_alleles \ --canonical \ + --plugin TSSDistance \ + --distance 500000 \ --plugin LoF,loftee_path:{pm.cache_dir}/VEP_plugins,gerp_bigwig:{pm.cache_dir}/gerp_conservation_scores.homo_sapiens.GRCh38.bw,human_ancestor_fa:{pm.cache_dir}/human_ancestor.fa.gz,conservation_file:/opt/vep/loftee.sql \ --plugin AlphaMissense,file={pm.cache_dir}/AlphaMissense_hg38.tsv.gz,transcript_match=1 \ --plugin CADD,snv={pm.cache_dir}/CADD_GRCh38_whole_genome_SNVs.tsv.gz", diff --git a/src/gentropy/assets/data/so_mappings.json b/src/gentropy/assets/data/so_mappings.json deleted file mode 100644 index 8a087b6f7..000000000 --- a/src/gentropy/assets/data/so_mappings.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "transcript_ablation": "SO_0001893", - "splice_acceptor_variant": "SO_0001574", - "splice_donor_variant": "SO_0001575", - "stop_gained": "SO_0001587", - "frameshift_variant": "SO_0001589", - "stop_lost": "SO_0001578", - "start_lost": "SO_0002012", - "transcript_amplification": "SO_0001889", - "feature_elongation": "SO_0001907", - "feature_truncation": "SO_0001906", - "inframe_insertion": "SO_0001821", - "inframe_deletion": "SO_0001822", - "missense_variant": "SO_0001583", - "protein_altering_variant": "SO_0001818", - "splice_donor_5th_base_variant": "SO_0001787", - "splice_region_variant": "SO_0001630", - "splice_donor_region_variant": "SO_0002170", - "splice_polypyrimidine_tract_variant": "SO_0002169", - "incomplete_terminal_codon_variant": "SO_0001626", - "start_retained_variant": "SO_0002019", - "stop_retained_variant": "SO_0001567", - "synonymous_variant": "SO_0001819", - "coding_sequence_variant": "SO_0001580", - "mature_miRNA_variant": "SO_0001620", - "5_prime_UTR_variant": "SO_0001623", - "3_prime_UTR_variant": "SO_0001624", - "non_coding_transcript_exon_variant": 
"SO_0001792", - "intron_variant": "SO_0001627", - "NMD_transcript_variant": "SO_0001621", - "non_coding_transcript_variant": "SO_0001619", - "coding_transcript_variant": "SO_0001968", - "upstream_gene_variant": "SO_0001631", - "downstream_gene_variant": "SO_0001632", - "TFBS_ablation": "SO_0001895", - "TFBS_amplification": "SO_0001892", - "TF_binding_site_variant": "SO_0001782", - "regulatory_region_ablation": "SO_0001894", - "regulatory_region_amplification": "SO_0001891", - "regulatory_region_variant": "SO_0001566", - "intergenic_variant": "SO_0001628", - "sequence_variant": "SO_0001060" -} diff --git a/src/gentropy/assets/data/variant_consequence_to_score.tsv b/src/gentropy/assets/data/variant_consequence_to_score.tsv new file mode 100644 index 000000000..589a855e6 --- /dev/null +++ b/src/gentropy/assets/data/variant_consequence_to_score.tsv @@ -0,0 +1,46 @@ +variantFunctionalConsequenceId label score +SO_0001893 transcript_ablation 1.0 +ECO_0000205 curator_inference +SO_0002165 trinucleotide_repeat_expansion +SO_0001574 splice_acceptor_variant 1.0 +SO_0001575 splice_donor_variant 1.0 +SO_0001587 stop_gained 1.0 +SO_0001589 frameshift_variant 1.0 +SO_0002012 start_lost 1.0 +SO_0001578 stop_lost 1.0 +SO_0001889 transcript_amplification 1.0 +SO_0001894 regulatory_region_ablation 0.66 +SO_0001583 missense_variant 0.66 +SO_0001818 protein_altering_variant 0.66 +SO_0001821 inframe_insertion 0.66 +SO_0001822 inframe_deletion 0.66 +SO_0001582 initiator_codon_variant +SO_0001630 splice_region_variant 0.33 +SO_0001626 incomplete_terminal_codon_variant 0.33 +SO_0001567 stop_retained_variant 0.33 +SO_0001819 synonymous_variant 0.33 +SO_0002019 start_retained_variant 0.33 +SO_0001619 non_coding_transcript_variant 0.0 +SO_0001620 mature_miRNA_variant 0.0 +SO_0001621 NMD_transcript_variant 0.1 +SO_0001623 5_prime_UTR_variant 0.1 +SO_0001624 3_prime_UTR_variant 0.1 +SO_0001627 intron_variant 0.1 +SO_0001792 non_coding_transcript_exon_variant 0.0 +SO_0001580 
coding_sequence_variant 0.0 +SO_0001566 regulatory_region_variant 0.0 +SO_0001631 upstream_gene_variant 0.0 +SO_0001632 downstream_gene_variant 0.0 +SO_0001782 TF_binding_site_variant 0.0 +SO_0001891 regulatory_region_amplification 0.0 +SO_0001892 TFBS_amplification 0.0 +SO_0001895 TFBS_ablation 0.0 +SO_0001906 feature_truncation 0.0 +SO_0001907 feature_elongation 0.0 +SO_0001628 intergenic_variant 0.0 +SO_0001060 sequence_variant +SO_0001825 conservative_inframe_deletion +SO_0001787 splice_donor_5th_base_variant 0.66 +SO_0002170 splice_donor_region_variant 0.33 +SO_0002169 splice_polypyrimidine_tract_variant 0.33 +SO_0001968 coding_transcript_variant 0.1 diff --git a/src/gentropy/assets/schemas/variant_index.json b/src/gentropy/assets/schemas/variant_index.json index 16b1c1b11..6d5e211ac 100644 --- a/src/gentropy/assets/schemas/variant_index.json +++ b/src/gentropy/assets/schemas/variant_index.json @@ -128,10 +128,28 @@ }, { "metadata": {}, - "name": "distance", + "name": "distanceFromFootprint", "nullable": true, "type": "long" }, + { + "metadata": {}, + "name": "distanceFromTss", + "nullable": true, + "type": "long" + }, + { + "metadata": {}, + "name": "appris", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "maneSelect", + "nullable": true, + "type": "string" + }, { "metadata": {}, "name": "targetId", @@ -162,6 +180,18 @@ "nullable": true, "type": "float" }, + { + "metadata": {}, + "name": "consequenceScore", + "nullable": true, + "type": "float" + }, + { + "metadata": {}, + "name": "transcriptIndex", + "nullable": true, + "type": "integer" + }, { "metadata": {}, "name": "transcriptId", @@ -184,6 +214,12 @@ "type": "array" } }, + { + "metadata": {}, + "name": "hgvsId", + "nullable": true, + "type": "string" + }, { "name": "alleleFrequencies", "type": { diff --git a/src/gentropy/assets/schemas/vep_json_output.json b/src/gentropy/assets/schemas/vep_json_output.json index ecad3ea1e..43c3f4ad7 100644 --- 
a/src/gentropy/assets/schemas/vep_json_output.json +++ b/src/gentropy/assets/schemas/vep_json_output.json @@ -20,6 +20,12 @@ "containsNull": true, "elementType": { "fields": [ + { + "metadata": {}, + "name": "hgvsg", + "nullable": true, + "type": "string" + }, { "metadata": {}, "name": "cadd_phred", @@ -316,6 +322,42 @@ "nullable": true, "type": "string" }, + { + "metadata": {}, + "name": "hgvsg", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "hgvsc", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "hgvsp", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "appris", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "mane_select", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "tssdistance", + "nullable": true, + "type": "long" + }, { "metadata": {}, "name": "cadd_phred", diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 4b22e00c2..65d3ae17b 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -5,7 +5,8 @@ import re import sys from functools import reduce, wraps -from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, TypeVar +from itertools import chain +from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional, TypeVar import pyspark.sql.functions as f import pyspark.sql.types as t @@ -375,6 +376,93 @@ def order_array_of_structs_by_field(column_name: str, field_name: str) -> Column ) +def order_array_of_structs_by_two_fields( + array_name: str, descending_column: str, ascending_column: str +) -> Column: + """Sort array of structs by a field in descending order and by an other field in an ascending order. + + This function doesn't deal with null values, assumes the sort columns are not nullable. 
+ + Args: + array_name (str): Column name with array of structs + descending_column (str): Name of the first keys sorted in descending order + ascending_column (str): Name of the second keys sorted in ascending order + + Returns: + Column: Sorted column + + Examples: + >>> data = [(1.0, 45, 'First'), (0.5, 232, 'Third'), (0.5, 233, 'Fourth'), (1.0, 125, 'Second'),] + >>> ( + ... spark.createDataFrame(data, ['col1', 'col2', 'ranking']) + ... .groupBy(f.lit('c')) + ... .agg(f.collect_list(f.struct('col1','col2', 'ranking')).alias('list')) + ... .select(order_array_of_structs_by_two_fields('list', 'col1', 'col2').alias('sorted_list')) + ... .show(truncate=False) + ... ) + +-----------------------------------------------------------------------------+ + |sorted_list | + +-----------------------------------------------------------------------------+ + |[{1.0, 45, First}, {1.0, 125, Second}, {0.5, 232, Third}, {0.5, 233, Fourth}]| + +-----------------------------------------------------------------------------+ + + """ + return f.expr( + f""" + array_sort( + {array_name}, + (left, right) -> case + when left.{descending_column} is null and right.{descending_column} is null then 0 + when left.{ascending_column} is null and right.{ascending_column} is null then 0 + + when left.{descending_column} is null then 1 + when right.{descending_column} is null then -1 + + when left.{ascending_column} is null then 1 + when right.{ascending_column} is null then -1 + + when left.{descending_column} < right.{descending_column} then 1 + when left.{descending_column} > right.{descending_column} then -1 + when left.{descending_column} == right.{descending_column} and left.{ascending_column} > right.{ascending_column} then 1 + when left.{descending_column} == right.{descending_column} and left.{ascending_column} < right.{ascending_column} then -1 + end) + """ + ) + +def map_column_by_dictionary(col: Column, mapping_dict: Dict[str, str]) -> Column: + """Map column values to dictionary values 
by key. + + Missing consequence label will be converted to None, unmapped consequences will be mapped as None. + + Args: + col (Column): Column containing labels to map. + mapping_dict (Dict[str, str]): Dictionary with mapping key/value pairs. + + Returns: + Column: Column with mapped values. + + Examples: + >>> data = [('consequence_1',),('unmapped_consequence',),(None,)] + >>> m = {'consequence_1': 'SO:000000'} + >>> ( + ... spark.createDataFrame(data, ['label']) + ... .select('label',map_column_by_dictionary(f.col('label'),m).alias('id')) + ... .show() + ... ) + +--------------------+---------+ + | label| id| + +--------------------+---------+ + | consequence_1|SO:000000| + |unmapped_consequence| null| + | null| null| + +--------------------+---------+ + + """ + map_expr = f.create_map(*[f.lit(x) for x in chain(*mapping_dict.items())]) + + return map_expr[col] + + def pivot_df( df: DataFrame, pivot_col: str, diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 2259ef34e..c7ee05d13 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -3,10 +3,9 @@ from __future__ import annotations import importlib.resources as pkg_resources -import json -from itertools import chain -from typing import TYPE_CHECKING, Dict, List +from typing import TYPE_CHECKING, List +import pandas as pd from pyspark.sql import SparkSession from pyspark.sql import functions as f from pyspark.sql import types as t @@ -15,7 +14,9 @@ from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( enforce_schema, + map_column_by_dictionary, order_array_of_structs_by_field, + order_array_of_structs_by_two_fields, ) from gentropy.dataset.variant_index import VariantIndex @@ -27,36 +28,15 @@ class VariantEffectPredictorParser: """Collection of methods to parse VEP output in json format.""" # Schema description of the dbXref object: - DBXREF_SCHEMA = 
t.ArrayType( - t.StructType( - [ - t.StructField("id", t.StringType(), True), - t.StructField("source", t.StringType(), True), - ] - ) - ) + DBXREF_SCHEMA = VariantIndex.get_schema()["dbXrefs"].dataType # Schema description of the in silico predictor object: - IN_SILICO_PREDICTOR_SCHEMA = t.StructType( - [ - t.StructField("method", t.StringType(), True), - t.StructField("assessment", t.StringType(), True), - t.StructField("score", t.FloatType(), True), - t.StructField("assessmentFlag", t.StringType(), True), - t.StructField("targetId", t.StringType(), True), - ] - ) + IN_SILICO_PREDICTOR_SCHEMA = VariantIndex.get_schema()[ + "inSilicoPredictors" + ].dataType # Schema for the allele frequency column: - ALLELE_FREQUENCY_SCHEMA = t.ArrayType( - t.StructType( - [ - t.StructField("populationName", t.StringType(), True), - t.StructField("alleleFrequency", t.DoubleType(), True), - ] - ), - False, - ) + ALLELE_FREQUENCY_SCHEMA = VariantIndex.get_schema()["alleleFrequencies"].dataType @staticmethod def get_schema() -> t.StructType: @@ -371,12 +351,12 @@ def _get_max_alpha_missense(transcripts: Column) -> Column: ... .select(VariantEffectPredictorParser._get_max_alpha_missense(f.col('transcripts')).alias('am')) ... .show(truncate=False) ... 
) - +----------------------------------------------------+ - |am | - +----------------------------------------------------+ - |{max alpha missense, assessment 1, 0.4, null, gene1}| - |{max alpha missense, null, null, null, gene1} | - +----------------------------------------------------+ + +------------------------------------------------------+ + |am | + +------------------------------------------------------+ + |[{max alpha missense, assessment 1, 0.4, null, gene1}]| + |[{max alpha missense, null, null, null, gene1}] | + +------------------------------------------------------+ """ return f.transform( @@ -546,42 +526,6 @@ def _collect_uniprot_accessions(trembl: Column, swissprot: Column) -> Column: lambda x: x.isNotNull(), ) - @staticmethod - def _consequence_to_sequence_ontology( - col: Column, so_dict: Dict[str, str] - ) -> Column: - """Convert VEP consequence terms to sequence ontology identifiers. - - Missing consequence label will be converted to None, unmapped consequences will be mapped as None. - - Args: - col (Column): Column containing VEP consequence terms. - so_dict (Dict[str, str]): Dictionary mapping VEP consequence terms to sequence ontology identifiers. - - Returns: - Column: Column containing sequence ontology identifiers. - - Examples: - >>> data = [('consequence_1',),('unmapped_consequence',),(None,)] - >>> m = {'consequence_1': 'SO:000000'} - >>> ( - ... spark.createDataFrame(data, ['label']) - ... .select('label',VariantEffectPredictorParser._consequence_to_sequence_ontology(f.col('label'),m).alias('id')) - ... .show() - ... 
) - +--------------------+---------+ - | label| id| - +--------------------+---------+ - | consequence_1|SO:000000| - |unmapped_consequence| null| - | null| null| - +--------------------+---------+ - - """ - map_expr = f.create_map(*[f.lit(x) for x in chain(*so_dict.items())]) - - return map_expr[col].alias("ancestry") - @staticmethod def _parse_variant_location_id(vep_input_field: Column) -> List[Column]: r"""Parse variant identifier, chromosome, position, reference allele and alternate allele from VEP input field. @@ -622,10 +566,22 @@ def process_vep_output( Returns: DataFrame: processed data in the right shape. """ - # Reading consequence to sequence ontology map: - sequence_ontology_map = json.loads( - pkg_resources.read_text(data, "so_mappings.json", encoding="utf-8") + so_df = pd.read_csv( + pkg_resources.open_text( + data, "variant_consequence_to_score.tsv", encoding="utf-8" + ), + sep="\t", ) + + # Reading consequence to sequence ontology map: + sequence_ontology_map = { + row["label"]: row["variantFunctionalConsequenceId"] + for _, row in so_df.iterrows() + } + + # Reading score dictionary: + score_dictionary = {row["label"]: row["score"] for _, row in so_df.iterrows()} + # Processing VEP output: return ( vep_output @@ -704,9 +660,20 @@ def process_vep_output( ) .alias("inSilicoPredictors"), # Convert consequence to SO: - cls._consequence_to_sequence_ontology( + map_column_by_dictionary( f.col("most_severe_consequence"), sequence_ontology_map ).alias("mostSevereConsequenceId"), + # Extract HGVS identifier: + f.when( + f.size("transcript_consequences") > 0, + f.col("transcript_consequences").getItem(0).getItem("hgvsg"), + ) + .when( + f.size("intergenic_consequences") > 0, + f.col("intergenic_consequences").getItem(0).getItem("hgvsg"), + ) + .otherwise(f.lit(None)) + .alias("hgvsId"), # Collect transcript consequence: f.when( f.col("transcript_consequences").isNotNull(), @@ -716,10 +683,21 @@ def process_vep_output( # Convert consequence terms to SO 
identifier: f.transform( transcript.consequence_terms, - lambda y: cls._consequence_to_sequence_ontology( + lambda y: map_column_by_dictionary( y, sequence_ontology_map ), ).alias("variantFunctionalConsequenceIds"), + # Convert consequence terms to consequence score: + f.array_max( + f.transform( + transcript.consequence_terms, + lambda term: map_column_by_dictionary( + term, score_dictionary + ), + ) + ) + .cast(t.FloatType()) + .alias("consequenceScore"), # Format amino acid change: cls._parser_amino_acid_change( transcript.amino_acids, transcript.protein_end @@ -733,9 +711,20 @@ def process_vep_output( f.when(transcript.canonical == 1, f.lit(True)) .otherwise(f.lit(False)) .alias("isEnsemblCanonical"), - # Extract other fields as is: + # Extract footprint distance: transcript.codons.alias("codons"), - transcript.distance.alias("distance"), + f.when(transcript.distance.isNotNull(), transcript.distance) + .otherwise(f.lit(0)) + .cast(t.LongType()) + .alias("distanceFromFootprint"), + # Extract distance from the transcription start site: + transcript.tssdistance.cast(t.LongType()).alias( + "distanceFromTss" + ), + # Extracting APPRIS isoform annotation for this transcript: + transcript.appris.alias("appris"), + # Extracting MANE select transcript: + transcript.mane_select.alias("maneSelect"), transcript.gene_id.alias("targetId"), transcript.impact.alias("impact"), transcript.lof.cast(t.StringType()).alias( @@ -756,6 +745,30 @@ def process_vep_output( # Adding empty array for allele frequencies - now this piece of data is not coming form the VEP data: f.array().cast(cls.ALLELE_FREQUENCY_SCHEMA).alias("alleleFrequencies"), ) + # Dropping transcripts where the consequence score or the distance is null: + .withColumn( + "transcriptConsequences", + f.filter( + f.col("transcriptConsequences"), + lambda x: x.getItem("consequenceScore").isNotNull() + & x.getItem("distanceFromFootprint").isNotNull(), + ), + ) + # Sort transcript consequences by consequence score and 
distance from footprint and add index: + .withColumn( + "transcriptConsequences", + f.when( + f.col("transcriptConsequences").isNotNull(), + f.transform( + order_array_of_structs_by_two_fields( + "transcriptConsequences", + "consequenceScore", + "distanceFromFootprint", + ), + lambda x, i: x.withField("transcriptIndex", i + f.lit(1)), + ), + ), + ) # Adding protvar xref for missense variants: # TODO: making and extendable list of consequences .withColumn( "protvar_xrefs", diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 9ae7ace58..629f3a505 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -323,12 +323,17 @@ def mock_variant_index(spark: SparkSession) -> VariantIndex: "uniprotAccessions", array(cast(rand() as string)), "isEnsemblCanonical", cast(rand() as boolean), "codons", cast(rand() as string), - "distance", cast(rand() as long), + "distanceFromTss", cast(rand() as long), + "distanceFromFootprint", cast(rand() as long), + "appris", cast(rand() as string), + "maneSelect", cast(rand() as string), "targetId", cast(rand() as string), "impact", cast(rand() as string), "lofteePrediction", cast(rand() as string), "siftPrediction", rand(), "polyphenPrediction", rand(), + "consequenceScore", cast(rand() as float), + "transcriptIndex", cast(rand() as integer), "transcriptId", cast(rand() as string) ) ) diff --git a/tests/gentropy/data_samples/vep_sample.jsonl b/tests/gentropy/data_samples/vep_sample.jsonl index 78a76f150..2a3cb05dc 100644 --- a/tests/gentropy/data_samples/vep_sample.jsonl +++ b/tests/gentropy/data_samples/vep_sample.jsonl @@ -1,2 +1,2 @@ 
-{"transcript_consequences":[{"gene_id":"ENSG00000168702","swissprot":["Q9NZR2.181"],"uniparc":["UPI00001B045B"],"transcript_id":"ENST00000389484","impact":"MODIFIER","canonical":1,"consequence_terms":["intron_variant"],"strand":-1,"variant_allele":"T"}],"assembly_name":"GRCh38","most_severe_consequence":"intron_variant","strand":1,"seq_region_name":"2","start":140699626,"end":140699625,"input":"2\t140699625\t2_140699625_G_GT\tG\tGT","allele_string":"-/T","id":"2_140699625_G_GT"} -{"transcript_consequences":[{"impact":"MODIFIER","consequence_terms":["upstream_gene_variant"],"distance":682,"trembl":["A0A2U3TZJ1.14"],"cadd_raw":4.846757,"cadd_phred":27.1,"transcript_id":"ENST00000336451","strand":-1,"variant_allele":"T","gene_id":"ENSG00000155906","uniparc":["UPI000D18E792"]},{"lof_info":"INTRON_SIZE:8753","consequence_terms":["splice_donor_variant"],"impact":"HIGH","transcript_id":"ENST00000444024","cadd_phred":27.1,"cadd_raw":4.846757,"strand":-1,"variant_allele":"T","lof":"HC","canonical":1,"uniprot_isoform":["Q9NWS8-1"],"uniparc":["UPI00001AEAE1"],"swissprot":["Q9NWS8.145"],"gene_id":"ENSG00000155906"},{"gene_id":"ENSG00000155906","uniparc":["UPI000006DC2F"],"swissprot":["Q9NWS8.145"],"uniprot_isoform":["Q9NWS8-3"],"lof":"HC","variant_allele":"T","strand":-1,"cadd_phred":27.1,"transcript_id":"ENST00000491268","cadd_raw":4.846757,"consequence_terms":["splice_donor_variant"],"impact":"HIGH","lof_info":"INTRON_SIZE:8753"},{"variant_allele":"T","strand":-1,"uniparc":["UPI0002A12044"],"gene_id":"ENSG00000155906","consequence_terms":["intron_variant"],"impact":"MODIFIER","transcript_id":"ENST00000622845","cadd_phred":27.1,"cadd_raw":4.846757,"trembl":["A0A087WXU0.51"]},{"transcript_id":"ENST00000643564","cadd_phred":27.1,"cadd_raw":4.846757,"consequence_terms":["non_coding_transcript_exon_variant"],"impact":"MODIFIER","gene_id":"ENSG00000155906","cdna_start":578,"cdna_end":578,"strand":-1,"variant_allele":"T"},{"transcript_id":"ENST00000644054","cadd_phred":27.1,"trembl
":["A0A2R8YFC3.14"],"cadd_raw":4.846757,"consequence_terms":["splice_donor_variant","NMD_transcript_variant"],"impact":"HIGH","uniparc":["UPI001B8998C5"],"gene_id":"ENSG00000155906","strand":-1,"variant_allele":"T"},{"transcript_id":"ENST00000644711","cadd_phred":27.1,"cadd_raw":4.846757,"trembl":["A0A2R8Y4J4.16"],"consequence_terms":["splice_donor_variant","NMD_transcript_variant"],"impact":"HIGH","uniparc":["UPI000D1907AF"],"gene_id":"ENSG00000155906","variant_allele":"T","strand":-1},{"gene_id":"ENSG00000155906","transcript_id":"ENST00000645367","cadd_phred":27.1,"cadd_raw":4.846757,"consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"impact":"HIGH","strand":-1,"variant_allele":"T"},{"cadd_phred":27.1,"transcript_id":"ENST00000645895","cadd_raw":4.846757,"gene_id":"ENSG00000155906","variant_allele":"T","strand":-1,"consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"impact":"HIGH"},{"gene_id":"ENSG00000155906","cdna_end":724,"cdna_start":724,"variant_allele":"T","strand":-1,"cadd_phred":27.1,"transcript_id":"ENST00000645917","cadd_raw":4.846757,"consequence_terms":["non_coding_transcript_exon_variant"],"impact":"MODIFIER"},{"impact":"HIGH","consequence_terms":["splice_donor_variant","NMD_transcript_variant"],"cadd_raw":4.846757,"trembl":["A0A2R8Y4P5.16"],"cadd_phred":27.1,"transcript_id":"ENST00000646926","variant_allele":"T","strand":-1,"uniparc":["UPI001B89CA49"],"gene_id":"ENSG00000155906"},{"transcript_id":"ENST00000682004","cadd_phred":27.1,"cadd_raw":4.846757,"gene_id":"ENSG00000155906","variant_allele":"T","strand":-1,"consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"impact":"HIGH"},{"gene_id":"ENSG00000155906","uniparc":["UPI001B88E93C"],"lof":"HC","strand":-1,"variant_allele":"T","trembl":["A0A804HHY2.7"],"cadd_raw":4.846757,"transcript_id":"ENST00000682299","cadd_phred":27.1,"impact":"HIGH","consequence_terms":["splice_donor_variant"],"lof_info":"INTRON_SIZE:8753"},{"uniparc":
["UPI001B8920B1"],"gene_id":"ENSG00000155906","strand":-1,"variant_allele":"T","trembl":["A0A804HLE1.7"],"cadd_raw":4.846757,"cadd_phred":27.1,"transcript_id":"ENST00000682392","impact":"HIGH","consequence_terms":["splice_donor_variant","NMD_transcript_variant"]},{"cadd_raw":4.846757,"trembl":["A0A804HHW6.7"],"cadd_phred":27.1,"transcript_id":"ENST00000682641","impact":"HIGH","consequence_terms":["splice_donor_variant"],"lof_info":"INTRON_SIZE:8753","gene_id":"ENSG00000155906","uniparc":["UPI001B893F2B"],"lof":"HC","variant_allele":"T","strand":-1},{"gene_id":"ENSG00000155906","transcript_id":"ENST00000682760","cadd_phred":27.1,"cadd_raw":4.846757,"consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"impact":"HIGH","strand":-1,"variant_allele":"T"},{"variant_allele":"T","strand":-1,"impact":"HIGH","consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"cadd_raw":4.846757,"cadd_phred":27.1,"transcript_id":"ENST00000683439","gene_id":"ENSG00000155906"},{"gene_id":"ENSG00000155906","swissprot":["Q9NWS8.145"],"uniparc":["UPI00001AEAE1"],"lof":"HC","uniprot_isoform":["Q9NWS8-1"],"strand":-1,"variant_allele":"T","cadd_raw":4.846757,"transcript_id":"ENST00000683724","cadd_phred":27.1,"impact":"HIGH","consequence_terms":["splice_donor_variant"],"lof_info":"INTRON_SIZE:8753"},{"gene_id":"ENSG00000155906","transcript_id":"ENST00000683740","cadd_phred":27.1,"cadd_raw":4.846757,"consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"impact":"HIGH","variant_allele":"T","strand":-1},{"cadd_phred":27.1,"transcript_id":"ENST00000684301","cadd_raw":4.846757,"consequence_terms":["splice_donor_variant","NMD_transcript_variant"],"impact":"HIGH","gene_id":"ENSG00000155906","uniparc":["UPI000006DC2F"],"swissprot":["Q9NWS8.145"],"uniprot_isoform":["Q9NWS8-3"],"variant_allele":"T","strand":-1},{"gene_id":"ENSG00000155906","cadd_raw":4.846757,"cadd_phred":27.1,"transcript_id":"ENST00000684658","impact":"HIGH","consequence_t
erms":["splice_donor_variant","non_coding_transcript_variant"],"strand":-1,"variant_allele":"T"},{"consequence_terms":["splice_donor_variant","non_coding_transcript_variant"],"impact":"HIGH","variant_allele":"T","strand":-1,"gene_id":"ENSG00000155906","cadd_phred":27.1,"transcript_id":"ENST00000684715","cadd_raw":4.846757},{"impact":"HIGH","consequence_terms":["splice_donor_variant","NMD_transcript_variant"],"trembl":["A0A804HKF8.7"],"cadd_raw":4.846757,"cadd_phred":27.1,"transcript_id":"ENST00000684765","strand":-1,"variant_allele":"T","uniparc":["UPI001B89CAE3"],"gene_id":"ENSG00000155906"}],"seq_region_name":"6","assembly_name":"GRCh38","end":151445307,"strand":1,"most_severe_consequence":"splice_donor_variant","start":151445307,"id":"6_151445307_C_T","colocated_variants":[{"clin_sig_allele":"T:pathogenic;T:likely_pathogenic","end":151445307,"var_synonyms":{"ClinVar":["RCV000032983","VCV000039764","RCV001814019"],"OMIM":[614917.0001]},"strand":1,"start":151445307,"allele_string":"C/T","clin_sig":["likely_pathogenic","pathogenic"],"seq_region_name":"6","pubmed":[23022099,18835491],"phenotype_or_disease":1,"id":"rs1562800908"}],"input":"6\t151445307\t6_151445307_C_T\tC\tT","allele_string":"C/T"} 
+{"most_severe_consequence":"missense_variant","input":"17\t29510931\trs2153029597\tT\tC","assembly_name":"GRCh38","transcript_consequences":[{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000238007","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER","tssdistance":498066,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000436028","distance":494419},{"hgvsg":"17:g.29510931T>C","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000222363","strand":1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":122371,"transcript_id":"ENST00000410431","distance":122248,"cadd_raw":5.156509},{"cadd_raw":5.156509,"transcript_id":"ENST00000581240","distance":128696,"tssdistance":128696,"variant_allele":"C","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263370","hgvsg":"17:g.29510931T>C"},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000264007","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":-1,"impact":"MODIFIER","canonical":1,"tssdistance":111323,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000582367","distance":110686},{"cadd_raw":5.156509,"distance":49616,"transcript_id":"ENST00000307201","appris":"P1","tssdistance":56106,"uniparc":["UPI00001C1FC9"],"swissprot":["Q6UXT9.120"],"variant_allele":"C","cadd_phred":28.9,"mane_select":"NM_198147.3","impact":"MODIFIER","canonical":1,"strand":-1,"gene_id":"ENSG00000168792","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C"},{"strand":1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264290","transcript_id":"ENST00000579050","distance":58649,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":58649},{"strand":1,"canonical":1,"impact":"MODIFIER","cadd_phred":28.9,"hgvsg":"17
:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263657","transcript_id":"ENST00000577846","distance":250172,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":250172},{"variant_allele":"C","swissprot":["Q6QEF8.143"],"uniparc":["UPI0000DA4C55"],"tssdistance":111981,"transcript_id":"ENST00000388767","distance":103831,"cadd_raw":5.156509,"uniprot_isoform":["Q6QEF8-5"],"hgvsg":"17:g.29510931T>C","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000167549","strand":-1,"canonical":1,"impact":"MODIFIER","mane_select":"NM_032854.4","cadd_phred":28.9},{"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263781","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":489291,"transcript_id":"ENST00000580924","distance":489291,"cadd_raw":5.156509},{"strand":1,"impact":"MODIFIER","canonical":1,"mane_select":"NM_198529.4","cadd_phred":28.9,"uniprot_isoform":["A4FU69-1"],"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000176927","appris":"P1","transcript_id":"ENST00000394835","distance":430703,"cadd_raw":5.156509,"variant_allele":"C","swissprot":["A4FU69.119"],"uniparc":["UPI0000E59EF5"],"tssdistance":430703},{"cdna_start":2399,"tssdistance":120568,"amino_acids":"L/P","swissprot":["Q7L7X3.173"],"transcript_id":"ENST00000261716","appris":"P1","consequence_terms":["missense_variant"],"trembl":["A0A024QZ70.65"],"cds_start":1643,"mane_select":"NM_020791.4","cadd_phred":28.9,"strand":1,"cds_end":1643,"impact":"MODERATE","canonical":1,"uniparc":["UPI000004A033"],"variant_allele":"C","cadd_raw":5.156509,"sift_score":0,"protein_start":548,"cdna_end":2399,"gene_id":"ENSG00000160551","uniprot_isoform":["Q7L7X3-1"],"codons":"cTg/cCg","sift_prediction":"deleterious_low_confidence","hgvsg":"17:g.29510931T>C","alphamissense":{"am_class":"likely_pathogenic","am_pathogenicity":0.9994},"protein_end":548,"polyphen_score":0
.999,"polyphen_prediction":"probably_damaging"},{"uniparc":["UPI0000246D82"],"tssdistance":82201,"variant_allele":"C","swissprot":["Q86YJ7.146"],"cadd_raw":5.156509,"transcript_id":"ENST00000394859","appris":"P1","distance":82201,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000198720","uniprot_isoform":["Q86YJ7-1"],"hgvsg":"17:g.29510931T>C","trembl":["A0A024QZ29.60"],"mane_select":"NM_152345.5","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1},{"transcript_id":"ENST00000459235","distance":372075,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":372075,"strand":1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000239129"},{"tssdistance":205754,"variant_allele":"C","cadd_raw":5.156509,"distance":205348,"transcript_id":"ENST00000493028","gene_id":"ENSG00000240531","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"impact":"MODIFIER","canonical":1,"strand":-1},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000284162","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER","tssdistance":120269,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000580425","distance":120201},{"tssdistance":49616,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000581474","distance":49616,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264031","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER"},{"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000222858","consequence_terms":["downstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":-1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":130771,"distance":130680,"transcript_id":"ENST00000410926","cadd_raw":5.156509},{"tssdistance":461583,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000581995","distance":46158
3,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264435","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER"},{"gene_id":"ENSG00000179761","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","mane_select":"NM_016518.3","cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":1,"tssdistance":467790,"uniparc":["UPI00001410B0"],"swissprot":["Q9P0Z9.165"],"variant_allele":"C","cadd_raw":5.156509,"distance":453715,"appris":"P1","transcript_id":"ENST00000323372"},{"strand":-1,"canonical":1,"impact":"MODIFIER","cadd_phred":28.9,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000253064","transcript_id":"ENST00000517255","distance":86064,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":86064},{"tssdistance":313146,"variant_allele":"C","cadd_raw":5.156509,"distance":313146,"transcript_id":"ENST00000580309","gene_id":"ENSG00000264050","consequence_terms":["upstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":-1},{"variant_allele":"C","tssdistance":134916,"transcript_id":"ENST00000582881","distance":133865,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000265625","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9},{"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000240074","consequence_terms":["upstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":344828,"distance":344828,"transcript_id":"ENST00000478775","cadd_raw":5.156509},{"cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":-1,"gene_id":"ENSG00000264647","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_raw":5.156509,"distance":80772,"transcript_id":"ENST00000584986","tssdistance":81310,"variant_allele":"C"},{"variant_allele":"C","tssdistance":65531,"transcr
ipt_id":"ENST00000365335","distance":65531,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000202205","strand":-1,"canonical":1,"impact":"MODIFIER","cadd_phred":28.9},{"gene_id":"ENSG00000266111","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"impact":"MODIFIER","canonical":1,"strand":1,"tssdistance":158862,"variant_allele":"C","cadd_raw":5.156509,"distance":106826,"transcript_id":"ENST00000584958"},{"variant_allele":"C","tssdistance":265770,"distance":264816,"transcript_id":"ENST00000580031","cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000265713","consequence_terms":["downstream_gene_variant"],"canonical":1,"impact":"MODIFIER","strand":-1,"cadd_phred":28.9},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000108255","uniprot_isoform":["P05813-1"],"hgvsg":"17:g.29510931T>C","mane_select":"NM_005208.5","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"uniparc":["UPI00001283CF"],"tssdistance":264072,"variant_allele":"C","swissprot":["P05813.205"],"cadd_raw":5.156509,"transcript_id":"ENST00000225387","appris":"P1","distance":256437},{"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264808","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":-1,"canonical":1,"impact":"MODIFIER","tssdistance":120702,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000685798","distance":120702},{"cadd_phred":28.9,"strand":-1,"canonical":1,"impact":"MODIFIER","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000290082","hgvsg":"17:g.29510931T>C","cadd_raw":5.156509,"transcript_id":"ENST00000702873","distance":56892,"tssdistance":57602,"variant_allele":"C"},{"mane_select":"NM_078471.4","cadd_phred":28.9,"strand":-1,"canonical":1,"impact":"MODIFIER","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000196535","uniprot_isoform":["Q92614-1"],"hgvsg":"17:g.29510931T>C","cadd_raw
":5.156509,"appris":"P4","transcript_id":"ENST00000527372","distance":330533,"uniparc":["UPI0000167F32"],"tssdistance":330533,"variant_allele":"C","swissprot":["Q92614.216"]},{"cadd_raw":5.156509,"transcript_id":"ENST00000492004","distance":170109,"tssdistance":170109,"variant_allele":"C","cadd_phred":28.9,"strand":-1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000239256","hgvsg":"17:g.29510931T>C"},{"gene_id":"ENSG00000263709","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":1,"tssdistance":370448,"variant_allele":"C","cadd_raw":5.156509,"distance":355442,"transcript_id":"ENST00000582196"},{"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000252657","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"tssdistance":266688,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000516848","distance":266688},{"trembl":["F5H527.88"],"mane_select":"NM_001282129.2","cadd_phred":28.9,"strand":-1,"impact":"MODIFIER","canonical":1,"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000141298","hgvsg":"17:g.29510931T>C","cadd_raw":5.156509,"transcript_id":"ENST00000540801","appris":"A1","distance":115007,"uniparc":["UPI0002065A97"],"tssdistance":419297,"variant_allele":"C"},{"tssdistance":216783,"uniparc":["UPI00001B078D"],"swissprot":["Q7Z417.159"],"variant_allele":"C","cadd_raw":5.156509,"distance":216783,"transcript_id":"ENST00000225388","appris":"P1","gene_id":"ENSG00000108256","consequence_terms":["upstream_gene_variant"],"hgvsg":"17:g.29510931T>C","uniprot_isoform":["Q7Z417-1"],"mane_select":"NM_020772.3","cadd_phred":28.9,"impact":"MODIFIER","canonical":1,"strand":-1},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000108262","uniprot_isoform":["Q9Y2X7-1"],"hgvsg":"17:g.29510931T>C","mane_select":"NM_014030.4","cadd_phred":28.9,"strand":-1,"
impact":"MODIFIER","canonical":1,"uniparc":["UPI000013C867"],"tssdistance":78717,"variant_allele":"C","swissprot":["Q9Y2X7.219"],"cadd_raw":5.156509,"transcript_id":"ENST00000225394","appris":"A1","distance":62544},{"variant_allele":"C","tssdistance":499922,"transcript_id":"ENST00000581964","distance":499922,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263613","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9},{"variant_allele":"C","tssdistance":306457,"transcript_id":"ENST00000580812","distance":306457,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000178082","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9},{"variant_allele":"C","tssdistance":352471,"distance":352471,"transcript_id":"ENST00000584258","cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000263477","consequence_terms":["upstream_gene_variant"],"canonical":1,"impact":"MODIFIER","strand":1,"cadd_phred":28.9},{"cadd_raw":5.156509,"transcript_id":"ENST00000301057","appris":"P1","distance":57770,"uniparc":["UPI000003B08D"],"tssdistance":57770,"variant_allele":"C","swissprot":["Q8NBR0.130"],"mane_select":"NM_138349.4","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000167543","hgvsg":"17:g.29510931T>C"}],"allele_string":"T/C","seq_region_name":"17","strand":1,"end":29510931,"start":29510931,"colocated_variants":[{"clin_sig":["pathogenic"],"clin_sig_allele":"C:pathogenic","phenotype_or_disease":1,"strand":1,"allele_string":"T/C","start":29510931,"id":"rs2153029597","seq_region_name":"17","pubmed":[33565190],"end":29510931,"var_synonyms":{"ClinVar":["RCV001731168","VCV001300172"],"OMIM":[610266.0003]}}],"id":"rs2153029597"} 
+{"strand":1,"seq_region_name":"9","allele_string":"C/T","transcript_consequences":[{"hgvsg":"9:g.82445881C>T","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000228046","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":7.002,"variant_allele":"T","tssdistance":17856,"transcript_id":"ENST00000392516","distance":17856,"cadd_raw":0.6583},{"cadd_phred":7.002,"impact":"MODIFIER","canonical":1,"strand":1,"gene_id":"ENSG00000225085","consequence_terms":["downstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_raw":0.6583,"distance":39642,"transcript_id":"ENST00000436084","tssdistance":40693,"variant_allele":"T"},{"cadd_raw":0.6583,"transcript_id":"ENST00000637606","tssdistance":468267,"variant_allele":"T","cadd_phred":7.002,"canonical":1,"impact":"MODIFIER","strand":1,"gene_id":"ENSG00000290551","consequence_terms":["intron_variant","non_coding_transcript_variant"],"hgvsg":"9:g.82445881C>T"},{"gene_id":"ENSG00000278988","consequence_terms":["upstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"canonical":1,"impact":"MODIFIER","strand":1,"tssdistance":97837,"variant_allele":"T","cadd_raw":0.6583,"distance":97837,"transcript_id":"ENST00000623079"},{"swissprot":["Q6ZQQ2.115"],"variant_allele":"T","tssdistance":457109,"uniparc":["UPI00001C10A6"],"distance":450628,"appris":"P1","transcript_id":"ENST00000344803","cadd_raw":0.6583,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000214929","consequence_terms":["downstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":7.002,"mane_select":"NM_001001670.3"},{"cadd_phred":7.002,"impact":"MODIFIER","canonical":1,"strand":-1,"gene_id":"ENSG00000230360","consequence_terms":["upstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_raw":0.6583,"distance":357113,"transcript_id":"ENST00000417796","tssdistance":357113,"variant_allele":"T"},{"cadd_raw":0.6583,"transcript_id":"ENST00000422010","distance":5775,"tssdistance":5775,"variant_allele":"T","cadd_phred":7.002,"strand":1,"c
anonical":1,"impact":"MODIFIER","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000232749","hgvsg":"9:g.82445881C>T"},{"tssdistance":12976,"variant_allele":"T","cadd_raw":0.6583,"transcript_id":"ENST00000438986","distance":12976,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000228123","hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"strand":-1,"canonical":1,"impact":"MODIFIER"},{"tssdistance":382199,"variant_allele":"T","cadd_raw":0.6583,"transcript_id":"ENST00000434692","distance":382199,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000231649","hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"strand":-1,"canonical":1,"impact":"MODIFIER"},{"tssdistance":298911,"variant_allele":"T","cadd_raw":0.6583,"transcript_id":"ENST00000432491","distance":298186,"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000233309","hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"strand":-1,"canonical":1,"impact":"MODIFIER"},{"canonical":1,"impact":"MODIFIER","strand":1,"cadd_phred":7.002,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000235377","consequence_terms":["downstream_gene_variant"],"distance":314249,"transcript_id":"ENST00000445918","cadd_raw":0.6583,"variant_allele":"T","tssdistance":315129},{"variant_allele":"T","tssdistance":17682,"distance":17588,"transcript_id":"ENST00000636401","cadd_raw":0.6583,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000228430","consequence_terms":["downstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":7.002},{"cadd_raw":0.6583,"transcript_id":"ENST00000585776","distance":468792,"tssdistance":468792,"variant_allele":"T","cadd_phred":7.002,"strand":-1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000267559","hgvsg":"9:g.82445881C>T"},{"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":7.002,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000237770","consequence_terms":["downstream_gene_variant"],"distance":473751,"transcr
ipt_id":"ENST00000429999","cadd_raw":0.6583,"variant_allele":"T","tssdistance":479317},{"distance":166671,"transcript_id":"ENST00000661177","cadd_raw":0.6583,"variant_allele":"T","tssdistance":200703,"impact":"MODIFIER","canonical":1,"strand":-1,"cadd_phred":7.002,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000286612","consequence_terms":["downstream_gene_variant"]},{"mane_select":"NM_207416.3","cadd_phred":7.002,"impact":"MODIFIER","canonical":1,"strand":1,"gene_id":"ENSG00000186788","consequence_terms":["downstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_raw":0.6583,"distance":495788,"appris":"P1","transcript_id":"ENST00000445385","tssdistance":502381,"uniparc":["UPI000048D678"],"swissprot":["P0C874.81"],"variant_allele":"T"}],"assembly_name":"GRCh38","input":"9\t82445881\t9_82445881_C_T\tC\tT","most_severe_consequence":"intron_variant","id":"9_82445881_C_T","colocated_variants":[{"phenotype_or_disease":1,"strand":1,"allele_string":"C/G/T","frequencies":{"T":{"gnomadg":0.01197,"gnomadg_amr":0.0191,"gnomadg_afr":0.003331,"gnomadg_asj":0.02364,"eas":0,"amr":0.0216,"gnomadg_eas":0,"sas":0,"gnomadg_nfe":0.01704,"gnomadg_fin":0.009992,"gnomadg_mid":0.006329,"afr":0,"gnomadg_oth":0.01772,"gnomadg_ami":0.003289,"af":0.0068,"eur":0.0189,"gnomadg_sas":0.0004142}},"start":82445881,"id":"rs117517710","seq_region_name":"9","pubmed":[31073882],"end":82445881}],"end":82445881,"start":82445881} diff --git a/tests/gentropy/datasource/ensembl/test_vep_variants.py b/tests/gentropy/datasource/ensembl/test_vep_variants.py index e3313a1e5..97f255cf0 100644 --- a/tests/gentropy/datasource/ensembl/test_vep_variants.py +++ b/tests/gentropy/datasource/ensembl/test_vep_variants.py @@ -104,7 +104,7 @@ def _setup(self: TestVEPParser, spark: SparkSession) -> None: schema=VariantEffectPredictorParser.get_schema(), ) self.processed_vep_output = VariantEffectPredictorParser.process_vep_output( - self.raw_vep_output + self.raw_vep_output, 200 ) def test_extract_variant_index_from_vep( From 
3ea47a954b98d6dd0558670b3f4e82645016ad09 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Tue, 3 Sep 2024 20:54:01 +0200 Subject: [PATCH 022/188] fix: remove finngen prefix from credible set (#746) --- src/gentropy/config.py | 1 - src/gentropy/datasource/finngen/finemapping.py | 10 ++-------- src/gentropy/datasource/finngen/study_index.py | 2 +- src/gentropy/finngen_finemapping_ingestion.py | 3 --- .../datasource/finngen/test_finngen_finemapping.py | 1 - 5 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index ed5fe4c81..9089dbecf 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -154,7 +154,6 @@ class FinngenFinemappingConfig(StepConfig): finngen_susie_finemapping_cs_summary_files: str = ( "gs://finngen-public-data-r11/finemap/summary/*.cred.summary.tsv" ) - finngen_release_prefix: str = "FINNGEN_R11_" finngen_finemapping_out: str = MISSING _target_: str = ( "gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep" diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 340b1d3c9..cbdd01fdc 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -206,7 +206,6 @@ def from_finngen_susie_finemapping( spark: SparkSession, finngen_susie_finemapping_snp_files: (str | list[str]), finngen_susie_finemapping_cs_summary_files: (str | list[str]), - finngen_release_prefix: str, credset_lbf_threshold: float = 0.8685889638065036, ) -> StudyLocus: """Process the SuSIE finemapping output for FinnGen studies. @@ -262,7 +261,6 @@ def from_finngen_susie_finemapping( spark (SparkSession): SparkSession object. finngen_susie_finemapping_snp_files (str | list[str]): SuSIE finemapping output filename(s). finngen_susie_finemapping_cs_summary_files (str | list[str]): filename of SuSIE finemapping credible set summaries. 
- finngen_release_prefix (str): FinnGen study prefix. credset_lbf_threshold (float, optional): Filter out credible sets below, Default 0.8685889638065036 == np.log10(np.exp(2)), this is threshold from publication. Returns: @@ -297,9 +295,7 @@ def from_finngen_susie_finemapping( .filter(f.col("cs").cast(t.IntegerType()) > 0) .select( # Add study idenfitier. - f.concat(f.lit(finngen_release_prefix), f.col("trait")) - .cast(t.StringType()) - .alias("studyId"), + f.col("trait").cast(t.StringType()).alias("studyId"), f.col("region"), # Add variant information. f.regexp_replace(f.col("v"), ":", "_").alias("variantId"), @@ -412,9 +408,7 @@ def from_finngen_susie_finemapping( (f.col("credibleSetlog10BF") > credset_lbf_threshold) | (f.col("credibleSetIndex") == 1) ) - .withColumn( - "studyId", f.concat(f.lit(finngen_release_prefix), f.col("trait")) - ) + .withColumn("studyId", f.col("trait")) ) processed_finngen_finemapping_df = processed_finngen_finemapping_df.join( diff --git a/src/gentropy/datasource/finngen/study_index.py b/src/gentropy/datasource/finngen/study_index.py index 71dce10d4..210c4330c 100644 --- a/src/gentropy/datasource/finngen/study_index.py +++ b/src/gentropy/datasource/finngen/study_index.py @@ -55,7 +55,7 @@ def join_efo_mapping( ) if not finngen_release_prefix_match: raise ValueError( - f"Invalid FinnGen release prefix: {finngen_release_prefix}, use the format FINNGEN_R*_" + f"Invalid FinnGen release prefix: {finngen_release_prefix}, use the format FINNGEN_R*" ) finngen_release = finngen_release_prefix_match.group("release").upper() diff --git a/src/gentropy/finngen_finemapping_ingestion.py b/src/gentropy/finngen_finemapping_ingestion.py index e85508023..80089cf68 100644 --- a/src/gentropy/finngen_finemapping_ingestion.py +++ b/src/gentropy/finngen_finemapping_ingestion.py @@ -20,7 +20,6 @@ def __init__( finngen_finemapping_out: str, finngen_susie_finemapping_snp_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_snp_files, 
finngen_susie_finemapping_cs_summary_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_cs_summary_files, - finngen_release_prefix: str = FinngenFinemappingConfig().finngen_release_prefix, ) -> None: """Run FinnGen finemapping ingestion step. @@ -29,7 +28,6 @@ def __init__( finngen_finemapping_out (str): Output path for the finemapping results in StudyLocus format. finngen_susie_finemapping_snp_files(str): Path to the FinnGen SuSIE finemapping results. finngen_susie_finemapping_cs_summary_files (str): FinnGen SuSIE summaries for CS filters(LBF>2). - finngen_release_prefix (str): Release prefix for FinnGen. """ # Read finemapping outputs from the input paths. @@ -37,7 +35,6 @@ def __init__( spark=session.spark, finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, - finngen_release_prefix=finngen_release_prefix, ) # Write the output. diff --git a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py index a7ffa4bc2..ed0b68643 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py +++ b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py @@ -40,7 +40,6 @@ def test_finngen_finemapping_from_finngen_susie_finemapping( spark=spark, finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, - finngen_release_prefix="FINNGEN_R11", ), StudyLocus, ) From 12ff35ba10547a3deefea8b6f4db5b81669cc57e Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:53:58 +0200 Subject: [PATCH 023/188] fix(finngen_r11): preserve all studyIds (#747) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(finngen_r11): preserve all studyIds Preserve all studyIds, 
even if EFO mapping is missing, so mapping between studyIndex and StudyLocus is by studyId column is accurate. * fix: typo in docstring Co-authored-by: Irene López Santiago <45119610+ireneisdoomed@users.noreply.github.com> --------- Co-authored-by: Szymon Szyszkowski Co-authored-by: Irene López Santiago <45119610+ireneisdoomed@users.noreply.github.com> --- src/gentropy/datasource/finngen/study_index.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gentropy/datasource/finngen/study_index.py b/src/gentropy/datasource/finngen/study_index.py index 210c4330c..1e2e71f72 100644 --- a/src/gentropy/datasource/finngen/study_index.py +++ b/src/gentropy/datasource/finngen/study_index.py @@ -38,6 +38,10 @@ def join_efo_mapping( All studies without EFO traits are dropped. The EFO mappings are then aggregated into lists per studyId. + NOTE: preserve all studyId entries even if they don't have EFO mappings. + This is to avoid discrepancies between `study_index` and `credible_set` `studyId` column. + The rows with missing EFO mappings will be dropped in the study_index validation step. + Args: study_index (StudyIndex): Study index table. efo_curation_mapping (DataFrame): Dataframe with EFO mappings. 
@@ -70,8 +74,10 @@ def join_efo_mapping( f.col("PROPERTY_VALUE").alias("traitFromSource"), ) ) - # NOTE: inner join to keep only the studies with EFO mappings - si_df = study_index.df.join(efo_mappings, on="traitFromSource", how="inner") + + si_df = study_index.df.join( + efo_mappings, on="traitFromSource", how="left_outer" + ) common_cols = [c for c in si_df.columns if c != "traitFromSourceMappedId"] si_df = si_df.groupby(common_cols).agg( f.collect_list("traitFromSourceMappedId").alias("traitFromSourceMappedIds") From 15a058ee470942574c2c274821cfb28515340242 Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Mon, 9 Sep 2024 14:43:16 +0100 Subject: [PATCH 024/188] feat: adding finemapping method to studylocusid hash (#744) * feat: adding finemapping method to studylocusid hash * chore: updating function in needed files * test: update doc test * fix: mistakes * fix: tidying logic * chore: adding studyLocusId recalculation to pics finemapping method --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/dataset/study_locus.py | 29 ++++++++++++------- .../datasource/eqtl_catalogue/finemapping.py | 2 +- .../datasource/finngen/finemapping.py | 4 ++- .../open_targets/l2g_gold_standard.py | 1 + src/gentropy/l2g.py | 1 + src/gentropy/method/pics.py | 6 ++++ src/gentropy/susie_finemapper.py | 7 +++-- 7 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 47a73c665..ee488b019 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -347,29 +347,38 @@ def _align_overlapping_tags( ) @staticmethod - def assign_study_locus_id(study_id_col: Column, variant_id_col: Column) -> Column: + def assign_study_locus_id( + study_id_col: Column, + variant_id_col: Column, + finemapping_col: Column = None, + ) -> Column: """Hashes a column with a variant 
ID and a study ID to extract a consistent studyLocusId. Args: study_id_col (Column): column name with a study ID variant_id_col (Column): column name with a variant ID + finemapping_col (Column, optional): column with fine mapping methodology Returns: Column: column with a study locus ID Examples: - >>> df = spark.createDataFrame([("GCST000001", "1_1000_A_C"), ("GCST000002", "1_1000_A_C")]).toDF("studyId", "variantId") - >>> df.withColumn("study_locus_id", StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId"))).show() - +----------+----------+-------------------+ - | studyId| variantId| study_locus_id| - +----------+----------+-------------------+ - |GCST000001|1_1000_A_C|1553357789130151995| - |GCST000002|1_1000_A_C|-415050894682709184| - +----------+----------+-------------------+ + >>> df = spark.createDataFrame([("GCST000001", "1_1000_A_C", "SuSiE-inf"), ("GCST000002", "1_1000_A_C", "pics")]).toDF("studyId", "variantId", "finemappingMethod") + >>> df.withColumn("study_locus_id", StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId"), f.col("finemappingMethod"))).show() + +----------+----------+-----------------+-------------------+ + | studyId| variantId|finemappingMethod| study_locus_id| + +----------+----------+-----------------+-------------------+ + |GCST000001|1_1000_A_C| SuSiE-inf|3801266831619496075| + |GCST000002|1_1000_A_C| pics|1581844826999194430| + +----------+----------+-----------------+-------------------+ """ + if finemapping_col is None: + finemapping_col = f.lit(None).cast(StringType()) variant_id_col = f.coalesce(variant_id_col, f.rand().cast("string")) - return f.xxhash64(study_id_col, variant_id_col).alias("studyLocusId") + return f.xxhash64(study_id_col, variant_id_col, finemapping_col).alias( + "studyLocusId" + ) @classmethod def calculate_credible_set_log10bf(cls: type[StudyLocus], logbfs: Column) -> Column: diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py 
b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index 21bf8ec38..a5c02dd3e 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -259,7 +259,7 @@ def from_susie_results( .select( *study_locus_cols, StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId") + f.col("studyId"), f.col("variantId"), f.col("finemappingMethod") ), StudyLocus.calculate_credible_set_log10bf( f.col("locus.logBF") diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index cbdd01fdc..092a79372 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -470,7 +470,9 @@ def from_finngen_susie_finemapping( ) ).withColumn( "studyLocusId", - StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId")), + StudyLocus.assign_study_locus_id( + f.col("studyId"), f.col("variantId"), f.col("finemappingMethod") + ), ) return StudyLocus( diff --git a/src/gentropy/datasource/open_targets/l2g_gold_standard.py b/src/gentropy/datasource/open_targets/l2g_gold_standard.py index 97b04320c..2cfcd62f8 100644 --- a/src/gentropy/datasource/open_targets/l2g_gold_standard.py +++ b/src/gentropy/datasource/open_targets/l2g_gold_standard.py @@ -1,4 +1,5 @@ """Parser for OTPlatform locus to gene gold standards curation.""" + from __future__ import annotations from typing import Type diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 432e46f88..cb13d3640 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -201,6 +201,7 @@ def _generate_feature_matrix(self) -> L2GFeatureMatrix: f.col("sentinel_variant.alleles.reference"), f.col("sentinel_variant.alleles.alternative"), ), + f.col("finemappingMethod"), ).alias("studyLocusId"), ) ), diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index e5ed5f2c6..2de06f512 100644 --- a/src/gentropy/method/pics.py +++ 
b/src/gentropy/method/pics.py @@ -254,6 +254,12 @@ def finemap( "finemappingMethod", f.coalesce(f.col("finemappingMethod"), f.lit("pics")), ) + .withColumn( + "studyLocusId", + StudyLocus.assign_study_locus_id( + "studyId", "variantId", "finemappingMethod" + ), + ) .drop("neglog_pvalue") ), _schema=StudyLocus.get_schema(), diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index f2997eb9f..f4b9141d0 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -94,7 +94,10 @@ def __init__( study_locus = ( StudyLocus.from_parquet(session, study_locus_input) .df.withColumn( - "studyLocusId", StudyLocus.assign_study_locus_id("studyId", "variantId") + "studyLocusId", + StudyLocus.assign_study_locus_id( + "studyId", "variantId", "finemappingMethod" + ), ) .collect()[0] ) @@ -380,7 +383,7 @@ def susie_inf_to_studylocus( .withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId") + f.col("studyId"), f.col("variantId"), f.col("finemappingMethod") ), ) .select( From 3c1e81a78cc7a49b43902c6de24586f9d1ca3a41 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:59:42 +0100 Subject: [PATCH 025/188] build(deps-dev): bump ipython from 8.26.0 to 8.27.0 (#741) Bumps [ipython](https://github.com/ipython/ipython) from 8.26.0 to 8.27.0. - [Release notes](https://github.com/ipython/ipython/releases) - [Commits](https://github.com/ipython/ipython/compare/8.26.0...8.27.0) --- updated-dependencies: - dependency-name: ipython dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- poetry.lock | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 46d0cf279..21c53d38d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiodns" @@ -4051,13 +4051,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.26.0" +version = "8.27.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.26.0-py3-none-any.whl", hash = "sha256:e6b347c27bdf9c32ee9d31ae85defc525755a1869f14057e900675b9e8d6e6ff"}, - {file = "ipython-8.26.0.tar.gz", hash = "sha256:1cec0fbba8404af13facebe83d04436a7434c7400e59f47acf467c64abd0956c"}, + {file = "ipython-8.27.0-py3-none-any.whl", hash = "sha256:f68b3cb8bde357a5d7adc9598d57e22a45dfbea19eb6b98286fa3b288c9cd55c"}, + {file = "ipython-8.27.0.tar.gz", hash = "sha256:0b99a2dc9f15fd68692e898e5568725c6d49c527d36a9fb5960ffbdeaa82ff7e"}, ] [package.dependencies] @@ -4835,6 +4835,7 @@ description = "Expand standard functools to methods" optional = false python-versions = "*" files = [ + {file = "methodtools-0.4.7-py2.py3-none-any.whl", hash = "sha256:5e188c780b236adc12e75b5f078c5afb419ef99eb648569fc6d7071f053a1f11"}, {file = "methodtools-0.4.7.tar.gz", hash = "sha256:e213439dd64cfe60213f7015da6efe5dd4003fd89376db3baa09fe13ec2bb0ba"}, ] @@ -6879,6 +6880,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = 
"PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -8431,6 +8433,7 @@ description = "'Turn functions and methods into fully controllable objects'" optional = false python-versions = "*" files = [ + {file = "wirerope-0.4.7-py2.py3-none-any.whl", hash = "sha256:332973a3be6898f02fd0e73b2e20414c5102cc6c811d75856a938206677495c8"}, {file = "wirerope-0.4.7.tar.gz", hash = "sha256:f3961039218276283c5037da0fa164619def0327595f10892d562a61a8603990"}, ] From 151b4ec8f177b461f21ded39207206eae9bfdfd2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:42:27 +0100 Subject: [PATCH 026/188] build(deps-dev): bump deptry from 0.19.1 to 0.20.0 (#742) Bumps [deptry](https://github.com/fpgmaas/deptry) from 0.19.1 to 0.20.0. - [Release notes](https://github.com/fpgmaas/deptry/releases) - [Changelog](https://github.com/fpgmaas/deptry/blob/main/CHANGELOG.md) - [Commits](https://github.com/fpgmaas/deptry/compare/0.19.1...0.20.0) --- updated-dependencies: - dependency-name: deptry dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- poetry.lock | 28 ++++++++++++++-------------- pyproject.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index 21c53d38d..2ba1390ce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1667,23 +1667,23 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] [[package]] name = "deptry" -version = "0.19.1" +version = "0.20.0" description = "A command line utility to check for unused, missing and transitive dependencies in a Python project." optional = false python-versions = ">=3.8" files = [ - {file = "deptry-0.19.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3a20ef0dd1c737fb05553d1b9c2fa9f185d0c9d3d881d255334cef401ffdc599"}, - {file = "deptry-0.19.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:2c6b2df353e5113fd2f787c2f7e694657548d388929e988e8644bd178e19fc5c"}, - {file = "deptry-0.19.1-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a407bab3486e3844f93d702f1a381942873b2a46056c693b5634bbde219bb056"}, - {file = "deptry-0.19.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43f33789b97b47313609e92b62fabf8a71bba0d35a7476806da5d3d152e32345"}, - {file = "deptry-0.19.1-cp38-abi3-win_amd64.whl", hash = "sha256:0bad85a77b31360d0f52383b14783fdae4a201b597c0158fe10e91a779c67079"}, - {file = "deptry-0.19.1-cp38-abi3-win_arm64.whl", hash = "sha256:c59142d9dca8873325692fbb7aa1d2902fde87020dcc8102f75120ba95515172"}, - {file = "deptry-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1abc119f9c8536b8ab1ee2122d4130665f33225d00d8615256ce354eb2c11ba"}, - {file = "deptry-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7344c6cea032b549d86e156aa1e679fb94cd44deb7e93f25cb6d9c0ded5ea06f"}, - {file = 
"deptry-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff7d8954265c48ea334fdd508339c51d3fba05e2d4a8be47712c69d1c8d35c94"}, - {file = "deptry-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:023073247e5dac21254bf7b600ca2e2b71560652d2dfbe11535445ee912ca059"}, - {file = "deptry-0.19.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:af8a0a9c42f8f92dfbc048e724fa89b9131f032f7e245812260560c214395abf"}, - {file = "deptry-0.19.1.tar.gz", hash = "sha256:1c12fea1d2301f42c7035c5636e4b9421457fde256fe7a241245662d20b4c841"}, + {file = "deptry-0.20.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:41434d95124851b83cb05524d1a09ad6fea62006beafed2ef90a6b501c1b237f"}, + {file = "deptry-0.20.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:b3b4b22d1406147de5d606a24042126cd74d52fdfdb0232b9c5fd0270d601610"}, + {file = "deptry-0.20.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:012fb106dbea6ca95196cdcd75ac90c516c8f01292f7934f2e802a7cf025a660"}, + {file = "deptry-0.20.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ce3920e2bd6d2b4427ab31ab8efb94bbef897001c2d395782bc30002966d12d"}, + {file = "deptry-0.20.0-cp38-abi3-win_amd64.whl", hash = "sha256:0c90ce64e637d0e902bc97c5a020adecfee9e9f09ee0bf4c61554994139bebdb"}, + {file = "deptry-0.20.0-cp38-abi3-win_arm64.whl", hash = "sha256:6886ff44aaf26fd83093f14f844ebc84589d90df9bbad9a1625e8a080e6f1be2"}, + {file = "deptry-0.20.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ace3b39b1d0763f357c79bab003d1b135bea2eb61102be539992621a42d1ac7b"}, + {file = "deptry-0.20.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d1a00f8c9e6c0829a4a523edd5e526e3df06d2b50e0a99446f09f9723df2efad"}, + {file = "deptry-0.20.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e233859f150df70ffff76e95f9b7326fc25494b9beb26e776edae20f0f515e7d"}, + {file = 
"deptry-0.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f92e7e97ef42477717747b190bc6796ab94b35655af126d8c577f7eae0eb3a9"}, + {file = "deptry-0.20.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f6cee6005997791bb77155667be055333fb63ae9a24f0f103f25faf1e7affe34"}, + {file = "deptry-0.20.0.tar.gz", hash = "sha256:62e9aaf3aea9e2ca66c85da98a0ba0290b4d3daea4e1d0ad937d447bd3c36402"}, ] [package.dependencies] @@ -8690,4 +8690,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "04bc80689794ab41c58a2daf8f7841a36b6f34bed3b74069b6e0e8c30f32d24b" +content-hash = "489d535e828faa827aa6cedc3bb739eff1c3b841770d3b9e94bdde44e572e621" diff --git a/pyproject.toml b/pyproject.toml index 3dbf9f8ff..d91629ab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ apache-airflow = "^2.8.0" apache-airflow-providers-google = "^10.13.1" pydoclint = ">=0.3.8,<0.6.0" prettier = "^0.0.7" -deptry = ">=0.12,<0.20" +deptry = ">=0.12,<0.21" yamllint = "^1.33.0" [tool.semantic_release] From 0b216f60bd8ffd2235dae80a91ade53cc4e67812 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:37:12 +0200 Subject: [PATCH 027/188] fix: validation name mapping (#753) * fix: use mapping instead of enum values in valid_rows * fix: typos * fix: swap valid and invalid paths --------- Co-authored-by: Szymon Szyszkowski --- src/gentropy/dataset/dataset.py | 27 +++++++++++++------ src/gentropy/dataset/study_index.py | 8 +++--- src/gentropy/dataset/study_locus.py | 8 +++--- src/gentropy/study_locus_validation.py | 10 +++---- .../dataset/test_dataset_exclusion.py | 22 +++++++-------- 5 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index b31537a2a..e019ea379 100644 --- 
a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -86,13 +86,15 @@ def get_QC_column_name(cls: type[Self]) -> str | None: return None @classmethod - def get_QC_categories(cls: type[Self]) -> list[str]: - """Method to get the QC categories for this dataset. Returns empty list unless overriden by child classes. + def get_QC_mappings(cls: type[Self]) -> dict[str, str]: + """Method to get the mapping between QC flag and corresponding QC category value. + + Returns empty dict unless overriden by child classes. Returns: - list[str]: Column name + dict[str, str]: Mapping between flag name and QC column category value. """ - return [] + return {} @classmethod def from_parquet( @@ -193,22 +195,31 @@ def validate_schema(self: Dataset) -> None: def valid_rows(self: Self, invalid_flags: list[str], invalid: bool = False) -> Self: """Filters `Dataset` according to a list of quality control flags. Only `Dataset` classes with a QC column can be validated. + This method checks do following steps: + - Check if the Dataset contains a QC column. + - Check if the invalid_flags exist in the QC mappings flags. + - Filter the Dataset according to the invalid_flags and invalid parameters. + Args: invalid_flags (list[str]): List of quality control flags to be excluded. - invalid (bool): If True returns the invalid rows, instead of the valids. Defaults to False. + invalid (bool): If True returns the invalid rows, instead of the valid. Defaults to False. Returns: Self: filtered dataset. Raises: ValueError: If the Dataset does not contain a QC column. + ValueError: If the invalid_flags elements do not exist in QC mappings flags. """ # If the invalid flags are not valid quality checks (enum) for this Dataset we raise an error: + invalid_reasons = [] for flag in invalid_flags: - if flag not in self.get_QC_categories(): + if flag not in self.get_QC_mappings(): raise ValueError( - f"{flag} is not a valid QC flag for {type(self).__name__} ({self.get_QC_categories()})." 
+ f"{flag} is not a valid QC flag for {type(self).__name__} ({self.get_QC_mappings()})." ) + reason = self.get_QC_mappings()[flag] + invalid_reasons.append(reason) qc_column_name = self.get_QC_column_name() # If Dataset (class) does not contain QC column we raise an error: @@ -222,7 +233,7 @@ def valid_rows(self: Self, invalid_flags: list[str], invalid: bool = False) -> S qc = f.when(f.col(column).isNull(), f.array()).otherwise(f.col(column)) filterCondition = ~f.arrays_overlap( - f.array([f.lit(i) for i in invalid_flags]), qc + f.array([f.lit(i) for i in invalid_reasons]), qc ) # Returning the filtered dataset: if invalid: diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index f60b2135c..ac637f137 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -118,13 +118,13 @@ def get_QC_column_name(cls: type[StudyIndex]) -> str: return "qualityControls" @classmethod - def get_QC_categories(cls: type[StudyIndex]) -> list[str]: - """Return the quality control categories. + def get_QC_mappings(cls: type[StudyIndex]) -> dict[str, str]: + """Quality control flag to QC column category mappings. Returns: - list[str]: The quality control categories. + dict[str, str]: Mapping between flag name and QC column category value. """ - return [member.value for member in StudyQualityCheck] + return {member.name: member.value for member in StudyQualityCheck} @classmethod def aggregate_and_map_ancestries( diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index ee488b019..edf9dc8be 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -421,13 +421,13 @@ def get_QC_column_name(cls: type[StudyLocus]) -> str: return "qualityControls" @classmethod - def get_QC_categories(cls: type[StudyLocus]) -> list[str]: - """Quality control categories. 
+ def get_QC_mappings(cls: type[StudyLocus]) -> dict[str, str]: + """Quality control flag to QC column category mappings. Returns: - list[str]: List of quality control categories. + dict[str, str]: Mapping between flag name and QC column category value. """ - return [member.value for member in StudyLocusQualityCheck] + return {member.name: member.value for member in StudyLocusQualityCheck} def filter_by_study_type( self: StudyLocus, study_type: str, study_index: StudyIndex diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 06995290c..41a572e79 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -48,10 +48,10 @@ def __init__( .validate_unique_study_locus_id() # Flagging duplicated study locus ids ).persist() # we will need this for 2 types of outputs - study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.parquet( - invalid_study_locus_path - ) - study_locus_with_qc.valid_rows( invalid_qc_reasons, invalid=True - ).df.write.parquet(valid_study_locus_path) + ).df.write.parquet(invalid_study_locus_path) + + study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.parquet( + valid_study_locus_path + ) diff --git a/tests/gentropy/dataset/test_dataset_exclusion.py b/tests/gentropy/dataset/test_dataset_exclusion.py index 361398f34..329a0a1d5 100644 --- a/tests/gentropy/dataset/test_dataset_exclusion.py +++ b/tests/gentropy/dataset/test_dataset_exclusion.py @@ -16,9 +16,9 @@ class TestDataExclusion: the right rows are excluded. 
""" - CORRECT_FILTER = ["The identifier of this study is not unique."] - INCORRECT_FILTER = ["Some mock flag."] - ALL_FILTERS = [member.value for member in StudyQualityCheck] + CORRECT_FLAG = ["DUPLICATED_STUDY"] + INCORRECT_FLAG = ["UNKNOWN_CATEGORY"] + ALL_FLAGS = [member.name for member in StudyQualityCheck] DATASET = [ # Good study no flag: @@ -52,8 +52,8 @@ def _setup(self: TestDataExclusion, spark: SparkSession) -> None: @pytest.mark.parametrize( "filter_, expected", [ - (CORRECT_FILTER, ["S1", "S2"]), - (ALL_FILTERS, ["S1"]), + (CORRECT_FLAG, ["S1", "S2"]), + (ALL_FLAGS, ["S1"]), ], ) def test_valid_rows( @@ -72,8 +72,8 @@ def test_valid_rows( @pytest.mark.parametrize( "filter_, expected", [ - (CORRECT_FILTER, ["S3"]), - (ALL_FILTERS, ["S2", "S3"]), + (CORRECT_FLAG, ["S3"]), + (ALL_FLAGS, ["S2", "S3"]), ], ) def test_invalid_rows( @@ -90,11 +90,7 @@ def test_invalid_rows( def test_failing_quality_flag(self: TestDataExclusion) -> None: """Test invalid quality flag.""" with pytest.raises(ValueError): - self.study_index.valid_rows( - self.INCORRECT_FILTER, invalid=True - ).df.collect() + self.study_index.valid_rows(self.INCORRECT_FLAG, invalid=True).df.collect() with pytest.raises(ValueError): - self.study_index.valid_rows( - self.INCORRECT_FILTER, invalid=False - ).df.collect() + self.study_index.valid_rows(self.INCORRECT_FLAG, invalid=False).df.collect() From 010c881ce10c5851734b088ec6106f9d1be2d78a Mon Sep 17 00:00:00 2001 From: Yakov Date: Wed, 11 Sep 2024 09:25:59 +0100 Subject: [PATCH 028/188] fix: removing old functions (#752) Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/susie_finemapper.py | 653 ------------------------------- 1 file changed, 653 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index f4b9141d0..587ea7963 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -27,7 +27,6 @@ ) from 
gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.summary_statistics import SummaryStatistics from gentropy.datasource.gnomad.ld import GnomADLDMatrix from gentropy.method.carma import CARMA from gentropy.method.sumstat_imputation import SummaryStatisticsImputation @@ -136,141 +135,6 @@ def __init__( index=False, ) - @staticmethod - def susie_finemapper_one_studylocus_row( - GWAS: SummaryStatistics, - session: Session, - study_locus_row: Row, - study_index: StudyIndex, - radius: int = 1_000_000, - max_causal_snps: int = 10, - primary_signal_pval_threshold: float = 5e-8, - secondary_signal_pval_threshold: float = 1e-7, - purity_mean_r2_threshold: float = 0, - purity_min_r2_threshold: float = 0.25, - sum_pips: float = 0.99, - cs_lbf_thr: float = 2, - ) -> StudyLocus: - """Susie fine-mapper for StudyLocus row with SummaryStatistics object. - - Args: - GWAS (SummaryStatistics): GWAS summary statistics - session (Session): Spark session - study_locus_row (Row): StudyLocus row - study_index (StudyIndex): StudyIndex object - radius (int): window size for fine-mapping - max_causal_snps (int): number of causal variants - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals - purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets - purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets - sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 - - Returns: - StudyLocus: StudyLocus object with fine-mapped credible sets - """ - # PLEASE DO NOT REMOVE THIS LINE - pd.DataFrame.iteritems = 
pd.DataFrame.items - - chromosome = study_locus_row["chromosome"] - position = study_locus_row["position"] - studyId = study_locus_row["studyId"] - - study_index_df = study_index._df - study_index_df = study_index_df.filter(f.col("studyId") == studyId) - major_population = study_index_df.select( - "studyId", - f.array_max(f.col("ldPopulationStructure")) - .getItem("ldPopulation") - .alias("majorPopulation"), - ).collect()[0]["majorPopulation"] - - region = ( - chromosome - + ":" - + str(int(position - radius)) - + "-" - + str(int(position + radius)) - ) - - gwas_df = ( - GWAS.df.withColumn("z", f.col("beta") / f.col("standardError")) - .withColumn("chromosome", f.split(f.col("variantId"), "_")[0]) - .withColumn("position", f.split(f.col("variantId"), "_")[1]) - .filter(f.col("studyId") == studyId) - .filter(f.col("z").isNotNull()) - ) - # Remove ALL duplicated variants from GWAS DataFrame - we don't know which is correct - variant_counts = gwas_df.groupBy("variantId").count() - unique_variants = variant_counts.filter(f.col("count") == 1) - gwas_df = gwas_df.join(unique_variants, on="variantId", how="left_semi") - - ld_index = ( - GnomADLDMatrix() - .get_locus_index( - study_locus_row=study_locus_row, - radius=radius, - major_population=major_population, - ) - .withColumn( - "variantId", - f.concat( - f.lit(chromosome), - f.lit("_"), - f.col("`locus.position`"), - f.lit("_"), - f.col("alleles").getItem(0), - f.lit("_"), - f.col("alleles").getItem(1), - ).cast("string"), - ) - ) - - # Filtering out the variants that are not in the LD matrix, we don't need them - gwas_index = gwas_df.join( - ld_index.select("variantId", "alleles", "idx"), on="variantId" - ).sort("idx") - - gnomad_ld = GnomADLDMatrix.get_numpy_matrix( - gwas_index, gnomad_ancestry=major_population - ) - - pd_df = gwas_index.toPandas() - z_to_fm = np.array(pd_df["z"]) - ld_to_fm = gnomad_ld - - susie_output = SUSIE_inf.susie_inf(z=z_to_fm, LD=ld_to_fm, L=max_causal_snps) - - schema = StructType( - [ - 
StructField("variantId", StringType(), True), - StructField("chromosome", StringType(), True), - StructField("position", IntegerType(), True), - StructField("z", DoubleType(), True), - ] - ) - pd_df["position"] = pd_df["position"].astype(int) - variant_index = session.spark.createDataFrame( - pd_df[["variantId", "chromosome", "position", "z"]], - schema=schema, - ) - - return SusieFineMapperStep.susie_inf_to_studylocus( - susie_output=susie_output, - session=session, - studyId=studyId, - region=region, - variant_index=variant_index, - ld_matrix=ld_to_fm, - primary_signal_pval_threshold=primary_signal_pval_threshold, - secondary_signal_pval_threshold=secondary_signal_pval_threshold, - purity_mean_r2_threshold=purity_mean_r2_threshold, - purity_min_r2_threshold=purity_min_r2_threshold, - sum_pips=sum_pips, - cs_lbf_thr=cs_lbf_thr, - ) - @staticmethod def susie_inf_to_studylocus( susie_output: dict[str, Any], @@ -506,147 +370,6 @@ def susie_inf_to_studylocus( _schema=StudyLocus.get_schema(), ) - @staticmethod - def susie_finemapper_ss_gathered( - session: Session, - study_locus_row: Row, - study_index: StudyIndex, - radius: int = 1_000_000, - max_causal_snps: int = 10, - primary_signal_pval_threshold: float = 5e-8, - secondary_signal_pval_threshold: float = 1e-7, - purity_mean_r2_threshold: float = 0, - purity_min_r2_threshold: float = 0.25, - cs_lbf_thr: float = 2, - sum_pips: float = 0.99, - ) -> StudyLocus | None: - """Susie fine-mapper for StudyLocus row with locus annotated summary statistics. 
- - Args: - session (Session): Spark session - study_locus_row (Row): StudyLocus row - study_index (StudyIndex): StudyIndex object - radius (int): window size for fine-mapping - max_causal_snps (int): number of causal variants - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals - purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets - purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets - cs_lbf_thr (float): credible set logBF threshold for filtering credible sets - sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - - Returns: - StudyLocus | None: StudyLocus object with fine-mapped credible sets, or None - """ - # PLEASE DO NOT REMOVE THIS LINE - pd.DataFrame.iteritems = pd.DataFrame.items - - chromosome = study_locus_row["chromosome"] - position = study_locus_row["position"] - studyId = study_locus_row["studyId"] - - study_index_df = study_index._df - study_index_df = study_index_df.filter(f.col("studyId") == studyId) - major_population = study_index_df.select( - "studyId", - f.array_max(f.col("ldPopulationStructure")) - .getItem("ldPopulation") - .alias("majorPopulation"), - ).collect()[0]["majorPopulation"] - - region = ( - chromosome - + ":" - + str(int(position - radius)) - + "-" - + str(int(position + radius)) - ) - - schema = StudyLocus.get_schema() - gwas_df = session.spark.createDataFrame([study_locus_row], schema=schema) - exploded_df = gwas_df.select(f.explode("locus").alias("locus")) - - result_df = exploded_df.select( - "locus.variantId", "locus.beta", "locus.standardError" - ) - gwas_df = ( - result_df.withColumn("z", f.col("beta") / f.col("standardError")) - .withColumn("chromosome", f.split(f.col("variantId"), 
"_")[0]) - .withColumn("position", f.split(f.col("variantId"), "_")[1]) - .filter(f.col("z").isNotNull()) - ) - # Remove ALL duplicated variants from GWAS DataFrame - we don't know which is correct - variant_counts = gwas_df.groupBy("variantId").count() - unique_variants = variant_counts.filter(f.col("count") == 1) - gwas_df = gwas_df.join(unique_variants, on="variantId", how="left_semi") - - ld_index = ( - GnomADLDMatrix() - .get_locus_index( - study_locus_row=study_locus_row, - radius=radius, - major_population=major_population, - ) - .withColumn( - "variantId", - f.concat( - f.lit(chromosome), - f.lit("_"), - f.col("`locus.position`"), - f.lit("_"), - f.col("alleles").getItem(0), - f.lit("_"), - f.col("alleles").getItem(1), - ).cast("string"), - ) - ) - - # Filtering out the variants that are not in the LD matrix, we don't need them - gwas_index = gwas_df.join( - ld_index.select("variantId", "alleles", "idx"), on="variantId" - ).sort("idx") - if gwas_index.rdd.isEmpty(): - logging.warning("No overlapping variants in the LD Index") - return None - gnomad_ld = GnomADLDMatrix.get_numpy_matrix( - gwas_index, gnomad_ancestry=major_population - ) - - pd_df = gwas_index.toPandas() - z_to_fm = np.array(pd_df["z"]) - ld_to_fm = gnomad_ld - - susie_output = SUSIE_inf.susie_inf(z=z_to_fm, LD=ld_to_fm, L=max_causal_snps) - - schema = StructType( - [ - StructField("variantId", StringType(), True), - StructField("chromosome", StringType(), True), - StructField("position", IntegerType(), True), - StructField("z", DoubleType(), True), - ] - ) - pd_df["position"] = pd_df["position"].astype(int) - variant_index = session.spark.createDataFrame( - pd_df[["variantId", "chromosome", "position", "z"]], - schema=schema, - ) - - return SusieFineMapperStep.susie_inf_to_studylocus( - susie_output=susie_output, - session=session, - studyId=studyId, - region=region, - variant_index=variant_index, - ld_matrix=ld_to_fm, - primary_signal_pval_threshold=primary_signal_pval_threshold, - 
secondary_signal_pval_threshold=secondary_signal_pval_threshold, - purity_mean_r2_threshold=purity_mean_r2_threshold, - purity_min_r2_threshold=purity_min_r2_threshold, - cs_lbf_thr=cs_lbf_thr, - sum_pips=sum_pips, - ) - @staticmethod def susie_finemapper_from_prepared_dataframes( GWAS_df: DataFrame, @@ -847,382 +570,6 @@ def susie_finemapper_from_prepared_dataframes( "log": log_df, } - @staticmethod - def susie_finemapper_one_studylocus_row_v2_dev( - GWAS: SummaryStatistics, - session: Session, - study_locus_row: Row, - study_index: StudyIndex, - radius: int = 1_000_000, - max_causal_snps: int = 10, - susie_est_tausq: bool = False, - run_carma: bool = False, - run_sumstat_imputation: bool = False, - carma_time_limit: int = 600, - imputed_r2_threshold: float = 0.9, - ld_score_threshold: float = 5, - sum_pips: float = 0.99, - primary_signal_pval_threshold: float = 5e-8, - secondary_signal_pval_threshold: float = 1e-7, - purity_mean_r2_threshold: float = 0, - purity_min_r2_threshold: float = 0.25, - cs_lbf_thr: float = 2, - ) -> dict[str, Any]: - """Susie fine-mapper function that uses Summary Statstics, chromosome and position as inputs. 
- - Args: - GWAS (SummaryStatistics): GWAS summary statistics - session (Session): Spark session - study_locus_row (Row): StudyLocus row - study_index (StudyIndex): StudyIndex object - radius (int): Radius in base-pairs of window for fine-mapping - max_causal_snps (int): maximum number of causal variants - susie_est_tausq (bool): estimate tau squared, default is False - run_carma (bool): run CARMA, default is False - run_sumstat_imputation (bool): run summary statistics imputation, default is False - carma_time_limit (int): CARMA time limit, default is 600 seconds - imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 - ld_score_threshold (float): LD score threshold ofr imputation, default is 4 - sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals - purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets - purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets - cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 - - Returns: - dict[str, Any]: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map - """ - # PLEASE DO NOT REMOVE THIS LINE - pd.DataFrame.iteritems = pd.DataFrame.items - - chromosome = study_locus_row["chromosome"] - position = study_locus_row["position"] - studyId = study_locus_row["studyId"] - - study_index_df = study_index._df - study_index_df = study_index_df.filter(f.col("studyId") == studyId) - major_population = study_index_df.select( - "studyId", - 
f.array_max(f.col("ldPopulationStructure")) - .getItem("ldPopulation") - .alias("majorPopulation"), - ).collect()[0]["majorPopulation"] - - region = ( - chromosome - + ":" - + str(int(position - radius)) - + "-" - + str(int(position + radius)) - ) - gwas_df = ( - GWAS.df.withColumn("z", f.col("beta") / f.col("standardError")) - .withColumn( - "chromosome", f.split(f.col("variantId"), "_")[0].cast("string") - ) - .withColumn("position", f.split(f.col("variantId"), "_")[1].cast("int")) - .filter(f.col("studyId") == studyId) - .filter(f.col("z").isNotNull()) - .filter(f.col("chromosome") == chromosome) - .filter(f.col("position") >= position - radius) - .filter(f.col("position") <= position + radius) - ) - - ld_index = ( - GnomADLDMatrix() - .get_locus_index( - study_locus_row=study_locus_row, - radius=radius, - major_population=major_population, - ) - .withColumn( - "variantId", - f.concat( - f.lit(chromosome), - f.lit("_"), - f.col("`locus.position`"), - f.lit("_"), - f.col("alleles").getItem(0), - f.lit("_"), - f.col("alleles").getItem(1), - ).cast("string"), - ) - ) - - gnomad_ld = GnomADLDMatrix.get_numpy_matrix( - ld_index, gnomad_ancestry=major_population - ) - - out = SusieFineMapperStep.susie_finemapper_from_prepared_dataframes( - GWAS_df=gwas_df, - ld_index=ld_index, - gnomad_ld=gnomad_ld, - L=max_causal_snps, - session=session, - studyId=studyId, - region=region, - susie_est_tausq=susie_est_tausq, - run_carma=run_carma, - run_sumstat_imputation=run_sumstat_imputation, - carma_time_limit=carma_time_limit, - imputed_r2_threshold=imputed_r2_threshold, - ld_score_threshold=ld_score_threshold, - sum_pips=sum_pips, - primary_signal_pval_threshold=primary_signal_pval_threshold, - secondary_signal_pval_threshold=secondary_signal_pval_threshold, - purity_mean_r2_threshold=purity_mean_r2_threshold, - purity_min_r2_threshold=purity_min_r2_threshold, - cs_lbf_thr=cs_lbf_thr, - ) - - return out - - @staticmethod - def 
susie_finemapper_one_studylocus_row_v3_dev_ss_gathered( - session: Session, - study_locus_row: Row, - study_index: StudyIndex, - radius: int = 1_000_000, - max_causal_snps: int = 10, - susie_est_tausq: bool = False, - run_carma: bool = False, - run_sumstat_imputation: bool = False, - carma_time_limit: int = 600, - imputed_r2_threshold: float = 0.9, - ld_score_threshold: float = 5, - sum_pips: float = 0.99, - primary_signal_pval_threshold: float = 5e-8, - secondary_signal_pval_threshold: float = 1e-7, - purity_mean_r2_threshold: float = 0, - purity_min_r2_threshold: float = 0.25, - cs_lbf_thr: float = 2, - ) -> dict[str, Any] | None: - """Susie fine-mapper function that uses study-locus row with collected locus, chromosome and position as inputs. - - Args: - session (Session): Spark session - study_locus_row (Row): StudyLocus row with collected locus - study_index (StudyIndex): StudyIndex object - radius (int): Radius in base-pairs of window for fine-mapping - max_causal_snps (int): maximum number of causal variants - susie_est_tausq (bool): estimate tau squared, default is False - run_carma (bool): run CARMA, default is False - run_sumstat_imputation (bool): run summary statistics imputation, default is False - carma_time_limit (int): CARMA time limit, default is 600 seconds - imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 - ld_score_threshold (float): LD score threshold ofr imputation, default is 4 - sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals - purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets - purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets - 
cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 - - Returns: - dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map, or None - """ - # PLEASE DO NOT REMOVE THIS LINE - pd.DataFrame.iteritems = pd.DataFrame.items - - chromosome = study_locus_row["chromosome"] - position = study_locus_row["position"] - studyId = study_locus_row["studyId"] - - study_index_df = study_index._df - study_index_df = study_index_df.filter(f.col("studyId") == studyId) - major_population = study_index_df.select( - "studyId", - f.array_max(f.col("ldPopulationStructure")) - .getItem("ldPopulation") - .alias("majorPopulation"), - ).collect()[0]["majorPopulation"] - - region = ( - chromosome - + ":" - + str(int(position - radius)) - + "-" - + str(int(position + radius)) - ) - - schema = StudyLocus.get_schema() - gwas_df = session.spark.createDataFrame([study_locus_row], schema=schema) - exploded_df = gwas_df.select(f.explode("locus").alias("locus")) - - result_df = exploded_df.select( - "locus.variantId", "locus.beta", "locus.standardError" - ) - gwas_df = ( - result_df.withColumn("z", f.col("beta") / f.col("standardError")) - .withColumn( - "chromosome", f.split(f.col("variantId"), "_")[0].cast("string") - ) - .withColumn("position", f.split(f.col("variantId"), "_")[1].cast("int")) - .filter(f.col("chromosome") == chromosome) - .filter(f.col("position") >= position - radius) - .filter(f.col("position") <= position + radius) - .filter(f.col("z").isNotNull()) - ) - - # Remove ALL duplicated variants from GWAS DataFrame - we don't know which is correct - variant_counts = gwas_df.groupBy("variantId").count() - unique_variants = variant_counts.filter(f.col("count") == 1) - gwas_df = gwas_df.join(unique_variants, on="variantId", how="left_semi") - - ld_index = ( - GnomADLDMatrix() - .get_locus_index( - 
study_locus_row=study_locus_row, - radius=radius, - major_population=major_population, - ) - .withColumn( - "variantId", - f.concat( - f.lit(chromosome), - f.lit("_"), - f.col("`locus.position`"), - f.lit("_"), - f.col("alleles").getItem(0), - f.lit("_"), - f.col("alleles").getItem(1), - ).cast("string"), - ) - ) - # Remove ALL duplicated variants from ld_index DataFrame - we don't know which is correct - variant_counts = ld_index.groupBy("variantId").count() - unique_variants = variant_counts.filter(f.col("count") == 1) - ld_index = ld_index.join(unique_variants, on="variantId", how="left_semi").sort( - "idx" - ) - - if not run_sumstat_imputation: - # Filtering out the variants that are not in the LD matrix, we don't need them - gwas_index = gwas_df.join( - ld_index.select("variantId", "alleles", "idx"), on="variantId" - ).sort("idx") - gwas_df = gwas_index.select( - "variantId", - "z", - "chromosome", - "position", - "beta", - "StandardError", - ) - gwas_index = gwas_index.drop( - "z", "chromosome", "position", "beta", "StandardError" - ) - if gwas_index.rdd.isEmpty(): - logging.warning("No overlapping variants in the LD Index") - return None - gnomad_ld = GnomADLDMatrix.get_numpy_matrix( - gwas_index, gnomad_ancestry=major_population - ) - # Module to remove NANs from the LD matrix - if sum(sum(np.isnan(gnomad_ld))) > 0: - gwas_index = gwas_index.toPandas() - - # First round of filtering out the variants with NANs - nan_count = 1 - (sum(np.isnan(gnomad_ld)) / len(gnomad_ld)) - indices = np.where(nan_count >= 0.98) - indices = indices[0] - gnomad_ld = gnomad_ld[indices][:, indices] - - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - # Second round of filtering out the variants with NANs - nan_count = sum(np.isnan(gnomad_ld)) - indices = np.where(nan_count == 0) - indices = indices[0] - - gnomad_ld = gnomad_ld[indices][:, indices] - gwas_index = 
gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - gwas_index = session.spark.createDataFrame(gwas_index) - else: - gwas_index = gwas_df.join( - ld_index.select("variantId", "alleles", "idx"), on="variantId" - ).sort("idx") - if gwas_index.rdd.isEmpty(): - logging.warning("No overlapping variants in the LD Index") - return None - gwas_index = ld_index - gnomad_ld = GnomADLDMatrix.get_numpy_matrix( - gwas_index, gnomad_ancestry=major_population - ) - # Module to remove NANs from the LD matrix - if sum(sum(np.isnan(gnomad_ld))) > 0: - gwas_index = gwas_index.toPandas() - - # First round of filtering out the variants with NANs - nan_count = 1 - (sum(np.isnan(gnomad_ld)) / len(gnomad_ld)) - indices = np.where(nan_count >= 0.98) - indices = indices[0] - gnomad_ld = gnomad_ld[indices][:, indices] - - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - # Second round of filtering out the variants with NANs - nan_count = sum(np.isnan(gnomad_ld)) - indices = np.where(nan_count == 0) - indices = indices[0] - - gnomad_ld = gnomad_ld[indices][:, indices] - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - gwas_index = session.spark.createDataFrame(gwas_index) - - # sanity filters on LD matrix - np.fill_diagonal(gnomad_ld, 1) - gnomad_ld[gnomad_ld > 1] = 1 - gnomad_ld[gnomad_ld < -1] = -1 - upper_triangle = np.triu(gnomad_ld) - gnomad_ld = ( - upper_triangle + upper_triangle.T - np.diag(upper_triangle.diagonal()) - ) - np.fill_diagonal(gnomad_ld, 1) - - out = SusieFineMapperStep.susie_finemapper_from_prepared_dataframes( - GWAS_df=gwas_df, - ld_index=gwas_index, - gnomad_ld=gnomad_ld, - L=max_causal_snps, - session=session, - studyId=studyId, - region=region, - susie_est_tausq=susie_est_tausq, - 
run_carma=run_carma, - run_sumstat_imputation=run_sumstat_imputation, - carma_time_limit=carma_time_limit, - imputed_r2_threshold=imputed_r2_threshold, - ld_score_threshold=ld_score_threshold, - sum_pips=sum_pips, - primary_signal_pval_threshold=primary_signal_pval_threshold, - secondary_signal_pval_threshold=secondary_signal_pval_threshold, - purity_mean_r2_threshold=purity_mean_r2_threshold, - purity_min_r2_threshold=purity_min_r2_threshold, - cs_lbf_thr=cs_lbf_thr, - ) - - return out - @staticmethod def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( session: Session, From feb9cadeb282d4f5dcfda4e261710f55e723f2c5 Mon Sep 17 00:00:00 2001 From: Kirill Tsukanov Date: Wed, 11 Sep 2024 13:24:28 +0100 Subject: [PATCH 029/188] refactor: generalise per-chromosome processing (#754) * refactor: move VA preparation logic into a separate module * refactor: generalise per-chromosome summary stats ingestion * Update src/gentropy/common/per_chromosome.py Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/common/per_chromosome.py | 98 +++++++++++++++++++ .../ukb_ppp_eur_sumstat_preprocess.py | 76 ++------------ 2 files changed, 104 insertions(+), 70 deletions(-) create mode 100644 src/gentropy/common/per_chromosome.py diff --git a/src/gentropy/common/per_chromosome.py b/src/gentropy/common/per_chromosome.py new file mode 100644 index 000000000..528e6ffbe --- /dev/null +++ b/src/gentropy/common/per_chromosome.py @@ -0,0 +1,98 @@ +"""Spark operations to make efficient per-chromosome processing possible.""" + +from __future__ import annotations + +import pyspark.sql.functions as f +from pyspark.sql import SparkSession + +from gentropy.datasource.ukb_ppp_eur.summary_stats import UkbPppEurSummaryStats + + +def prepare_va(session: SparkSession, variant_annotation_path: str, tmp_variant_annotation_path: str) -> None: + """Prepare 
the Variant Annotation dataset for efficient per-chromosome joins. + + Args: + session (SparkSession): The Spark session to be used for reading and writing data. + variant_annotation_path (str): The path to the input variant annotation dataset. + tmp_variant_annotation_path (str): The path to store the temporary output for the repartitioned annotation dataset. + """ + va_df = ( + session + .spark + .read + .parquet(variant_annotation_path) + ) + va_df_direct = ( + va_df. + select( + f.col("chromosome").alias("vaChromosome"), + f.col("variantId"), + f.concat_ws( + "_", + f.col("chromosome"), + f.col("position"), + f.col("referenceAllele"), + f.col("alternateAllele") + ).alias("ukb_ppp_id"), + f.lit("direct").alias("direction") + ) + ) + va_df_flip = ( + va_df. + select( + f.col("chromosome").alias("vaChromosome"), + f.col("variantId"), + f.concat_ws( + "_", + f.col("chromosome"), + f.col("position"), + f.col("alternateAllele"), + f.col("referenceAllele") + ).alias("ukb_ppp_id"), + f.lit("flip").alias("direction") + ) + ) + ( + va_df_direct.union(va_df_flip) + .coalesce(1) + .repartition("vaChromosome") + .write + .partitionBy("vaChromosome") + .mode("overwrite") + .parquet(tmp_variant_annotation_path) + ) + + +def process_summary_stats_per_chromosome(session: SparkSession, ingestion_class: type[UkbPppEurSummaryStats], raw_summary_stats_path: str, tmp_variant_annotation_path: str, summary_stats_output_path: str) -> None: + """Processes summary statistics for each chromosome, partitioning and writing results. + + Args: + session (SparkSession): The Spark session to use for distributed data processing. + ingestion_class (type[UkbPppEurSummaryStats]): The class used to handle ingestion of source data. Must have a `from_source` method returning a DataFrame. + raw_summary_stats_path (str): The path to the raw summary statistics files. + tmp_variant_annotation_path (str): The path to temporary variant annotation data, used for chromosome joins. 
+ summary_stats_output_path (str): The output path to write processed summary statistics as parquet files. + """ + # Set mode to overwrite for processing the first chromosome. + write_mode = "overwrite" + # Chromosome 23 is X, this is handled downstream. + for chromosome in list(range(1, 24)): + logging_message = f" Processing chromosome {chromosome}" + session.logger.info(logging_message) + ( + ingestion_class.from_source( + spark=session.spark, + raw_summary_stats_path=raw_summary_stats_path, + tmp_variant_annotation_path=tmp_variant_annotation_path, + chromosome=str(chromosome), + ) + .df + .coalesce(1) + .repartition("studyId", "chromosome") + .write + .partitionBy("studyId", "chromosome") + .mode(write_mode) + .parquet(summary_stats_output_path) + ) + # Now that we have written the first chromosome, change mode to append for subsequent operations. + write_mode = "append" diff --git a/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py b/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py index fc7fed548..b192d963f 100644 --- a/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py +++ b/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py @@ -2,8 +2,10 @@ from __future__ import annotations -import pyspark.sql.functions as f - +from gentropy.common.per_chromosome import ( + prepare_va, + process_summary_stats_per_chromosome, +) from gentropy.common.session import Session from gentropy.datasource.ukb_ppp_eur.study_index import UkbPppEurStudyIndex from gentropy.datasource.ukb_ppp_eur.summary_stats import UkbPppEurSummaryStats @@ -27,51 +29,7 @@ def __init__( summary_stats_output_path (str): Summary stats output path. """ session.logger.info("Pre-compute the direct and flipped variant annotation dataset.") - va_df = ( - session - .spark - .read - .parquet(variant_annotation_path) - ) - va_df_direct = ( - va_df. 
- select( - f.col("chromosome").alias("vaChromosome"), - f.col("variantId"), - f.concat_ws( - "_", - f.col("chromosome"), - f.col("position"), - f.col("referenceAllele"), - f.col("alternateAllele") - ).alias("ukb_ppp_id"), - f.lit("direct").alias("direction") - ) - ) - va_df_flip = ( - va_df. - select( - f.col("chromosome").alias("vaChromosome"), - f.col("variantId"), - f.concat_ws( - "_", - f.col("chromosome"), - f.col("position"), - f.col("alternateAllele"), - f.col("referenceAllele") - ).alias("ukb_ppp_id"), - f.lit("flip").alias("direction") - ) - ) - ( - va_df_direct.union(va_df_flip) - .coalesce(1) - .repartition("vaChromosome") - .write - .partitionBy("vaChromosome") - .mode("overwrite") - .parquet(tmp_variant_annotation_path) - ) + prepare_va(session, variant_annotation_path, tmp_variant_annotation_path) session.logger.info("Process study index.") ( @@ -87,26 +45,4 @@ def __init__( ) session.logger.info("Process and harmonise summary stats.") - # Set mode to overwrite for processing the first chromosome. - write_mode = "overwrite" - # Chromosome 23 is X, this is handled downstream. - for chromosome in list(range(1, 24)): - logging_message = f" Processing chromosome {chromosome}" - session.logger.info(logging_message) - ( - UkbPppEurSummaryStats.from_source( - spark=session.spark, - raw_summary_stats_path=raw_summary_stats_path, - tmp_variant_annotation_path=tmp_variant_annotation_path, - chromosome=str(chromosome), - ) - .df - .coalesce(1) - .repartition("studyId", "chromosome") - .write - .partitionBy("studyId", "chromosome") - .mode(write_mode) - .parquet(summary_stats_output_path) - ) - # Now that we have written the first chromosome, change mode to append for subsequent operations. 
- write_mode = "append" + process_summary_stats_per_chromosome(session, UkbPppEurSummaryStats, raw_summary_stats_path, tmp_variant_annotation_path, summary_stats_output_path) From d10cc207c0bc6c538e887e3fea6f4b1e6eb081ca Mon Sep 17 00:00:00 2001 From: Kirill Tsukanov Date: Wed, 11 Sep 2024 13:30:21 +0100 Subject: [PATCH 030/188] refactor: generalise the harmonisation pipeline (#755) * refactor: move harmonisation into a separate module * refactor: make column names configurable * feat: make INFO and A1FREQ columns optional * docs: expand comments on tmp_variant_annotation_path and variant types --- src/gentropy/common/harmonise.py | 184 ++++++++++++++++++ .../datasource/ukb_ppp_eur/summary_stats.py | 142 ++------------ 2 files changed, 199 insertions(+), 127 deletions(-) create mode 100644 src/gentropy/common/harmonise.py diff --git a/src/gentropy/common/harmonise.py b/src/gentropy/common/harmonise.py new file mode 100644 index 000000000..aac763e43 --- /dev/null +++ b/src/gentropy/common/harmonise.py @@ -0,0 +1,184 @@ +"""Variant harmonisation utilities.""" + +import pyspark.sql.functions as f +import pyspark.sql.types as t +from pyspark.sql import DataFrame, SparkSession + +from gentropy.common.spark_helpers import neglog_pvalue_to_mantissa_and_exponent + + +def harmonise_summary_stats( + spark: SparkSession, + raw_summary_stats_path: str, + tmp_variant_annotation_path: str, + chromosome: str, + colname_position: str, + colname_allele0: str, + colname_allele1: str, + colname_a1freq: str, + colname_info: str, + colname_beta: str, + colname_se: str, + colname_mlog10p: str, + colname_n: str, +) -> DataFrame: + """Ingest and harmonise the summary stats. + + 1. Rename chromosome 23 to X. + 2. Filter out low INFO rows. + 3. Filter out low frequency rows. + 4. Assign variant types. + 5. Create variant ID for joining the variant annotation dataset. + 6. Join with the Variant Annotation dataset. + 7. Drop bad quality variants. 
+ + Args: + spark (SparkSession): Spark session object. + raw_summary_stats_path (str): Input raw summary stats path. + tmp_variant_annotation_path (str): Path to the Variant Annotation dataset which has been further prepared and processed by the per_chromosome module (previous PR in the chain) to speed up the joins in the harmonisation phase. It includes all variants in both the direct (A0/A1) and reverse (A1/A0) orientations, so that the direction of the variant can be easily determined on joining. + chromosome (str): Which chromosome to process. + colname_position (str): Column name for position. + colname_allele0 (str): Column name for allele0. + colname_allele1 (str): Column name for allele1. + colname_a1freq (str): Column name for allele1 frequency (optional). + colname_info (str): Column name for INFO, reflecting variant quality (optional). + colname_beta (str): Column name for beta. + colname_se (str): Column name for beta standard error. + colname_mlog10p (str): Column name for -log10(p). + colname_n (str): Column name for the number of samples. + + Returns: + DataFrame: A harmonised summary stats dataframe. + """ + # Read the precomputed variant annotation dataset. + va_df = ( + spark + .read + .parquet(tmp_variant_annotation_path) + .filter(f.col("vaChromosome") == ("X" if chromosome == "23" else chromosome)) + .persist() + ) + + # Read and process the summary stats dataset. + df = ( + spark + .read + .parquet(raw_summary_stats_path) + .filter(f.col("chromosome") == chromosome) + # Harmonise, 1: Rename chromosome 23 to X. + .withColumn( + "chromosome", + f.when( + f.col("chromosome") == "23", "X" + ).otherwise(f.col("chromosome")) + ) + ) + if colname_info: + # Harmonise, 2: Filter out low INFO rows. + df = df.filter(f.col(colname_info) >= 0.8) + if colname_a1freq: + # Harmonise, 3: Filter out low frequency rows. 
+ df = ( + df + .withColumn( + "MAF", + f.when(f.col(colname_a1freq) < 0.5, f.col(colname_a1freq)) + .otherwise(1 - f.col(colname_a1freq)) + ) + .filter(f.col("MAF") >= 0.0001) + .drop("MAF") + ) + df = ( + df + # Harmonise, 4: Assign variant types. + # There are three possible variant types: + # 1. `snp_c` means an SNP converting a base into its complementary base: A<>T or G> SummaryStatistics: """Ingest and harmonise all summary stats for UKB PPP (EUR) data. - 1. Rename chromosome 23 to X. - 2. Filter out low INFO rows. - 3. Filter out low frequency rows. - 4. Assign variant types. - 5. Create variant ID for joining the variant annotation dataset. - 6. Join with the Variant Annotation dataset. - 7. Drop bad quality variants. - Args: spark (SparkSession): Spark session object. raw_summary_stats_path (str): Input raw summary stats path. @@ -43,122 +33,20 @@ def from_source( Returns: SummaryStatistics: Processed summary statistics dataset for a given chromosome. """ - # Read the precomputed variant annotation dataset. - va_df = ( - spark - .read - .parquet(tmp_variant_annotation_path) - .filter(f.col("vaChromosome") == ("X" if chromosome == "23" else chromosome)) - .persist() - ) - - # Read and process the summary stats dataset. - df = ( - spark - .read - .parquet(raw_summary_stats_path) - .filter(f.col("chromosome") == chromosome) - # Harmonise, 1: Rename chromosome 23 to X. - .withColumn( - "chromosome", - f.when( - f.col("chromosome") == "23", "X" - ).otherwise(f.col("chromosome")) - ) - # Harmonise, 2: Filter out low INFO rows. - .filter(f.col("INFO") >= 0.8) - # Harmonise, 3: Filter out low frequency rows. - .withColumn( - "MAF", - f.when(f.col("A1FREQ") < 0.5, f.col("A1FREQ")) - .otherwise(1 - f.col("A1FREQ")) - ) - .filter(f.col("MAF") >= 0.0001) - .drop("MAF") - # Harmonise, 4: Assign variant types. 
- .withColumn( - "variant_type", - f.when( - (f.length("ALLELE0") == 1) & (f.length("ALLELE1") == 1), - f.when( - ((f.col("ALLELE0") == "A") & (f.col("ALLELE1") == "T")) | - ((f.col("ALLELE0") == "T") & (f.col("ALLELE1") == "A")) | - ((f.col("ALLELE0") == "G") & (f.col("ALLELE1") == "C")) | - ((f.col("ALLELE0") == "C") & (f.col("ALLELE1") == "G")), - "snp_c" - ) - .otherwise( - "snp_n" - ) - ) - .otherwise( - "indel" - ) - ) - # Harmonise, 5: Create variant ID for joining the variant annotation dataset. - .withColumn( - "GENPOS", - f.col("GENPOS").cast("integer") - ) - .withColumn( - "ukb_ppp_id", - f.concat_ws( - "_", - f.col("chromosome"), - f.col("GENPOS"), - f.col("ALLELE0"), - f.col("ALLELE1") - ) - ) - ) - # Harmonise, 6: Join with the Variant Annotation dataset. - df = ( - df - .join(va_df, (df["chromosome"] == va_df["vaChromosome"]) & (df["ukb_ppp_id"] == va_df["ukb_ppp_id"]), "inner") - .drop("vaChromosome", "ukb_ppp_id") - .withColumn( - "effectAlleleFrequencyFromSource", - f.when( - f.col("direction") == "direct", - f.col("A1FREQ").cast("float") - ).otherwise(1 - f.col("A1FREQ").cast("float")) - ) - .withColumn( - "beta", - f.when( - f.col("direction") == "direct", - f.col("BETA").cast("double") - ).otherwise(-f.col("BETA").cast("double")) - ) - ) - df = ( - # Harmonise, 7: Drop bad quality variants. - df - .filter( - ~ ((f.col("variant_type") == "snp_c") & (f.col("direction") == "flip")) - ) - ) - - # Prepare the fields according to schema. - df = ( - df - .select( - f.col("studyId"), - f.col("chromosome"), - f.col("variantId"), - f.col("beta"), - f.col("GENPOS").cast(t.IntegerType()).alias("position"), - # Parse p-value into mantissa and exponent. - *neglog_pvalue_to_mantissa_and_exponent(f.col("LOG10P").cast(t.DoubleType())), - # Add standard error and sample size information. - f.col("SE").cast("double").alias("standardError"), - f.col("N").cast("integer").alias("sampleSize"), - ) - # Drop rows which don't have proper position or beta value. 
- .filter( - f.col("position").cast(t.IntegerType()).isNotNull() - & (f.col("beta") != 0) - ) + df = harmonise_summary_stats( + spark, + raw_summary_stats_path, + tmp_variant_annotation_path, + chromosome, + colname_position="GENPOS", + colname_allele0="ALLELE0", + colname_allele1="ALLELE1", + colname_a1freq="A1FREQ", + colname_info="INFO", + colname_beta="BETA", + colname_se="SE", + colname_mlog10p="LOG10P", + colname_n="N", ) # Create the summary statistics object. From a49ae9ace6e0129984000bc15c8092ca142d2c42 Mon Sep 17 00:00:00 2001 From: Kirill Tsukanov Date: Wed, 11 Sep 2024 13:46:40 +0100 Subject: [PATCH 031/188] feat: ingest FinnGen UKB meta-analysis data (#756) * feat: implement FinnGen UKB meta-analysis ingestion and harmonisation * chore: remove ot_finngen_ukb_meta.yaml * chore: remove raw_study_index_path to raw_study_index_path_from_tsv * fix: use session.write_mode * style: rename class to FinngenUkbMetaIngestionStep --- .../ot_ukb_ppp_eur_sumstat_preprocess.yaml | 2 +- src/airflow/dags/ukb_ppp_eur.py | 2 +- src/gentropy/common/harmonise.py | 38 +++++------ src/gentropy/common/per_chromosome.py | 16 ++++- src/gentropy/config.py | 15 ++++- .../datasource/finngen_ukb_meta/__init__.py | 3 + .../finngen_ukb_meta/study_index.py | 62 ++++++++++++++++++ .../finngen_ukb_meta/summary_stats.py | 63 +++++++++++++++++++ .../datasource/ukb_ppp_eur/study_index.py | 6 +- .../datasource/ukb_ppp_eur/summary_stats.py | 2 + src/gentropy/finngen_ukb_meta.py | 49 +++++++++++++++ .../ukb_ppp_eur_sumstat_preprocess.py | 8 +-- 12 files changed, 236 insertions(+), 30 deletions(-) create mode 100644 src/gentropy/datasource/finngen_ukb_meta/__init__.py create mode 100644 src/gentropy/datasource/finngen_ukb_meta/study_index.py create mode 100644 src/gentropy/datasource/finngen_ukb_meta/summary_stats.py create mode 100644 src/gentropy/finngen_ukb_meta.py diff --git a/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml b/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml index 
1f8c108bc..24da7bad0 100644 --- a/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml +++ b/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml @@ -1,7 +1,7 @@ defaults: - ukb_ppp_eur_sumstat_preprocess -raw_study_index_path: ??? +raw_study_index_path_from_tsv: ??? raw_summary_stats_path: ??? variant_annotation_path: ??? tmp_variant_annotation_path: ??? diff --git a/src/airflow/dags/ukb_ppp_eur.py b/src/airflow/dags/ukb_ppp_eur.py index f0d0e1fe8..c8df8cf5b 100644 --- a/src/airflow/dags/ukb_ppp_eur.py +++ b/src/airflow/dags/ukb_ppp_eur.py @@ -33,7 +33,7 @@ cluster_name=CLUSTER_NAME, step_id="ot_ukb_ppp_eur_sumstat_preprocess", other_args=[ - f"step.raw_study_index_path={UKB_PPP_EUR_STUDY_INDEX}", + f"step.raw_study_index_path_from_tsv={UKB_PPP_EUR_STUDY_INDEX}", f"step.raw_summary_stats_path={UKB_PPP_EUR_SUMMARY_STATS}", f"step.variant_annotation_path={VARIANT_ANNOTATION}", f"step.tmp_variant_annotation_path={TMP_VARIANT_ANNOTATION}", diff --git a/src/gentropy/common/harmonise.py b/src/gentropy/common/harmonise.py index aac763e43..9b570eec6 100644 --- a/src/gentropy/common/harmonise.py +++ b/src/gentropy/common/harmonise.py @@ -15,12 +15,12 @@ def harmonise_summary_stats( colname_position: str, colname_allele0: str, colname_allele1: str, - colname_a1freq: str, - colname_info: str, + colname_a1freq: str | None, + colname_info: str | None, colname_beta: str, colname_se: str, colname_mlog10p: str, - colname_n: str, + colname_n: str | None, ) -> DataFrame: """Ingest and harmonise the summary stats. @@ -40,12 +40,12 @@ def harmonise_summary_stats( colname_position (str): Column name for position. colname_allele0 (str): Column name for allele0. colname_allele1 (str): Column name for allele1. - colname_a1freq (str): Column name for allele1 frequency (optional). - colname_info (str): Column name for INFO, reflecting variant quality (optional). + colname_a1freq (str | None): Column name for allele1 frequency (optional). 
+ colname_info (str | None): Column name for INFO, reflecting variant quality (optional). colname_beta (str): Column name for beta. colname_se (str): Column name for beta standard error. colname_mlog10p (str): Column name for -log10(p). - colname_n (str): Column name for the number of samples. + colname_n (str | None): Column name for the number of samples (optional). Returns: DataFrame: A harmonised summary stats dataframe. @@ -159,20 +159,22 @@ def harmonise_summary_stats( ) # Prepare the fields according to schema. + select_expr = [ + f.col("studyId"), + f.col("chromosome"), + f.col("variantId"), + f.col("beta"), + f.col(colname_position).cast(t.IntegerType()).alias("position"), + # Parse p-value into mantissa and exponent. + *neglog_pvalue_to_mantissa_and_exponent(f.col(colname_mlog10p).cast(t.DoubleType())), + # Add standard error and sample size information. + f.col(colname_se).cast("double").alias("standardError"), + ] + if colname_n: + select_expr.append(f.col(colname_n).cast("integer").alias("sampleSize")) df = ( df - .select( - f.col("studyId"), - f.col("chromosome"), - f.col("variantId"), - f.col("beta"), - f.col(colname_position).cast(t.IntegerType()).alias("position"), - # Parse p-value into mantissa and exponent. - *neglog_pvalue_to_mantissa_and_exponent(f.col(colname_mlog10p).cast(t.DoubleType())), - # Add standard error and sample size information. - f.col(colname_se).cast("double").alias("standardError"), - f.col(colname_n).cast("integer").alias("sampleSize"), - ) + .select(*select_expr) # Drop rows which don't have proper position or beta value. 
.filter( f.col("position").cast(t.IntegerType()).isNotNull() diff --git a/src/gentropy/common/per_chromosome.py b/src/gentropy/common/per_chromosome.py index 528e6ffbe..f2cedd98e 100644 --- a/src/gentropy/common/per_chromosome.py +++ b/src/gentropy/common/per_chromosome.py @@ -5,6 +5,9 @@ import pyspark.sql.functions as f from pyspark.sql import SparkSession +from gentropy.datasource.finngen_ukb_meta.summary_stats import ( + FinngenUkbMetaSummaryStats, +) from gentropy.datasource.ukb_ppp_eur.summary_stats import UkbPppEurSummaryStats @@ -63,15 +66,23 @@ def prepare_va(session: SparkSession, variant_annotation_path: str, tmp_variant_ ) -def process_summary_stats_per_chromosome(session: SparkSession, ingestion_class: type[UkbPppEurSummaryStats], raw_summary_stats_path: str, tmp_variant_annotation_path: str, summary_stats_output_path: str) -> None: +def process_summary_stats_per_chromosome( + session: SparkSession, + ingestion_class: type[UkbPppEurSummaryStats] | type[FinngenUkbMetaSummaryStats], + raw_summary_stats_path: str, + tmp_variant_annotation_path: str, + summary_stats_output_path: str, + study_index_path: str, + ) -> None: """Processes summary statistics for each chromosome, partitioning and writing results. Args: session (SparkSession): The Spark session to use for distributed data processing. - ingestion_class (type[UkbPppEurSummaryStats]): The class used to handle ingestion of source data. Must have a `from_source` method returning a DataFrame. + ingestion_class (type[UkbPppEurSummaryStats] | type[FinngenUkbMetaSummaryStats]): The class used to handle ingestion of source data. Must have a `from_source` method returning a DataFrame. raw_summary_stats_path (str): The path to the raw summary statistics files. tmp_variant_annotation_path (str): The path to temporary variant annotation data, used for chromosome joins. summary_stats_output_path (str): The output path to write processed summary statistics as parquet files. 
+ study_index_path (str): The path to study index, which is necessary in some cases to populate the sample size column. """ # Set mode to overwrite for processing the first chromosome. write_mode = "overwrite" @@ -85,6 +96,7 @@ def process_summary_stats_per_chromosome(session: SparkSession, ingestion_class: raw_summary_stats_path=raw_summary_stats_path, tmp_variant_annotation_path=tmp_variant_annotation_path, chromosome=str(chromosome), + study_index_path=study_index_path, ) .df .coalesce(1) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 9089dbecf..181e9042d 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -299,7 +299,7 @@ class PICSConfig(StepConfig): class UkbPppEurConfig(StepConfig): """UKB PPP (EUR) ingestion step configuration.""" - raw_study_index_path: str = MISSING + raw_study_index_path_from_tsv: str = MISSING raw_summary_stats_path: str = MISSING tmp_variant_annotation_path: str = MISSING variant_annotation_path: str = MISSING @@ -308,6 +308,19 @@ class UkbPppEurConfig(StepConfig): _target_: str = "gentropy.ukb_ppp_eur_sumstat_preprocess.UkbPppEurStep" +@dataclass +class FinngenUkbMetaConfig(StepConfig): + """FinnGen UKB meta-analysis ingestion step configuration.""" + + raw_study_index_path_from_tsv: str = MISSING + raw_summary_stats_path: str = MISSING + tmp_variant_annotation_path: str = MISSING + variant_annotation_path: str = MISSING + study_index_output_path: str = MISSING + summary_stats_output_path: str = MISSING + _target_: str = "gentropy.finngen_ukb_meta.FinngenUkbMetaIngestionStep" + + @dataclass class GnomadVariantConfig(StepConfig): """Gnomad variant ingestion step configuration.""" diff --git a/src/gentropy/datasource/finngen_ukb_meta/__init__.py b/src/gentropy/datasource/finngen_ukb_meta/__init__.py new file mode 100644 index 000000000..8d6cfd752 --- /dev/null +++ b/src/gentropy/datasource/finngen_ukb_meta/__init__.py @@ -0,0 +1,3 @@ +"""FinnGen UKB meta-analysis data source.""" + +from __future__ 
import annotations diff --git a/src/gentropy/datasource/finngen_ukb_meta/study_index.py b/src/gentropy/datasource/finngen_ukb_meta/study_index.py new file mode 100644 index 000000000..76e82f0eb --- /dev/null +++ b/src/gentropy/datasource/finngen_ukb_meta/study_index.py @@ -0,0 +1,62 @@ +"""Study Index for Finngen data source.""" +from __future__ import annotations + +import pyspark.sql.functions as f +from pyspark.sql import SparkSession + +from gentropy.dataset.study_index import StudyIndex + + +class FinngenUkbMetaStudyIndex(StudyIndex): + """Study index dataset from FinnGen UKB meta-analysis.""" + + @classmethod + def from_source( + cls: type[FinngenUkbMetaStudyIndex], + spark: SparkSession, + raw_study_index_path_from_tsv: str, + ) -> StudyIndex: + """This function ingests study level metadata from FinnGen UKB meta-analysis. + + Args: + spark (SparkSession): Spark session object. + raw_study_index_path_from_tsv (str): Raw study index path. + + Returns: + StudyIndex: Parsed and annotated FinnGen UKB meta-analysis study table. + """ + # Read the raw study index and process. + study_index_df = ( + spark.read.csv(raw_study_index_path_from_tsv, sep="\t", header=True) + .select( + f.lit("gwas").alias("studyType"), + f.lit("FINNGEN_R11_UKB_META").alias("projectId"), + f.col("_gentropy_study_id").alias("studyId"), + f.col("name").alias("traitFromSource"), + f.lit(True).alias("hasSumstats"), + f.col("_gentropy_summary_stats_link").alias("summarystatsLocation"), + (f.col("fg_n_cases") + f.col("ukbb_n_cases") + f.col("fg_n_controls") + f.col("ukbb_n_controls")).alias("nSamples") + ) + ) + # Add population structure. 
+ study_index_df = ( + study_index_df + .withColumn( + "discoverySamples", + f.array( + f.struct( + f.col("nSamples").cast("integer").alias("sampleSize"), + f.lit("European").alias("ancestry"), + ) + ) + ) + .withColumn( + "ldPopulationStructure", + cls.aggregate_and_map_ancestries(f.col("discoverySamples")), + ) + ) + + return StudyIndex( + _df=study_index_df, + _schema=StudyIndex.get_schema(), + ) diff --git a/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py b/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py new file mode 100644 index 000000000..6e45736c3 --- /dev/null +++ b/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py @@ -0,0 +1,63 @@ +"""Summary statistics ingestion for FinnGen UKB meta-analysis.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from pyspark.sql import SparkSession + +from gentropy.common.harmonise import harmonise_summary_stats +from gentropy.dataset.summary_statistics import SummaryStatistics + + +@dataclass +class FinngenUkbMetaSummaryStats: + """Summary statistics dataset for FinnGen UKB meta-analysis.""" + + @classmethod + def from_source( + cls: type[FinngenUkbMetaSummaryStats], + spark: SparkSession, + raw_summary_stats_path: str, + tmp_variant_annotation_path: str, + chromosome: str, + study_index_path: str, + ) -> SummaryStatistics: + """Ingest and harmonise all summary stats for FinnGen UKB meta-analysis data. + + Args: + spark (SparkSession): Spark session object. + raw_summary_stats_path (str): Input raw summary stats path. + tmp_variant_annotation_path (str): Input variant annotation dataset path. + chromosome (str): Which chromosome to process. + study_index_path (str): The path to study index, which is necessary in some cases to populate the sample size column. + + Returns: + SummaryStatistics: Processed summary statistics dataset for a given chromosome. + """ + # Run the harmonisation steps. 
+ df = harmonise_summary_stats( + spark, + raw_summary_stats_path, + tmp_variant_annotation_path, + chromosome, + colname_position="POS", + colname_allele0="REF", + colname_allele1="ALT", + colname_a1freq=None, + colname_info=None, + colname_beta="all_inv_var_meta_beta", + colname_se="all_inv_var_meta_sebeta", + colname_mlog10p="all_inv_var_meta_mlogp", + colname_n=None, + ) + + # Populate the sample size column from the study index. + study_index = spark.read.parquet(study_index_path).select("studyId", "nSamples") + df = df.join(study_index, on=["studyId"], how="inner") + + # Create the summary statistics object. + return SummaryStatistics( + _df=df, + _schema=SummaryStatistics.get_schema(), + ) diff --git a/src/gentropy/datasource/ukb_ppp_eur/study_index.py b/src/gentropy/datasource/ukb_ppp_eur/study_index.py index 3e0a7d782..f694b9a47 100644 --- a/src/gentropy/datasource/ukb_ppp_eur/study_index.py +++ b/src/gentropy/datasource/ukb_ppp_eur/study_index.py @@ -14,14 +14,14 @@ class UkbPppEurStudyIndex(StudyIndex): def from_source( cls: type[UkbPppEurStudyIndex], spark: SparkSession, - raw_study_index_path: str, + raw_study_index_path_from_tsv: str, raw_summary_stats_path: str, ) -> StudyIndex: """This function ingests study level metadata from UKB PPP (EUR). Args: spark (SparkSession): Spark session object. - raw_study_index_path (str): Raw study index path. + raw_study_index_path_from_tsv (str): Raw study index path. raw_summary_stats_path (str): Raw summary stats path. Returns: @@ -39,7 +39,7 @@ def from_source( ) # Now we can read the raw study index and complete the processing. 
study_index_df = ( - spark.read.csv(raw_study_index_path, sep="\t", header=True) + spark.read.csv(raw_study_index_path_from_tsv, sep="\t", header=True) .select( f.lit("pqtl").alias("studyType"), f.lit("UKB_PPP_EUR").alias("projectId"), diff --git a/src/gentropy/datasource/ukb_ppp_eur/summary_stats.py b/src/gentropy/datasource/ukb_ppp_eur/summary_stats.py index a0480f740..5ded9c891 100644 --- a/src/gentropy/datasource/ukb_ppp_eur/summary_stats.py +++ b/src/gentropy/datasource/ukb_ppp_eur/summary_stats.py @@ -21,6 +21,7 @@ def from_source( raw_summary_stats_path: str, tmp_variant_annotation_path: str, chromosome: str, + study_index_path: str, ) -> SummaryStatistics: """Ingest and harmonise all summary stats for UKB PPP (EUR) data. @@ -29,6 +30,7 @@ def from_source( raw_summary_stats_path (str): Input raw summary stats path. tmp_variant_annotation_path (str): Input variant annotation dataset path. chromosome (str): Which chromosome to process. + study_index_path (str): The path to study index, which is necessary in some cases to populate the sample size column. Returns: SummaryStatistics: Processed summary statistics dataset for a given chromosome. 
diff --git a/src/gentropy/finngen_ukb_meta.py b/src/gentropy/finngen_ukb_meta.py new file mode 100644 index 000000000..eafd2a659 --- /dev/null +++ b/src/gentropy/finngen_ukb_meta.py @@ -0,0 +1,49 @@ +"""Step to run FinnGen UKB meta-analysis data ingestion.""" + +from __future__ import annotations + +from gentropy.common.per_chromosome import ( + prepare_va, + process_summary_stats_per_chromosome, +) +from gentropy.common.session import Session +from gentropy.datasource.finngen_ukb_meta.study_index import FinngenUkbMetaStudyIndex +from gentropy.datasource.finngen_ukb_meta.summary_stats import ( + FinngenUkbMetaSummaryStats, +) + + +class FinngenUkbMetaIngestionStep: + """FinnGen UKB meta-analysis data ingestion and harmonisation.""" + + def __init__( + self, session: Session, raw_study_index_path_from_tsv: str, raw_summary_stats_path: str, variant_annotation_path: str, tmp_variant_annotation_path: str, study_index_output_path: str, summary_stats_output_path: str + ) -> None: + """Data ingestion and harmonisation step for FinnGen UKB meta-analysis. + + Args: + session (Session): Session object. + raw_study_index_path_from_tsv (str): Input raw study index path. + raw_summary_stats_path (str): Input raw summary stats path. + variant_annotation_path (str): Input variant annotation dataset path. + tmp_variant_annotation_path (str): Temporary output path for variant annotation dataset. + study_index_output_path (str): Study index output path. + summary_stats_output_path (str): Summary stats output path. 
+ """ + session.logger.info("Pre-compute the direct and flipped variant annotation dataset.") + prepare_va(session, variant_annotation_path, tmp_variant_annotation_path) + + session.logger.info("Process study index.") + ( + FinngenUkbMetaStudyIndex.from_source( + spark=session.spark, + raw_study_index_path_from_tsv=raw_study_index_path_from_tsv, + ) + .df + .write + .mode(session.write_mode) + .parquet(study_index_output_path) + ) + + session.logger.info("Process and harmonise summary stats.") + process_summary_stats_per_chromosome(session, FinngenUkbMetaSummaryStats, raw_summary_stats_path, tmp_variant_annotation_path, summary_stats_output_path, study_index_output_path) diff --git a/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py b/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py index b192d963f..3cee45c6a 100644 --- a/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py +++ b/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py @@ -15,13 +15,13 @@ class UkbPppEurStep: """UKB PPP (EUR) data ingestion and harmonisation.""" def __init__( - self, session: Session, raw_study_index_path: str, raw_summary_stats_path: str, variant_annotation_path: str, tmp_variant_annotation_path: str, study_index_output_path: str, summary_stats_output_path: str + self, session: Session, raw_study_index_path_from_tsv: str, raw_summary_stats_path: str, variant_annotation_path: str, tmp_variant_annotation_path: str, study_index_output_path: str, summary_stats_output_path: str ) -> None: """Run UKB PPP (EUR) data ingestion and harmonisation step. Args: session (Session): Session object. - raw_study_index_path (str): Input raw study index path. + raw_study_index_path_from_tsv (str): Input raw study index path. raw_summary_stats_path (str): Input raw summary stats path. variant_annotation_path (str): Input variant annotation dataset path. tmp_variant_annotation_path (str): Temporary output path for variant annotation dataset. 
@@ -35,7 +35,7 @@ def __init__( ( UkbPppEurStudyIndex.from_source( spark=session.spark, - raw_study_index_path=raw_study_index_path, + raw_study_index_path_from_tsv=raw_study_index_path_from_tsv, raw_summary_stats_path=raw_summary_stats_path, ) .df @@ -45,4 +45,4 @@ def __init__( ) session.logger.info("Process and harmonise summary stats.") - process_summary_stats_per_chromosome(session, UkbPppEurSummaryStats, raw_summary_stats_path, tmp_variant_annotation_path, summary_stats_output_path) + process_summary_stats_per_chromosome(session, UkbPppEurSummaryStats, raw_summary_stats_path, tmp_variant_annotation_path, summary_stats_output_path, study_index_output_path) From 6469bf5dc4311675a90ae60e3ed1a8f0b4da9349 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Thu, 12 Sep 2024 14:17:21 +0100 Subject: [PATCH 032/188] feat(validation): adding credible set variant validation (#757) * feat(validation): adding logic to validate credible sets against variant index * fix: tidying docstrings --- src/gentropy/dataset/study_locus.py | 66 +++++++++++++++- tests/gentropy/dataset/test_study_locus.py | 89 ++++++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index edf9dc8be..b59d57650 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -8,7 +8,7 @@ import numpy as np import pyspark.sql.functions as f -from pyspark.sql.types import FloatType, StringType +from pyspark.sql.types import ArrayType, FloatType, StringType from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( @@ -18,6 +18,7 @@ from gentropy.common.utils import get_logsum, parse_region from gentropy.dataset.dataset import Dataset from gentropy.dataset.study_locus_overlap import StudyLocusOverlap +from gentropy.dataset.variant_index import VariantIndex from gentropy.method.clump import LDclumping if TYPE_CHECKING: @@ -47,6 +48,7 @@ 
class StudyLocusQualityCheck(Enum): FAILED_STUDY (str): Flagging study loci if the study has failed QC MISSING_STUDY (str): Flagging study loci if the study is not found in the study index as a reference DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique. + INVALID_VARIANT_IDENTIFIER (str): Flagging study loci where identifier of any tagging variant was not found in the variant index """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -65,6 +67,9 @@ class StudyLocusQualityCheck(Enum): FAILED_STUDY = "Study has failed quality controls" MISSING_STUDY = "Study not found in the study index" DUPLICATED_STUDYLOCUS_ID = "Non-unique study locus identifier" + INVALID_VARIANT_IDENTIFIER = ( + "Some variant identifiers of this locus were not found in variant index" + ) class CredibleInterval(Enum): @@ -141,6 +146,65 @@ def validate_study(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: _schema=self.get_schema(), ) + def validate_variant_identifiers( + self: StudyLocus, variant_index: VariantIndex + ) -> StudyLocus: + """Flagging study loci, where tagging variant identifiers are not found in variant index. + + Args: + variant_index (VariantIndex): Variant index to resolve variant identifiers. + + Returns: + StudyLocus: Updated study locus with quality control flags. 
+ """ + # QC column might not be present in the study locus dataframe, so we have to be ready to handle it: + qc_select_expression = ( + f.col("qualityControls") + if "qualityControls" in self.df.columns + else f.lit(None).cast(ArrayType(StringType())) + ) + + # Find out which study loci have variants not in the variant index: + flag = ( + self.df + # Exploding locus: + .select("studyLocusId", f.explode("locus").alias("locus")) + .select("studyLocusId", "locus.variantId") + # Join with variant index variants: + .join( + variant_index.df.select( + "variantId", f.lit(True).alias("inVariantIndex") + ), + on="variantId", + how="left", + ) + # Flagging variants not in the variant index: + .withColumn("inVariantIndex", f.col("inVariantIndex").isNotNull()) + # Flagging study loci with ANY variants not in the variant index: + .groupBy("studyLocusId") + .agg(f.collect_set("inVariantIndex").alias("inVariantIndex")) + .select( + "studyLocusId", + f.array_contains("inVariantIndex", False).alias("toFlag"), + ) + ) + + return StudyLocus( + _df=( + self.df.join(flag, on="studyLocusId", how="left") + .withColumn( + "qualityControls", + self.update_quality_flag( + qc_select_expression, + f.col("toFlag"), + StudyLocusQualityCheck.INVALID_VARIANT_IDENTIFIER, + ), + ) + .drop("toFlag") + ), + _schema=self.get_schema(), + ) + def validate_lead_pvalue(self: StudyLocus, pvalue_cutoff: float) -> StudyLocus: """Flag associations below significant threshold. 
diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 9b40796db..c7538b28b 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -27,6 +27,7 @@ ) from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.summary_statistics import SummaryStatistics +from gentropy.dataset.variant_index import VariantIndex @pytest.mark.parametrize( @@ -562,6 +563,94 @@ def test_annotate_locus_statistics_boundaries( ) +class TestStudyLocusVariantValidation: + """Collection of tests for StudyLocus variant validation.""" + + VARIANT_DATA = [ + ("v1", "c1", 1, "r", "a"), + ("v2", "c1", 2, "r", "a"), + ("v3", "c1", 3, "r", "a"), + ("v4", "c1", 4, "r", "a"), + ] + VARIANT_HEADERS = [ + "variantId", + "chromosome", + "position", + "referenceAllele", + "alternateAllele", + ] + + STUDYLOCUS_DATA = [ + # First studylocus passes qc: + (1, "v1", "s1", "v1"), + (1, "v1", "s1", "v2"), + (1, "v1", "s1", "v3"), + # Second studylocus fails qc (v5 is not in the variant index): + (2, "v1", "s1", "v1"), + (2, "v1", "s1", "v5"), + ] + STUDYLOCUS_HEADER = ["studyLocusId", "variantId", "studyId", "tagVariantId"] + + @pytest.fixture(autouse=True) + def _setup(self: TestStudyLocusVariantValidation, spark: SparkSession) -> None: + """Setup study locus for testing.""" + self.variant_index = VariantIndex( + _df=spark.createDataFrame( + self.VARIANT_DATA, self.VARIANT_HEADERS + ).withColumn("position", f.col("position").cast(t.IntegerType())), + _schema=VariantIndex.get_schema(), + ) + + self.credible_set = StudyLocus( + _df=( + spark.createDataFrame(self.STUDYLOCUS_DATA, self.STUDYLOCUS_HEADER) + .withColumn("studyLocusId", f.col("studyLocusId").cast(t.LongType())) + .withColumn("qualityControls", f.array()) + .groupBy("studyLocusId", "variantId", "studyId") + .agg( + f.collect_set( + f.struct(f.col("tagVariantId").alias("variantId")) + ).alias("locus") + ) + ), + _schema=StudyLocus.get_schema(), + ) + + def 
test_validation_return_type(self: TestStudyLocusVariantValidation) -> None: + """Testing if the validation returns the right type.""" + assert isinstance( + self.credible_set.validate_variant_identifiers(self.variant_index), + StudyLocus, + ) + + def test_validation_no_data_loss(self: TestStudyLocusVariantValidation) -> None: + """Testing if the validation returns same number of rows.""" + assert ( + self.credible_set.validate_variant_identifiers( + self.variant_index + ).df.count() + == self.credible_set.df.count() + ) + + def test_validation_correctness(self: TestStudyLocusVariantValidation) -> None: + """Testing if the validation flags the right number of variants.""" + # Execute validation: + validated = self.credible_set.validate_variant_identifiers( + self.variant_index + ).df + + # Make sure there's only one study locus with a failed variants: + assert validated.filter(f.size("qualityControls") > 0).count() == 1 + + # Check that the right one is flagged: + assert ( + validated.filter( + (f.size("qualityControls") > 0) & (f.col("studyLocusId") == 2) + ).count() + == 1 + ) + + class TestStudyLocusValidation: """Collection of tests for StudyLocus validation.""" From d3435bb03682e829fbf9fe1bbe2e2e3df5344528 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 12 Sep 2024 22:56:24 +0200 Subject: [PATCH 033/188] feat: drop airflow orchestration layer from gentropy (#758) * refactor: drop gentropy config * feat(airflow): removal of airflow orchestration BREAKING CHANGE: see https://github.com/opentargets/orchestration --------- Co-authored-by: Szymon Szyszkowski --- config/__init__.py | 3 - config/datasets/ot_gcp.yaml | 80 - config/ot_config.yaml | 5 - config/step/ot_colocalisation_coloc.yaml | 7 - config/step/ot_colocalisation_ecaviar.yaml | 7 - config/step/ot_eqtl_catalogue.yaml | 10 - config/step/ot_gene_index.yaml | 5 - config/step/ot_gwas_catalog_ingestion.yaml | 12 - 
.../step/ot_gwas_catalog_study_curation.yaml | 8 - .../step/ot_gwas_catalog_study_inclusion.yaml | 12 - config/step/ot_ld_based_clumping.yaml | 7 - config/step/ot_ld_index.yaml | 20 - config/step/ot_locus_to_gene_predict.yaml | 11 - config/step/ot_locus_to_gene_train.yaml | 19 - .../ot_ukb_ppp_eur_sumstat_preprocess.yaml | 13 - config/step/ot_variant_index.yaml | 6 - config/step/ot_variant_to_gene.yaml | 13 - config/step/session/dataproc.yaml | 5 - docs/development/airflow.md | 124 - docs/development/contributing.md | 36 +- docs/development/workflows.md | 27 - mkdocs.yml | 3 +- poetry.lock | 3737 +---------------- pyproject.toml | 8 +- src/airflow/.env | 6 - src/airflow/Dockerfile | 33 - src/airflow/config/.gitkeep | 0 src/airflow/dags/.gitkeep | 0 src/airflow/dags/common_airflow.py | 490 --- src/airflow/dags/configs/dag.yaml | 17 - src/airflow/dags/configs/variant_sources.yaml | 13 - src/airflow/dags/data_validation.py | 96 - src/airflow/dags/eqtl_preprocess.py | 73 - src/airflow/dags/genetics_etl.py | 154 - src/airflow/dags/gnomad_preprocess.py | 29 - .../dags/gwas_catalog_harmonisation.py | 125 - src/airflow/dags/gwas_catalog_preprocess.py | 223 - src/airflow/dags/gwas_curation_update.py | 34 - src/airflow/dags/ukb_ppp_eur.py | 45 - src/airflow/dags/variant_index.py | 321 -- src/airflow/docker-compose.yaml | 228 - src/airflow/logs/.gitkeep | 0 src/airflow/plugins/.gitkeep | 0 src/airflow/requirements.txt | 3 - tests/airflow/test_dag.py | 51 - 45 files changed, 149 insertions(+), 5970 deletions(-) delete mode 100644 config/__init__.py delete mode 100644 config/datasets/ot_gcp.yaml delete mode 100644 config/ot_config.yaml delete mode 100644 config/step/ot_colocalisation_coloc.yaml delete mode 100644 config/step/ot_colocalisation_ecaviar.yaml delete mode 100644 config/step/ot_eqtl_catalogue.yaml delete mode 100644 config/step/ot_gene_index.yaml delete mode 100644 config/step/ot_gwas_catalog_ingestion.yaml delete mode 100644 
config/step/ot_gwas_catalog_study_curation.yaml delete mode 100644 config/step/ot_gwas_catalog_study_inclusion.yaml delete mode 100644 config/step/ot_ld_based_clumping.yaml delete mode 100644 config/step/ot_ld_index.yaml delete mode 100644 config/step/ot_locus_to_gene_predict.yaml delete mode 100644 config/step/ot_locus_to_gene_train.yaml delete mode 100644 config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml delete mode 100644 config/step/ot_variant_index.yaml delete mode 100644 config/step/ot_variant_to_gene.yaml delete mode 100644 config/step/session/dataproc.yaml delete mode 100644 docs/development/airflow.md delete mode 100644 docs/development/workflows.md delete mode 100644 src/airflow/.env delete mode 100644 src/airflow/Dockerfile delete mode 100644 src/airflow/config/.gitkeep delete mode 100644 src/airflow/dags/.gitkeep delete mode 100644 src/airflow/dags/common_airflow.py delete mode 100644 src/airflow/dags/configs/dag.yaml delete mode 100644 src/airflow/dags/configs/variant_sources.yaml delete mode 100644 src/airflow/dags/data_validation.py delete mode 100644 src/airflow/dags/eqtl_preprocess.py delete mode 100644 src/airflow/dags/genetics_etl.py delete mode 100644 src/airflow/dags/gnomad_preprocess.py delete mode 100644 src/airflow/dags/gwas_catalog_harmonisation.py delete mode 100644 src/airflow/dags/gwas_catalog_preprocess.py delete mode 100644 src/airflow/dags/gwas_curation_update.py delete mode 100644 src/airflow/dags/ukb_ppp_eur.py delete mode 100644 src/airflow/dags/variant_index.py delete mode 100644 src/airflow/docker-compose.yaml delete mode 100644 src/airflow/logs/.gitkeep delete mode 100644 src/airflow/plugins/.gitkeep delete mode 100644 src/airflow/requirements.txt delete mode 100644 tests/airflow/test_dag.py diff --git a/config/__init__.py b/config/__init__.py deleted file mode 100644 index 31939863f..000000000 --- a/config/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Reference configuration yamls.""" - -from __future__ import annotations diff 
--git a/config/datasets/ot_gcp.yaml b/config/datasets/ot_gcp.yaml deleted file mode 100644 index a8d8886bb..000000000 --- a/config/datasets/ot_gcp.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# Release specific configuration: -release_version: "24.06" -dev_version: XX.XX -release_folder: gs://genetics_etl_python_playground/releases/${datasets.release_version} - -inputs: gs://genetics_etl_python_playground/input -static_assets: gs://genetics_etl_python_playground/static_assets -outputs: gs://genetics_etl_python_playground/output/python_etl/parquet/${datasets.dev_version} - -## Datasets: -# GWAS -gwas_catalog_dataset: gs://gwas_catalog_data -# Ingestion input files: -gwas_catalog_associations: ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_associations_ontology_annotated.tsv -gwas_catalog_studies: - - ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_download_studies.tsv - - ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_unpublished_studies.tsv -gwas_catalog_ancestries: - - ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_download_ancestries.tsv - - ${datasets.gwas_catalog_dataset}/curated_inputs/gwas_catalog_unpublished_ancestries.tsv -gwas_catalog_sumstats_lut: ${datasets.gwas_catalog_dataset}/curated_inputs/harmonised_list.txt -gwas_catalog_study_curation: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_study_curation.tsv -# Harmonised summary statistics list: -gwas_catalog_summary_stats_list: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_harmonised_summary_statistics_list.txt -# Inclusion lists: -gwas_catalog_curated_inclusion_list: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_curation_included_studies -gwas_catalog_summary_statistics_inclusion_list: ${datasets.gwas_catalog_dataset}/manifests/gwas_catalog_summary_statistics_included_studies -# Ingestion output folders: -gwas_catalog_study_index: ${datasets.gwas_catalog_dataset}/study_index -gwas_catalog_study_locus_folder: 
${datasets.gwas_catalog_dataset}/study_locus_datasets -gwas_catalog_credible_set_folder: ${datasets.gwas_catalog_dataset}/credible_set_datasets - -# GnomAD -gnomad_public_bucket: gs://gcp-public-data--gnomad/release/ -# LD generation -# Templates require placeholders {POP} to expand template to match multiple populationwise paths -ld_matrix_template: ${datasets.gnomad_public_bucket}/2.1.1/ld/gnomad.genomes.r2.1.1.{POP}.common.adj.ld.bm -ld_index_raw_template: ${datasets.gnomad_public_bucket}/2.1.1/ld/gnomad.genomes.r2.1.1.{POP}.common.ld.variant_indices.ht -liftover_ht_path: ${datasets.gnomad_public_bucket}/2.1.1/liftover_grch38/ht/genomes/gnomad.genomes.r2.1.1.sites.liftover_grch38.ht -# GnomAD variant set: -gnomad_genomes_path: ${datasets.gnomad_public_bucket}4.0/ht/genomes/gnomad.genomes.v4.0.sites.ht/ - -# Others -chain_38_37: gs://hail-common/references/grch38_to_grch37.over.chain.gz -chain_37_38: ${datasets.static_assets}/grch37_to_grch38.over.chain -vep_consequences: ${datasets.static_assets}/variant_consequence_to_score.tsv -anderson: ${datasets.static_assets}/andersson2014/enhancer_tss_associations.bed -javierre: ${datasets.static_assets}/javierre_2016_preprocessed -jung: ${datasets.static_assets}/jung2019_pchic_tableS3.csv -thurman: ${datasets.static_assets}/thurman2012/genomewideCorrs_above0.7_promoterPlusMinus500kb_withGeneNames_32celltypeCategories.bed8.gz -target_index: ${datasets.static_assets}/targets # OTP 23.12 data -gene_interactions: ${datasets.static_assets}/interaction # OTP 23.12 data - -# Dev output datasets -gnomad_variants: ${datasets.static_assets}/gnomad_variants -study_locus: ${datasets.outputs}/study_locus -summary_statistics: ${datasets.outputs}/summary_statistics -study_locus_overlap: ${datasets.outputs}/study_locus_overlap -susie_finemapping: ${datasets.outputs}/finngen_susie_finemapping - -ld_index: ${datasets.static_assets}/ld_index -catalog_study_index: ${datasets.study_index}/catalog -catalog_study_locus: 
${datasets.study_locus}/catalog_study_locus - -from_sumstats_study_locus: ${datasets.study_locus}/from_sumstats -from_sumstats_pics: ${datasets.credible_set}/from_sumstats - -vep_output_path: gs://genetics_etl_python_playground/vep/full_variant_index_vcf - -# ETL output datasets: -l2g_gold_standard_curation: ${datasets.release_folder}/locus_to_gene_gold_standard.json -l2g_model: ${datasets.release_folder}/locus_to_gene_model/classifier.skops -l2g_predictions: ${datasets.release_folder}/locus_to_gene_predictions -l2g_feature_matrix: ${datasets.release_folder}/locus_to_gene_feature_matrix -colocalisation: ${datasets.release_folder}/colocalisation -study_index: ${datasets.release_folder}/study_index -variant_index: ${datasets.release_folder}/variant_index -credible_set: ${datasets.release_folder}/credible_set -gene_index: ${datasets.release_folder}/gene_index -variant_to_gene: ${datasets.release_folder}/variant_to_gene diff --git a/config/ot_config.yaml b/config/ot_config.yaml deleted file mode 100644 index 7f28a58d6..000000000 --- a/config/ot_config.yaml +++ /dev/null @@ -1,5 +0,0 @@ -defaults: - - config - - datasets: ot_gcp - - _self_ - - override step/session: dataproc diff --git a/config/step/ot_colocalisation_coloc.yaml b/config/step/ot_colocalisation_coloc.yaml deleted file mode 100644 index f01335514..000000000 --- a/config/step/ot_colocalisation_coloc.yaml +++ /dev/null @@ -1,7 +0,0 @@ -defaults: - - colocalisation - -credible_set_path: ${datasets.credible_set} -study_index_path: ${datasets.study_index} -coloc_path: ${datasets.colocalisation} -colocalisation_method: Coloc diff --git a/config/step/ot_colocalisation_ecaviar.yaml b/config/step/ot_colocalisation_ecaviar.yaml deleted file mode 100644 index d57887c93..000000000 --- a/config/step/ot_colocalisation_ecaviar.yaml +++ /dev/null @@ -1,7 +0,0 @@ -defaults: - - colocalisation - -credible_set_path: ${datasets.credible_set} -study_index_path: ${datasets.study_index} -coloc_path: ${datasets.colocalisation} 
-colocalisation_method: ECaviar diff --git a/config/step/ot_eqtl_catalogue.yaml b/config/step/ot_eqtl_catalogue.yaml deleted file mode 100644 index 7d4441864..000000000 --- a/config/step/ot_eqtl_catalogue.yaml +++ /dev/null @@ -1,10 +0,0 @@ -defaults: - - eqtl_catalogue - -eqtl_catalogue_paths_imported: ??? -eqtl_catalogue_study_index_out: ??? -eqtl_catalogue_credible_sets_out: ??? -mqtl_quantification_methods_blacklist: [] -session: - extended_spark_conf: - "spark.sql.shuffle.partitions": "3200" diff --git a/config/step/ot_gene_index.yaml b/config/step/ot_gene_index.yaml deleted file mode 100644 index ce5971bf9..000000000 --- a/config/step/ot_gene_index.yaml +++ /dev/null @@ -1,5 +0,0 @@ -defaults: - - gene_index - -target_path: ${datasets.target_index} -gene_index_path: ${datasets.gene_index} diff --git a/config/step/ot_gwas_catalog_ingestion.yaml b/config/step/ot_gwas_catalog_ingestion.yaml deleted file mode 100644 index 8acc07d62..000000000 --- a/config/step/ot_gwas_catalog_ingestion.yaml +++ /dev/null @@ -1,12 +0,0 @@ -defaults: - - gwas_catalog_ingestion - -catalog_study_files: ${datasets.gwas_catalog_studies} -catalog_ancestry_files: ${datasets.gwas_catalog_ancestries} -catalog_associations_file: ${datasets.gwas_catalog_associations} -catalog_sumstats_lut: ${datasets.gwas_catalog_sumstats_lut} -variant_annotation_path: ${datasets.gnomad_variants} -catalog_studies_out: ${datasets.gwas_catalog_study_index} -catalog_associations_out: ${datasets.gwas_catalog_study_locus_folder}/gwas_catalog_curated_associations -gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation} -inclusion_list_path: ${datasets.gwas_catalog_curated_inclusion_list} diff --git a/config/step/ot_gwas_catalog_study_curation.yaml b/config/step/ot_gwas_catalog_study_curation.yaml deleted file mode 100644 index 77c1d7834..000000000 --- a/config/step/ot_gwas_catalog_study_curation.yaml +++ /dev/null @@ -1,8 +0,0 @@ -defaults: - - gwas_catalog_study_curation - -catalog_study_files: 
${datasets.gwas_catalog_studies} -catalog_ancestry_files: ${datasets.gwas_catalog_ancestries} -catalog_sumstats_lut: ${datasets.gwas_catalog_sumstats_lut} -gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation} -gwas_catalog_study_curation_out: ??? diff --git a/config/step/ot_gwas_catalog_study_inclusion.yaml b/config/step/ot_gwas_catalog_study_inclusion.yaml deleted file mode 100644 index 41590333c..000000000 --- a/config/step/ot_gwas_catalog_study_inclusion.yaml +++ /dev/null @@ -1,12 +0,0 @@ -defaults: - - gwas_catalog_study_inclusion - -catalog_study_files: ${datasets.gwas_catalog_studies} -catalog_ancestry_files: ${datasets.gwas_catalog_ancestries} -catalog_associations_file: ${datasets.gwas_catalog_associations} -variant_annotation_path: ${datasets.gnomad_variants} -gwas_catalog_study_curation_file: ${datasets.gwas_catalog_study_curation} -harmonised_study_file: ${datasets.gwas_catalog_summary_stats_list} -criteria: ??? -inclusion_list_path: ??? -exclusion_list_path: ??? diff --git a/config/step/ot_ld_based_clumping.yaml b/config/step/ot_ld_based_clumping.yaml deleted file mode 100644 index f836145c2..000000000 --- a/config/step/ot_ld_based_clumping.yaml +++ /dev/null @@ -1,7 +0,0 @@ -defaults: - - ld_based_clumping - -ld_index_path: ${datasets.ld_index}/2.1.1 -study_locus_input_path: ??? -study_index_path: ??? -clumped_study_locus_output_path: ??? 
diff --git a/config/step/ot_ld_index.yaml b/config/step/ot_ld_index.yaml deleted file mode 100644 index d17a0777c..000000000 --- a/config/step/ot_ld_index.yaml +++ /dev/null @@ -1,20 +0,0 @@ -defaults: - - ld_index - -ld_index_out: ${datasets.ld_index} -ld_matrix_template: ${datasets.ld_matrix_template} -ld_index_raw_template: ${datasets.ld_index_raw_template} -grch37_to_grch38_chain_path: ${datasets.chain_37_38} -liftover_ht_path: ${datasets.liftover_ht_path} -ld_populations: - - afr # African-American - - amr # American Admixed/Latino - - asj # Ashkenazi Jewish - - eas # East Asian - - est # Estonian - - fin # Finnish - - nfe # Non-Finnish European - - nwe # Northwestern European - - seu # Southeastern European -# The version will of the gnomad will be inferred from ld_matrix_template and appended to the ld_index_out. -use_version_from_input: true diff --git a/config/step/ot_locus_to_gene_predict.yaml b/config/step/ot_locus_to_gene_predict.yaml deleted file mode 100644 index c3cb88b59..000000000 --- a/config/step/ot_locus_to_gene_predict.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - locus_to_gene - -run_mode: predict -model_path: null -predictions_path: ${datasets.l2g_predictions} -feature_matrix_path: ${datasets.l2g_feature_matrix} -credible_set_path: ${datasets.credible_set} -variant_gene_path: ${datasets.variant_to_gene} -colocalisation_path: ${datasets.colocalisation} -study_index_path: ${datasets.study_index} diff --git a/config/step/ot_locus_to_gene_train.yaml b/config/step/ot_locus_to_gene_train.yaml deleted file mode 100644 index b59a24dae..000000000 --- a/config/step/ot_locus_to_gene_train.yaml +++ /dev/null @@ -1,19 +0,0 @@ -defaults: - - locus_to_gene - -run_mode: train -wandb_run_name: null -hf_hub_repo_id: opentargets/locus_to_gene -model_path: ${datasets.l2g_model} -predictions_path: ${datasets.l2g_predictions} -credible_set_path: ${datasets.credible_set} -variant_gene_path: ${datasets.variant_to_gene} -colocalisation_path: 
${datasets.colocalisation} -study_index_path: ${datasets.study_index} -gold_standard_curation_path: ${datasets.l2g_gold_standard_curation} -gene_interactions_path: ${datasets.gene_interactions} -hyperparameters: - n_estimators: 100 - max_depth: 5 - loss: log_loss -download_from_hub: true diff --git a/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml b/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml deleted file mode 100644 index 24da7bad0..000000000 --- a/config/step/ot_ukb_ppp_eur_sumstat_preprocess.yaml +++ /dev/null @@ -1,13 +0,0 @@ -defaults: - - ukb_ppp_eur_sumstat_preprocess - -raw_study_index_path_from_tsv: ??? -raw_summary_stats_path: ??? -variant_annotation_path: ??? -tmp_variant_annotation_path: ??? -study_index_output_path: ??? -summary_stats_output_path: ??? - -session: - extended_spark_conf: - "spark.sql.shuffle.partitions": "3200" diff --git a/config/step/ot_variant_index.yaml b/config/step/ot_variant_index.yaml deleted file mode 100644 index 00b6b1602..000000000 --- a/config/step/ot_variant_index.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - variant_index - -vep_output_json_path: ${datasets.vep_output_path} -gnomad_variant_annotations_path: ${datasets.gnomad_variants} -variant_index_path: ${datasets.variant_index} diff --git a/config/step/ot_variant_to_gene.yaml b/config/step/ot_variant_to_gene.yaml deleted file mode 100644 index 7187a0625..000000000 --- a/config/step/ot_variant_to_gene.yaml +++ /dev/null @@ -1,13 +0,0 @@ -defaults: - - variant_to_gene - -variant_index_path: ${datasets.variant_index} -gene_index_path: ${datasets.gene_index} -vep_consequences_path: ${datasets.vep_consequences} -liftover_chain_file_path: ${datasets.chain_37_38} -interval_sources: - andersson: ${datasets.anderson} - javierre: ${datasets.javierre} - jung: ${datasets.jung} - thurman: ${datasets.thurman} -v2g_path: ${datasets.variant_to_gene} diff --git a/config/step/session/dataproc.yaml b/config/step/session/dataproc.yaml deleted file mode 100644 index 
6ac641718..000000000 --- a/config/step/session/dataproc.yaml +++ /dev/null @@ -1,5 +0,0 @@ -defaults: - - base_session - -spark_uri: yarn -write_mode: errorifexists diff --git a/docs/development/airflow.md b/docs/development/airflow.md deleted file mode 100644 index ff5f7906c..000000000 --- a/docs/development/airflow.md +++ /dev/null @@ -1,124 +0,0 @@ -# Airflow configuration - -This section describes how to set up a local Airflow server which will orchestrate running workflows in Google Cloud Platform. This is useful for testing and debugging, but for production use, it is recommended to run Airflow on a dedicated server. - -## Install pre-requisites - -- [Docker](https://docs.docker.com/get-docker/) -- [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) - -!!! warning macOS Docker memory allocation - - On macOS, the default amount of memory available for Docker might not be enough to get Airflow up and running. Allocate at least 4GB of memory for the Docker Engine (ideally 8GB). [More info](https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#) - -## Configure Airflow access to Google Cloud Platform - -!!! warning Specifying Google Cloud parameters - - Run the next two command with the appropriate Google Cloud project ID and service account name to ensure the correct Google default application credentials are set up. - -Authenticate to Google Cloud: - -```bash -gcloud auth application-default login --project= -``` - -Create the service account key file that will be used by Airflow to access Google Cloud Platform resources: - -```bash -gcloud iam service-accounts keys create ~/.config/gcloud/service_account_credentials.json --iam-account=@appspot.gserviceaccount.com -``` - -## Set up Airflow - -Change the working directory so that all subsequent commands will work: - -```bash -cd src/airflow -``` - -### Build Docker image - -!!! 
note Custom Docker image for Airflow - - The custom Dockerfile built by the command below extends the official [Airflow Docker Compose YAML](https://airflow.apache.org/docs/apache-airflow/stable/docker-compose.yaml). We add support for Google Cloud SDK, Google Dataproc operators, and access to GCP credentials. - -```bash -docker build . --tag extending_airflow:latest -``` - -### Set Airflow user ID - -!!! note Setting Airflow user ID - - These commands allow Airflow running inside Docker to access the credentials file which was generated earlier. - -```bash -# If any user ID is already specified in .env, remove it. -grep -v "AIRFLOW_UID" .env > .env.tmp -# Add the correct user ID. -echo "AIRFLOW_UID=$(id -u)" >> .env.tmp -# Move the file. -mv .env.tmp .env -``` - -### Initialise - -Before starting Airflow, initialise the database: - -```bash -docker compose up airflow-init -``` - -Now start all services: - -```bash -docker compose up -d -``` - -Airflow UI will now be available at `http://localhost:8080/`. Default username and password are both `airflow`. - -For additional information on how to use Airflow visit the [official documentation](https://airflow.apache.org/docs/apache-airflow/stable/index.html). - -### Cleaning up - -At any time, you can check the status of your containers with: - -```bash -docker ps -``` - -To stop Airflow, run: - -```bash -docker compose down -``` - -To cleanup the Airflow database, run: - -```bash -docker compose down --volumes --remove-orphans -``` - -### Advanced configuration - -More information on running Airflow with Docker Compose can be found in the [official docs](https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html). - -1. **Increase Airflow concurrency**. 
Modify the `docker-compose.yaml` and add the following to the x-airflow-common → environment section: - - ```yaml - AIRFLOW__CORE__PARALLELISM: 32 - AIRFLOW__CORE__MAX_ACTIVE_TASKS_PER_DAG: 32 - AIRFLOW__SCHEDULER__MAX_TIS_PER_QUERY: 16 - AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG: 1 - # Also add the following line if you are using CeleryExecutor (by default, LocalExecutor is used). - AIRFLOW__CELERY__WORKER_CONCURRENCY: 32 - ``` - -1. **Additional pip packages**. They can be added to the `requirements.txt` file. - -## Troubleshooting - -Note that when you a a new workflow under `dags/`, Airflow will not pick that up immediately. By default the filesystem is only scanned for new DAGs every 300s. However, once the DAG is added, updates are applied nearly instantaneously. - -Also, if you edit the DAG while an instance of it is running, it might cause problems with the run, as Airflow will try to update the tasks and their properties in DAG according to the file changes. diff --git a/docs/development/contributing.md b/docs/development/contributing.md index 3a363210f..acbb8f2a7 100644 --- a/docs/development/contributing.md +++ b/docs/development/contributing.md @@ -18,7 +18,7 @@ For Google Cloud configuration: Check that you have the `make` utility installed, and if not (which is unlikely), install it using your system package manager. -Check that you have `java` installed. +Check that you have `java` installed. To be able to use all features including hail support use java 11. ## Environment configuration @@ -26,30 +26,19 @@ Run `make setup-dev` to install/update the necessary packages and activate the d It is recommended to use VS Code as an IDE for development. -## How to run the code +## How to create gentropy step -All pipelines in this repository are intended to be run in Google Dataproc. Running them locally is not currently supported. 
+All gentropy steps can be invoked after successful environment configuration by running: -In order to run the code: +```bash +poetry run gentropy step= +``` -1. Manually edit your local `src/airflow/dags/*` file and comment out the steps you do not want to run. +1. Create a new step config in `src/gentropy/config.py` that inherits from the `StepConfig` class. -2. Manually edit your local `pyproject.toml` file and modify the version of the code. +2. Register the new step configuration in the `ConfigStore`. - - This must be different from the version used by any other people working on the repository to avoid any deployment conflicts, so it's a good idea to use your name, for example: `1.2.3+jdoe`. - - You can also add a brief branch description, for example: `1.2.3+jdoe.myfeature`. - - Note that the version must comply with [PEP440 conventions](https://peps.python.org/pep-0440/#normalization), otherwise Poetry will not allow it to be deployed. - - Do not use underscores or hyphens in your version name. When building the WHL file, they will be automatically converted to dots, which means the file name will no longer match the version and the build will fail. Use dots instead. - -3. Manually edit your local `src/airflow/dags/common_airflow.py` and set `GENTROPY_VERSION` to the same version as you did in the previous step. - -4. Run `make build`. - - - This will create a bundle containing the neccessary code, configuration and dependencies to run the ETL pipeline, and then upload this bundle to Google Cloud. - - A version specific subpath is used, so uploading the code will not affect any branches but your own. - - If there was already a code bundle uploaded with the same version number, it will be replaced. - -5. Open Airflow UI and run the DAG. +3. Create a step class that holds the business logic in a new file in `src/gentropy`. ## Contributing checklist @@ -72,8 +61,7 @@ For more details on each of these steps, see the sections below.
### Configuration -- Input and output paths in `config/datasets/ot_gcp.yaml` -- Step configuration, for example: `config/step/ot_finngen_sumstat_preprocess.yaml` +- Step default configuration in the `StepConfig`-derived classes in `src/gentropy/config.py`. ### Classes @@ -87,6 +75,6 @@ For more details on each of these steps, see the sections below. - Test sample data, for example: `tests/gentropy/data_samples/finngen_studies_sample.json` - Test definition, for example: `tests/dataset/test_study_index.py` → `test_study_index_finngen_creation`) -### Orchestration +### Airflow dags -- Airflow DAG, for example: `src/airflow/dags/finngen_harmonisation.py` +- As of version 2.0.0, the Airflow orchestration layer has been moved to the [orchestration repository](https://github.com/opentargets/orchestration). diff --git a/docs/development/workflows.md b/docs/development/workflows.md deleted file mode 100644 index 2269041d8..000000000 --- a/docs/development/workflows.md +++ /dev/null @@ -1,27 +0,0 @@ -# Pipeline workflows - -This page describes the high level components of the pipeline, which are organised as Airflow DAGs (directed acyclic graphs). - -## Note on DAGs and Dataproc clusters - -Each DAG consists of the following general stages: - -1. Create cluster (if it already exists, this step is skipped) - -1. Install dependencies on the cluster - -1. Run data processing steps for this DAG - -1. Delete the cluster - -Within a DAG, all data processing steps run on the same Dataproc cluster as separate jobs. - -There is no need to configure DAGs or steps depending on the size of the input data. Clusters have autoscaling enabled, which means they will increase or decrease the number of worker VMs to accommodate the load. - -## DAG 1: Preprocess - -This DAG contains steps which are only supposed to be run once, or very rarely. They ingest external data and apply bespoke transformations specific for each particular data source.
The output is normalised according to the data schemas used by the pipeline. - -## DAG 2: ETL - -The ETL DAG takes the inputs of the previous step and performs the main algorithmic processing. This processing is supposed to be data source agnostic. diff --git a/mkdocs.yml b/mkdocs.yml index 180c6fbe1..3704ed274 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -48,7 +48,8 @@ markdown_extensions: - pymdownx.tabbed: alternate_style: true combine_header_slug: true - + - pymdownx.tasklist: + custom_checkbox: true hooks: - src/utils/schemadocs.py diff --git a/poetry.lock b/poetry.lock index 2ba1390ce..0df0da543 100644 --- a/poetry.lock +++ b/poetry.lock @@ -14,17 +14,6 @@ files = [ [package.dependencies] pycares = ">=3.0.0" -[[package]] -name = "aiofiles" -version = "23.2.1" -description = "File support for asyncio." -optional = false -python-versions = ">=3.7" -files = [ - {file = "aiofiles-23.2.1-py3-none-any.whl", hash = "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107"}, - {file = "aiofiles-23.2.1.tar.gz", hash = "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a"}, -] - [[package]] name = "aiohttp" version = "3.9.5" @@ -135,36 +124,6 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" -[[package]] -name = "alembic" -version = "1.13.1" -description = "A database migration tool for SQLAlchemy." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "alembic-1.13.1-py3-none-any.whl", hash = "sha256:2edcc97bed0bd3272611ce3a98d98279e9c209e7186e43e75bbb1b2bdfdbcc43"}, - {file = "alembic-1.13.1.tar.gz", hash = "sha256:4932c8558bf68f2ee92b9bbcb8218671c627064d5b08939437af6d77dc05e595"}, -] - -[package.dependencies] -Mako = "*" -SQLAlchemy = ">=1.3.0" -typing-extensions = ">=4" - -[package.extras] -tz = ["backports.zoneinfo"] - -[[package]] -name = "annotated-types" -version = "0.7.0" -description = "Reusable constraint types to use with typing.Annotated" -optional = false -python-versions = ">=3.8" -files = [ - {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, - {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, -] - [[package]] name = "antlr4-python3-runtime" version = "4.9.3" @@ -175,493 +134,6 @@ files = [ {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, ] -[[package]] -name = "anyio" -version = "4.4.0" -description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false -python-versions = ">=3.8" -files = [ - {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, - {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, -] - -[package.dependencies] -exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} -idna = ">=2.8" -sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} - -[package.extras] -doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis 
(>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] -trio = ["trio (>=0.23)"] - -[[package]] -name = "apache-airflow" -version = "2.9.2" -description = "Programmatically author, schedule and monitor data pipelines" -optional = false -python-versions = "<3.13,~=3.8" -files = [ - {file = "apache_airflow-2.9.2-py3-none-any.whl", hash = "sha256:6fd6501b1622ab58f4a3c1fc5bed4c216bd36915243bb9445b54415c3e625200"}, - {file = "apache_airflow-2.9.2.tar.gz", hash = "sha256:c5d7b4bbcbc4d7b2bb3433af2d9307a3f3dc0b142c25fdbe8f187dd4cad5521d"}, -] - -[package.dependencies] -alembic = ">=1.13.1,<2.0" -apache-airflow-providers-common-io = "*" -apache-airflow-providers-common-sql = "*" -apache-airflow-providers-fab = ">=1.0.2" -apache-airflow-providers-ftp = "*" -apache-airflow-providers-http = "*" -apache-airflow-providers-imap = "*" -apache-airflow-providers-smtp = "*" -apache-airflow-providers-sqlite = "*" -argcomplete = ">=1.10" -asgiref = "*" -attrs = ">=22.1.0" -blinker = ">=1.6.2" -colorlog = ">=4.0.2,<5.0" -configupdater = ">=3.1.1" -connexion = {version = ">=2.10.0,<3.0", extras = ["flask"]} -cron-descriptor = ">=1.2.24" -croniter = ">=2.0.2" -cryptography = ">=39.0.0" -deprecated = ">=1.2.13" -dill = ">=0.2.2" -flask = ">=2.2,<2.3" -flask-caching = ">=1.5.0" -flask-session = ">=0.4.0,<0.6" -flask-wtf = ">=0.15" -fsspec = ">=2023.10.0" -google-re2 = ">=1.0" -gunicorn = ">=20.1.0" -httpx = "*" -importlib_metadata = {version = ">=6.5", markers = "python_version < \"3.12\""} -itsdangerous = ">=2.0" -jinja2 = ">=3.0.0" -jsonschema = ">=4.18.0" -lazy-object-proxy = "*" -linkify-it-py = ">=2.0.0" -lockfile = ">=0.12.2" -markdown-it-py = ">=2.1.0" -markupsafe = ">=1.1.1" -marshmallow-oneofschema = ">=2.0.1" -mdit-py-plugins = ">=0.3.0" -methodtools = ">=0.4.7" -opentelemetry-api = ">=1.15.0" -opentelemetry-exporter-otlp = "*" -packaging = ">=14.0" -pathspec = ">=0.9.0" -pendulum = ">=2.1.2,<4.0" -pluggy = ">=1.0" -psutil = ">=4.2.0" 
-pygments = ">=2.0.1" -pyjwt = ">=2.0.0" -python-daemon = ">=3.0.0" -python-dateutil = ">=2.3" -python-nvd3 = ">=0.15.0" -python-slugify = ">=5.0" -requests = ">=2.27.0,<3" -rfc3339-validator = ">=0.1.4" -rich = ">=12.4.4" -rich-argparse = ">=1.0.0" -setproctitle = ">=1.1.8" -sqlalchemy = ">=1.4.36,<2.0" -sqlalchemy-jsonfield = ">=1.0" -tabulate = ">=0.7.5" -tenacity = ">=6.2.0,<8.2.0 || >8.2.0" -termcolor = ">=1.1.0" -unicodecsv = ">=0.14.1" -universal-pathlib = ">=0.2.2" -werkzeug = ">=2.0,<3" - -[package.extras] -aiobotocore = ["aiobotocore (>=2.7.0)"] -airbyte = ["apache-airflow-providers-airbyte"] -alibaba = ["apache-airflow-providers-alibaba"] -all = ["apache-airflow[aiobotocore]", "apache-airflow[airbyte]", "apache-airflow[alibaba]", "apache-airflow[all-dbs]", "apache-airflow[amazon]", "apache-airflow[apache-atlas]", "apache-airflow[apache-beam]", "apache-airflow[apache-cassandra]", "apache-airflow[apache-drill]", "apache-airflow[apache-druid]", "apache-airflow[apache-flink]", "apache-airflow[apache-hdfs]", "apache-airflow[apache-hive]", "apache-airflow[apache-impala]", "apache-airflow[apache-kafka]", "apache-airflow[apache-kylin]", "apache-airflow[apache-livy]", "apache-airflow[apache-pig]", "apache-airflow[apache-pinot]", "apache-airflow[apache-spark]", "apache-airflow[apache-webhdfs]", "apache-airflow[apprise]", "apache-airflow[arangodb]", "apache-airflow[asana]", "apache-airflow[async]", "apache-airflow[atlassian-jira]", "apache-airflow[celery]", "apache-airflow[cgroups]", "apache-airflow[cloudant]", "apache-airflow[cncf-kubernetes]", "apache-airflow[cohere]", "apache-airflow[common-io]", "apache-airflow[common-sql]", "apache-airflow[databricks]", "apache-airflow[datadog]", "apache-airflow[dbt-cloud]", "apache-airflow[deprecated-api]", "apache-airflow[dingding]", "apache-airflow[discord]", "apache-airflow[docker]", "apache-airflow[elasticsearch]", "apache-airflow[exasol]", "apache-airflow[fab]", "apache-airflow[facebook]", "apache-airflow[ftp]", 
"apache-airflow[github-enterprise]", "apache-airflow[github]", "apache-airflow[google-auth]", "apache-airflow[google]", "apache-airflow[graphviz]", "apache-airflow[grpc]", "apache-airflow[hashicorp]", "apache-airflow[http]", "apache-airflow[imap]", "apache-airflow[influxdb]", "apache-airflow[jdbc]", "apache-airflow[jenkins]", "apache-airflow[kerberos]", "apache-airflow[ldap]", "apache-airflow[leveldb]", "apache-airflow[microsoft-azure]", "apache-airflow[microsoft-mssql]", "apache-airflow[microsoft-psrp]", "apache-airflow[microsoft-winrm]", "apache-airflow[mongo]", "apache-airflow[mysql]", "apache-airflow[neo4j]", "apache-airflow[odbc]", "apache-airflow[openai]", "apache-airflow[openfaas]", "apache-airflow[openlineage]", "apache-airflow[opensearch]", "apache-airflow[opsgenie]", "apache-airflow[oracle]", "apache-airflow[otel]", "apache-airflow[pagerduty]", "apache-airflow[pandas]", "apache-airflow[papermill]", "apache-airflow[password]", "apache-airflow[pgvector]", "apache-airflow[pinecone]", "apache-airflow[postgres]", "apache-airflow[presto]", "apache-airflow[pydantic]", "apache-airflow[qdrant]", "apache-airflow[rabbitmq]", "apache-airflow[redis]", "apache-airflow[s3fs]", "apache-airflow[salesforce]", "apache-airflow[samba]", "apache-airflow[saml]", "apache-airflow[segment]", "apache-airflow[sendgrid]", "apache-airflow[sentry]", "apache-airflow[sftp]", "apache-airflow[singularity]", "apache-airflow[slack]", "apache-airflow[smtp]", "apache-airflow[snowflake]", "apache-airflow[sqlite]", "apache-airflow[ssh]", "apache-airflow[statsd]", "apache-airflow[tableau]", "apache-airflow[tabular]", "apache-airflow[telegram]", "apache-airflow[teradata]", "apache-airflow[trino]", "apache-airflow[uv]", "apache-airflow[vertica]", "apache-airflow[virtualenv]", "apache-airflow[weaviate]", "apache-airflow[yandex]", "apache-airflow[zendesk]"] -all-core = ["apache-airflow[aiobotocore]", "apache-airflow[apache-atlas]", "apache-airflow[apache-webhdfs]", "apache-airflow[async]", 
"apache-airflow[cgroups]", "apache-airflow[deprecated-api]", "apache-airflow[github-enterprise]", "apache-airflow[google-auth]", "apache-airflow[graphviz]", "apache-airflow[kerberos]", "apache-airflow[ldap]", "apache-airflow[leveldb]", "apache-airflow[otel]", "apache-airflow[pandas]", "apache-airflow[password]", "apache-airflow[pydantic]", "apache-airflow[rabbitmq]", "apache-airflow[s3fs]", "apache-airflow[saml]", "apache-airflow[sentry]", "apache-airflow[statsd]", "apache-airflow[uv]", "apache-airflow[virtualenv]"] -all-dbs = ["apache-airflow[apache-cassandra]", "apache-airflow[apache-drill]", "apache-airflow[apache-druid]", "apache-airflow[apache-hdfs]", "apache-airflow[apache-hive]", "apache-airflow[apache-impala]", "apache-airflow[apache-pinot]", "apache-airflow[arangodb]", "apache-airflow[cloudant]", "apache-airflow[databricks]", "apache-airflow[exasol]", "apache-airflow[influxdb]", "apache-airflow[microsoft-mssql]", "apache-airflow[mongo]", "apache-airflow[mysql]", "apache-airflow[neo4j]", "apache-airflow[postgres]", "apache-airflow[presto]", "apache-airflow[trino]", "apache-airflow[vertica]"] -amazon = ["apache-airflow-providers-amazon"] -apache-atlas = ["atlasclient (>=0.1.2)"] -apache-beam = ["apache-airflow-providers-apache-beam"] -apache-cassandra = ["apache-airflow-providers-apache-cassandra"] -apache-drill = ["apache-airflow-providers-apache-drill"] -apache-druid = ["apache-airflow-providers-apache-druid"] -apache-flink = ["apache-airflow-providers-apache-flink"] -apache-hdfs = ["apache-airflow-providers-apache-hdfs"] -apache-hive = ["apache-airflow-providers-apache-hive"] -apache-impala = ["apache-airflow-providers-apache-impala"] -apache-kafka = ["apache-airflow-providers-apache-kafka"] -apache-kylin = ["apache-airflow-providers-apache-kylin"] -apache-livy = ["apache-airflow-providers-apache-livy"] -apache-pig = ["apache-airflow-providers-apache-pig"] -apache-pinot = ["apache-airflow-providers-apache-pinot"] -apache-spark = 
["apache-airflow-providers-apache-spark"] -apache-webhdfs = ["hdfs[avro,dataframe,kerberos] (>=2.0.4)"] -apprise = ["apache-airflow-providers-apprise"] -arangodb = ["apache-airflow-providers-arangodb"] -asana = ["apache-airflow-providers-asana"] -async = ["eventlet (>=0.33.3)", "gevent (>=0.13)", "greenlet (>=0.4.9)"] -atlas = ["apache-airflow[apache-atlas]"] -atlassian-jira = ["apache-airflow-providers-atlassian-jira"] -aws = ["apache-airflow[amazon]"] -azure = ["apache-airflow[microsoft-azure]"] -cassandra = ["apache-airflow[apache-cassandra]"] -celery = ["apache-airflow-providers-celery"] -cgroups = ["cgroupspy (>=0.2.2)"] -cloudant = ["apache-airflow-providers-cloudant"] -cncf-kubernetes = ["apache-airflow-providers-cncf-kubernetes"] -cohere = ["apache-airflow-providers-cohere"] -common-io = ["apache-airflow-providers-common-io"] -common-sql = ["apache-airflow-providers-common-sql"] -databricks = ["apache-airflow-providers-databricks"] -datadog = ["apache-airflow-providers-datadog"] -dbt-cloud = ["apache-airflow-providers-dbt-cloud"] -deprecated-api = ["requests (>=2.27.0,<3)"] -devel-ci = ["aiobotocore (>=2.7.0)", "aiofiles (>=23.2.0)", "aioresponses (>=0.7.6)", "amqp", "astroid (>=2.12.3,<3.0)", "atlasclient (>=0.1.2)", "authlib (>=1.0.0)", "backports-zoneinfo (>=0.2.1)", "bcrypt (>=2.0.0)", "beautifulsoup4 (>=4.7.1)", "black (>=23.12.0)", "blinker (>=1.1)", "blinker (>=1.7.0)", "cgroupspy (>=0.2.2)", "checksumdir (>=1.2.0)", "click (>=8.0)", "click (>=8.0,!=8.1.4,!=8.1.5)", "coverage (>=7.4.0)", "diagrams (>=0.23.4)", "docutils (>=0.16,<0.17)", "duckdb (>=0.10.0)", "duckdb (>=0.9.0)", "eralchemy2 (>=1.3.8)", "eventlet (>=0.33.3)", "flask-bcrypt (>=0.7.1)", "gevent (>=0.13)", "gitpython (>=3.1.40)", "graphviz (>=0.12)", "greenlet (>=0.4.9)", "hatch (>=1.9.1)", "hdfs[avro,dataframe,kerberos] (>=2.0.4)", "ipdb (>=0.13.13)", "ldap3 (>=2.5.1)", "mypy (==1.9.0)", "opentelemetry-exporter-prometheus", "pandas (>=1.2.5,<2.2)", "pipdeptree (>=2.13.1)", "plyvel", 
"pre-commit (>=3.5.0)", "pydantic (>=2.3.0)", "pygithub (>=2.1.1)", "pykerberos (>=1.1.13)", "pytest (>=7.4.4,<8.0)", "pytest-asyncio (>=0.23.3)", "pytest-cov (>=4.1.0)", "pytest-custom-exit-code (>=0.3.0)", "pytest-icdiff (>=0.9)", "pytest-instafail (>=0.5.0)", "pytest-mock (>=3.12.0)", "pytest-rerunfailures (>=13.0)", "pytest-timeouts (>=1.2.1)", "pytest-xdist (>=3.5.0)", "python-ldap", "python3-saml (>=1.16.0)", "requests (>=2.27.0,<3)", "requests-kerberos (>=0.10.0)", "requests-mock (>=1.11.0)", "restructuredtext-lint (>=1.4.0)", "rich-click (>=1.7.0)", "ruff (==0.3.3)", "s3fs (>=2023.10.0)", "semver (>=3.0.2)", "sentry-sdk (>=1.32.0,!=1.33.0)", "sphinx (>=5.3.0,<6.0.0)", "sphinx-airflow-theme (>=0.0.12)", "sphinx-argparse (>=0.4.0)", "sphinx-autoapi (>=2.1.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-jinja (>=2.0.2)", "sphinx-rtd-theme (>=2.0.0)", "sphinxcontrib-applehelp (>=1.0.4)", "sphinxcontrib-devhelp (>=1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.1)", "sphinxcontrib-httpdomain (>=1.8.1)", "sphinxcontrib-jquery (>=4.1)", "sphinxcontrib-jsmath (>=1.0.1)", "sphinxcontrib-qthelp (>=1.0.3)", "sphinxcontrib-redoc (>=1.6.0)", "sphinxcontrib-serializinghtml (==1.1.5)", "sphinxcontrib-spelling (>=8.0.0)", "statsd (>=3.3.0)", "thrift-sasl (>=0.2.0)", "time-machine (>=2.13.0)", "towncrier (>=23.11.0)", "twine (>=4.0.2)", "types-aiofiles", "types-certifi", "types-croniter", "types-deprecated", "types-docutils", "types-markdown", "types-paramiko", "types-protobuf", "types-pymysql", "types-python-dateutil", "types-python-slugify", "types-pytz", "types-pyyaml", "types-redis", "types-requests", "types-setuptools", "types-tabulate", "types-termcolor", "types-toml", "uv (>=0.1.32)", "virtualenv", "wheel (>=0.42.0)", "yamllint (>=1.33.0)"] -dingding = ["apache-airflow-providers-dingding"] -discord = ["apache-airflow-providers-discord"] -docker = ["apache-airflow-providers-docker"] -druid = ["apache-airflow[apache-druid]"] -elasticsearch = 
["apache-airflow-providers-elasticsearch"] -exasol = ["apache-airflow-providers-exasol"] -fab = ["apache-airflow-providers-fab"] -facebook = ["apache-airflow-providers-facebook"] -ftp = ["apache-airflow-providers-ftp"] -gcp = ["apache-airflow[google]"] -gcp-api = ["apache-airflow[google]"] -github = ["apache-airflow-providers-github"] -github-enterprise = ["apache-airflow[fab]", "authlib (>=1.0.0)"] -google = ["apache-airflow-providers-google"] -google-auth = ["apache-airflow[fab]", "authlib (>=1.0.0)"] -graphviz = ["graphviz (>=0.12)"] -grpc = ["apache-airflow-providers-grpc"] -hashicorp = ["apache-airflow-providers-hashicorp"] -hdfs = ["apache-airflow[apache-hdfs]"] -hive = ["apache-airflow[apache-hive]"] -http = ["apache-airflow-providers-http"] -imap = ["apache-airflow-providers-imap"] -influxdb = ["apache-airflow-providers-influxdb"] -jdbc = ["apache-airflow-providers-jdbc"] -jenkins = ["apache-airflow-providers-jenkins"] -kerberos = ["pykerberos (>=1.1.13)", "requests-kerberos (>=0.10.0)", "thrift-sasl (>=0.2.0)"] -kubernetes = ["apache-airflow[cncf-kubernetes]"] -ldap = ["ldap3 (>=2.5.1)", "python-ldap"] -leveldb = ["plyvel"] -microsoft-azure = ["apache-airflow-providers-microsoft-azure"] -microsoft-mssql = ["apache-airflow-providers-microsoft-mssql"] -microsoft-psrp = ["apache-airflow-providers-microsoft-psrp"] -microsoft-winrm = ["apache-airflow-providers-microsoft-winrm"] -mongo = ["apache-airflow-providers-mongo"] -mssql = ["apache-airflow[microsoft-mssql]"] -mysql = ["apache-airflow-providers-mysql"] -neo4j = ["apache-airflow-providers-neo4j"] -odbc = ["apache-airflow-providers-odbc"] -openai = ["apache-airflow-providers-openai"] -openfaas = ["apache-airflow-providers-openfaas"] -openlineage = ["apache-airflow-providers-openlineage"] -opensearch = ["apache-airflow-providers-opensearch"] -opsgenie = ["apache-airflow-providers-opsgenie"] -oracle = ["apache-airflow-providers-oracle"] -otel = ["opentelemetry-exporter-prometheus"] -pagerduty = 
["apache-airflow-providers-pagerduty"] -pandas = ["pandas (>=1.2.5,<2.2)"] -papermill = ["apache-airflow-providers-papermill"] -password = ["bcrypt (>=2.0.0)", "flask-bcrypt (>=0.7.1)"] -pgvector = ["apache-airflow-providers-pgvector"] -pinecone = ["apache-airflow-providers-pinecone"] -pinot = ["apache-airflow[apache-pinot]"] -postgres = ["apache-airflow-providers-postgres"] -presto = ["apache-airflow-providers-presto"] -pydantic = ["pydantic (>=2.3.0)"] -qdrant = ["apache-airflow-providers-qdrant"] -rabbitmq = ["amqp"] -redis = ["apache-airflow-providers-redis"] -s3 = ["apache-airflow[amazon]"] -s3fs = ["s3fs (>=2023.10.0)"] -salesforce = ["apache-airflow-providers-salesforce"] -samba = ["apache-airflow-providers-samba"] -saml = ["python3-saml (>=1.16.0)"] -segment = ["apache-airflow-providers-segment"] -sendgrid = ["apache-airflow-providers-sendgrid"] -sentry = ["blinker (>=1.1)", "sentry-sdk (>=1.32.0,!=1.33.0)"] -sftp = ["apache-airflow-providers-sftp"] -singularity = ["apache-airflow-providers-singularity"] -slack = ["apache-airflow-providers-slack"] -smtp = ["apache-airflow-providers-smtp"] -snowflake = ["apache-airflow-providers-snowflake"] -spark = ["apache-airflow[apache-spark]"] -sqlite = ["apache-airflow-providers-sqlite"] -ssh = ["apache-airflow-providers-ssh"] -statsd = ["statsd (>=3.3.0)"] -tableau = ["apache-airflow-providers-tableau"] -tabular = ["apache-airflow-providers-tabular"] -telegram = ["apache-airflow-providers-telegram"] -teradata = ["apache-airflow-providers-teradata"] -trino = ["apache-airflow-providers-trino"] -uv = ["uv (>=0.1.32)"] -vertica = ["apache-airflow-providers-vertica"] -virtualenv = ["virtualenv"] -weaviate = ["apache-airflow-providers-weaviate"] -webhdfs = ["apache-airflow[apache-webhdfs]"] -winrm = ["apache-airflow[microsoft-winrm]"] -yandex = ["apache-airflow-providers-yandex"] -zendesk = ["apache-airflow-providers-zendesk"] - -[[package]] -name = "apache-airflow-providers-common-io" -version = "1.3.2" -description = 
"Provider package apache-airflow-providers-common-io for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_common_io-1.3.2-py3-none-any.whl", hash = "sha256:7c0299d8eb2e3fc7b99f522c4d333e2b888edbf47861a8f3e3ae78707ae77aab"}, - {file = "apache_airflow_providers_common_io-1.3.2.tar.gz", hash = "sha256:1212e484a16ad311bcb979e84ad1fa1cc45d7f5ba4dfcbd7978887bd30809e75"}, -] - -[package.dependencies] -apache-airflow = ">=2.8.0" - -[package.extras] -openlineage = ["apache-airflow-providers-openlineage"] - -[[package]] -name = "apache-airflow-providers-common-sql" -version = "1.14.0" -description = "Provider package apache-airflow-providers-common-sql for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_common_sql-1.14.0-py3-none-any.whl", hash = "sha256:620ba5bf559964159b3faf0bf921e666d1c3bb74ade27daa385f9e9ecd413a1c"}, - {file = "apache_airflow_providers_common_sql-1.14.0.tar.gz", hash = "sha256:6179512edf261ede96adda31a535eec024471a5708a7528d27a4ece72f35783f"}, -] - -[package.dependencies] -apache-airflow = ">=2.7.0" -more-itertools = ">=9.0.0" -sqlparse = ">=0.4.2" - -[package.extras] -openlineage = ["apache-airflow-providers-openlineage"] -pandas = ["pandas (>=1.2.5,<2.2)"] - -[[package]] -name = "apache-airflow-providers-fab" -version = "1.1.1" -description = "Provider package apache-airflow-providers-fab for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_fab-1.1.1-py3-none-any.whl", hash = "sha256:5d393d209ef432618e1926b019fb7b543d1fc932b592c39b4170779c409a38a5"}, - {file = "apache_airflow_providers_fab-1.1.1.tar.gz", hash = "sha256:60c1722f9985675e65f78c05bca8b4eb708a3140a2ed16d8de2c4cc4a4ecadc3"}, -] - -[package.dependencies] -apache-airflow = ">=2.9.0" -flask = ">=2.2,<2.3" -flask-appbuilder = "4.4.1" -flask-login = ">=0.6.2" -google-re2 = ">=1.0" -jmespath = "*" - -[[package]] -name = 
"apache-airflow-providers-ftp" -version = "3.9.1" -description = "Provider package apache-airflow-providers-ftp for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_ftp-3.9.1-py3-none-any.whl", hash = "sha256:74744b27f356bc42b528605d33a868ed2b4c670a1c90a857fb05402740e6f980"}, - {file = "apache_airflow_providers_ftp-3.9.1.tar.gz", hash = "sha256:b5cc4445a6fabb73f760c678d1b7f1d10586dfc6c9d34746c7462180f1cbb3ce"}, -] - -[package.dependencies] -apache-airflow = ">=2.7.0" - -[package.extras] -openlineage = ["apache-airflow-providers-openlineage"] - -[[package]] -name = "apache-airflow-providers-google" -version = "10.15.0" -description = "Provider package apache-airflow-providers-google for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_google-10.15.0-py3-none-any.whl", hash = "sha256:70d2d4feb66f06cd750ea673344f3be20d2d575c9645f7e2b030c73551a297fc"}, - {file = "apache_airflow_providers_google-10.15.0.tar.gz", hash = "sha256:ff48fa0a29abec2645a8008c47d0a84a759b7203412707a5e2fc01558c4052a3"}, -] - -[package.dependencies] -apache-airflow = ">=2.6.0" -apache-airflow-providers-common-sql = ">=1.7.2" -asgiref = ">=3.5.2" -gcloud-aio-auth = ">=4.0.0,<5.0.0" -gcloud-aio-bigquery = ">=6.1.2" -gcloud-aio-storage = ">=9.0.0" -gcsfs = ">=2023.10.0" -google-ads = ">=22.1.0" -google-analytics-admin = "*" -google-api-core = ">=2.11.0,<2.16.0 || >2.16.0" -google-api-python-client = ">=1.6.0" -google-auth = ">=1.0.0" -google-auth-httplib2 = ">=0.0.1" -google-cloud-aiplatform = ">=1.22.1" -google-cloud-automl = ">=2.12.0" -google-cloud-batch = ">=0.13.0" -google-cloud-bigquery-datatransfer = ">=3.13.0" -google-cloud-bigtable = ">=2.17.0" -google-cloud-build = ">=3.22.0" -google-cloud-compute = ">=1.10.0" -google-cloud-container = ">=2.17.4" -google-cloud-datacatalog = ">=3.11.1" -google-cloud-dataflow-client = ">=0.8.6" -google-cloud-dataform = ">=0.5.0" 
-google-cloud-dataplex = ">=1.10.0" -google-cloud-dataproc = ">=5.8.0" -google-cloud-dataproc-metastore = ">=1.12.0" -google-cloud-dlp = ">=3.12.0" -google-cloud-kms = ">=2.15.0" -google-cloud-language = ">=2.9.0" -google-cloud-logging = ">=3.5.0" -google-cloud-memcache = ">=1.7.0" -google-cloud-monitoring = ">=2.18.0" -google-cloud-orchestration-airflow = ">=1.10.0" -google-cloud-os-login = ">=2.9.1" -google-cloud-pubsub = ">=2.19.0" -google-cloud-redis = ">=2.12.0" -google-cloud-run = ">=0.9.0" -google-cloud-secret-manager = ">=2.16.0" -google-cloud-spanner = ">=3.11.1" -google-cloud-speech = ">=2.18.0" -google-cloud-storage = ">=2.7.0" -google-cloud-storage-transfer = ">=1.4.1" -google-cloud-tasks = ">=2.13.0" -google-cloud-texttospeech = ">=2.14.1" -google-cloud-translate = ">=3.11.0" -google-cloud-videointelligence = ">=2.11.0" -google-cloud-vision = ">=3.4.0" -google-cloud-workflows = ">=1.10.0" -grpcio-gcp = ">=0.2.2" -httpx = "*" -json-merge-patch = ">=0.2" -looker-sdk = ">=22.2.0" -pandas = ">=1.2.5" -pandas-gbq = "*" -proto-plus = ">=1.19.6" -PyOpenSSL = "*" -sqlalchemy-bigquery = ">=1.2.1" -sqlalchemy-spanner = ">=1.6.2" - -[package.extras] -amazon = ["apache-airflow-providers-amazon (>=2.6.0)"] -apache-beam = ["apache-airflow-providers-apache-beam", "apache-beam[gcp]"] -apache-cassandra = ["apache-airflow-providers-apache-cassandra"] -cncf-kubernetes = ["apache-airflow-providers-cncf-kubernetes (>=7.2.0)"] -common-sql = ["apache-airflow-providers-common-sql"] -facebook = ["apache-airflow-providers-facebook (>=2.2.0)"] -leveldb = ["plyvel"] -microsoft-azure = ["apache-airflow-providers-microsoft-azure"] -microsoft-mssql = ["apache-airflow-providers-microsoft-mssql"] -mysql = ["apache-airflow-providers-mysql"] -openlineage = ["apache-airflow-providers-openlineage"] -oracle = ["apache-airflow-providers-oracle (>=3.1.0)"] -postgres = ["apache-airflow-providers-postgres"] -presto = ["apache-airflow-providers-presto"] -salesforce = 
["apache-airflow-providers-salesforce"] -sftp = ["apache-airflow-providers-sftp"] -ssh = ["apache-airflow-providers-ssh"] -trino = ["apache-airflow-providers-trino"] - -[[package]] -name = "apache-airflow-providers-http" -version = "4.11.1" -description = "Provider package apache-airflow-providers-http for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_http-4.11.1-py3-none-any.whl", hash = "sha256:f8aff8d009d8068654bed6c84c1bf13100ebbda4563c5fdea067047ba85a84bf"}, - {file = "apache_airflow_providers_http-4.11.1.tar.gz", hash = "sha256:401c4c976ca35388574afa85282fa35ed0514a77ef7ca442ea74132b3442a869"}, -] - -[package.dependencies] -aiohttp = ">=3.9.2" -apache-airflow = ">=2.7.0" -asgiref = "*" -requests = ">=2.27.0,<3" -requests_toolbelt = "*" - -[[package]] -name = "apache-airflow-providers-imap" -version = "3.6.1" -description = "Provider package apache-airflow-providers-imap for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_imap-3.6.1-py3-none-any.whl", hash = "sha256:1630dfad25a4db28da37ed4cb522674e37d0d981238fdb34ed2933c7f348763a"}, - {file = "apache_airflow_providers_imap-3.6.1.tar.gz", hash = "sha256:20e8052b43f32c3e711cbe0ffe3763cf550ffb06011ed4c57c3e806dd99dfa06"}, -] - -[package.dependencies] -apache-airflow = ">=2.7.0" - -[[package]] -name = "apache-airflow-providers-smtp" -version = "1.7.1" -description = "Provider package apache-airflow-providers-smtp for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_smtp-1.7.1-py3-none-any.whl", hash = "sha256:eab0910fa1351e58e1e87bb2489084ad5157e33a8752cce3164fd38f4b50c694"}, - {file = "apache_airflow_providers_smtp-1.7.1.tar.gz", hash = "sha256:707c4e2d75ce328693b55429f1f771e00cd2a2d6b52ce14edc20cb6d785be76e"}, -] - -[package.dependencies] -apache-airflow = ">=2.7.0" - -[[package]] -name = "apache-airflow-providers-sqlite" -version = 
"3.8.1" -description = "Provider package apache-airflow-providers-sqlite for Apache Airflow" -optional = false -python-versions = "~=3.8" -files = [ - {file = "apache_airflow_providers_sqlite-3.8.1-py3-none-any.whl", hash = "sha256:be9749275ac266245a2973269842674b10c3ad184790f9f0fd75e76e1d3b2440"}, - {file = "apache_airflow_providers_sqlite-3.8.1.tar.gz", hash = "sha256:b958c5aa725fcf6505c77dc3d600f3c6f9255be5405ea51fa966ae1d85842d3e"}, -] - -[package.dependencies] -apache-airflow = ">=2.7.0" -apache-airflow-providers-common-sql = ">=1.3.1" - -[package.extras] -common-sql = ["apache-airflow-providers-common-sql"] - -[[package]] -name = "apispec" -version = "6.6.1" -description = "A pluggable API specification generator. Currently supports the OpenAPI Specification (f.k.a. the Swagger specification)." -optional = false -python-versions = ">=3.8" -files = [ - {file = "apispec-6.6.1-py3-none-any.whl", hash = "sha256:6460315cb38ac6a2ff42d9e2b8dc0435c37d4428d3abeda96ff97b5dc8eb6b94"}, - {file = "apispec-6.6.1.tar.gz", hash = "sha256:f5caa47cee75fe03b9c50b5594048b4c052eeca2c212e0dac12dbb6175d9a659"}, -] - -[package.dependencies] -packaging = ">=21.3" -PyYAML = {version = ">=3.10", optional = true, markers = "extra == \"yaml\""} - -[package.extras] -dev = ["apispec[tests]", "pre-commit (>=3.5,<4.0)", "tox"] -docs = ["apispec[marshmallow]", "pyyaml (==6.0.1)", "sphinx (==7.3.7)", "sphinx-issues (==4.1.0)", "sphinx-rtd-theme (==2.0.0)"] -marshmallow = ["marshmallow (>=3.18.0)"] -tests = ["apispec[marshmallow,yaml]", "openapi-spec-validator (==0.7.1)", "pytest"] -yaml = ["PyYAML (>=3.10)"] - [[package]] name = "appnope" version = "0.1.4" @@ -673,37 +145,6 @@ files = [ {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, ] -[[package]] -name = "argcomplete" -version = "3.4.0" -description = "Bash tab completion for argparse" -optional = false -python-versions = ">=3.8" -files = [ - {file = 
"argcomplete-3.4.0-py3-none-any.whl", hash = "sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5"}, - {file = "argcomplete-3.4.0.tar.gz", hash = "sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f"}, -] - -[package.extras] -test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] - -[[package]] -name = "asgiref" -version = "3.8.1" -description = "ASGI specs, helper code, and adapters" -optional = false -python-versions = ">=3.8" -files = [ - {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"}, - {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, -] - -[package.dependencies] -typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} - -[package.extras] -tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] - [[package]] name = "asttokens" version = "2.4.1" @@ -878,17 +319,6 @@ files = [ [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] -[[package]] -name = "backoff" -version = "2.2.1" -description = "Function decoration for backoff and retry" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, - {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, -] - [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -910,17 +340,6 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] -[[package]] -name = "blinker" -version = "1.8.2" -description = "Fast, simple object-to-object and broadcast signaling" -optional = false -python-versions = ">=3.8" -files = [ - {file = "blinker-1.8.2-py3-none-any.whl", hash = "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01"}, - {file = "blinker-1.8.2.tar.gz", hash = 
"sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83"}, -] - [[package]] name = "bokeh" version = "3.4.1" @@ -992,17 +411,6 @@ files = [ {file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"}, ] -[[package]] -name = "cachelib" -version = "0.9.0" -description = "A collection of cache libraries in the same API interface." -optional = false -python-versions = ">=3.7" -files = [ - {file = "cachelib-0.9.0-py3-none-any.whl", hash = "sha256:811ceeb1209d2fe51cd2b62810bd1eccf70feba5c52641532498be5c675493b3"}, - {file = "cachelib-0.9.0.tar.gz", hash = "sha256:38222cc7c1b79a23606de5c2607f4925779e37cdcea1c2ad21b8bae94b5425a5"}, -] - [[package]] name = "cachetools" version = "5.3.3" @@ -1014,31 +422,6 @@ files = [ {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, ] -[[package]] -name = "cattrs" -version = "23.2.3" -description = "Composable complex class support for attrs and dataclasses." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "cattrs-23.2.3-py3-none-any.whl", hash = "sha256:0341994d94971052e9ee70662542699a3162ea1e0c62f7ce1b4a57f563685108"}, - {file = "cattrs-23.2.3.tar.gz", hash = "sha256:a934090d95abaa9e911dac357e3a8699e0b4b14f8529bcc7d2b1ad9d51672b9f"}, -] - -[package.dependencies] -attrs = ">=23.1.0" -exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.1.0,<4.6.3 || >4.6.3", markers = "python_version < \"3.11\""} - -[package.extras] -bson = ["pymongo (>=4.4.0)"] -cbor2 = ["cbor2 (>=5.4.6)"] -msgpack = ["msgpack (>=1.0.5)"] -orjson = ["orjson (>=3.9.2)"] -pyyaml = ["pyyaml (>=6.0)"] -tomlkit = ["tomlkit (>=0.11.8)"] -ujson = ["ujson (>=5.7.0)"] - [[package]] name = "certifi" version = "2024.6.2" @@ -1125,17 +508,6 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] -[[package]] -name = "chardet" -version = "5.2.0" -description = "Universal encoding detector for Python 3" -optional = false -python-versions = ">=3.7" -files = [ - {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, - {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, -] - [[package]] name = "charset-normalizer" version = "3.3.2" @@ -1249,21 +621,6 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} -[[package]] -name = "clickclick" -version = "20.10.2" -description = "Click utility functions" -optional = false -python-versions = "*" -files = [ - {file = "clickclick-20.10.2-py2.py3-none-any.whl", hash = "sha256:c8f33e6d9ec83f68416dd2136a7950125bd256ec39ccc9a85c6e280a16be2bb5"}, - {file = "clickclick-20.10.2.tar.gz", hash = "sha256:4efb13e62353e34c5eef7ed6582c4920b418d7dedc86d819e22ee089ba01802c"}, -] - -[package.dependencies] -click 
= ">=4.0" -PyYAML = ">=3.11" - [[package]] name = "colorama" version = "0.4.6" @@ -1275,20 +632,6 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -[[package]] -name = "colorlog" -version = "4.8.0" -description = "Log formatting with colors!" -optional = false -python-versions = "*" -files = [ - {file = "colorlog-4.8.0-py2.py3-none-any.whl", hash = "sha256:3dd15cb27e8119a24c1a7b5c93f9f3b455855e0f73993b1c25921b2f646f1dcd"}, - {file = "colorlog-4.8.0.tar.gz", hash = "sha256:59b53160c60902c405cdec28d38356e09d40686659048893e026ecbd589516b1"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} - [[package]] name = "comm" version = "0.2.2" @@ -1320,49 +663,6 @@ files = [ [package.extras] test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] -[[package]] -name = "configupdater" -version = "3.2" -description = "Parser like ConfigParser but for updating configuration files" -optional = false -python-versions = ">=3.6" -files = [ - {file = "ConfigUpdater-3.2-py2.py3-none-any.whl", hash = "sha256:0f65a041627d7693840b4dd743581db4c441c97195298a29d075f91b79539df2"}, - {file = "ConfigUpdater-3.2.tar.gz", hash = "sha256:9fdac53831c1b062929bf398b649b87ca30e7f1a735f3fbf482072804106306b"}, -] - -[package.extras] -testing = ["flake8", "pytest", "pytest-cov", "pytest-randomly", "pytest-xdist", "sphinx"] - -[[package]] -name = "connexion" -version = "2.14.1" -description = "Connexion - API first applications with OpenAPI/Swagger and Flask" -optional = false -python-versions = ">=3.6" -files = [ - {file = "connexion-2.14.1-py2.py3-none-any.whl", hash = "sha256:f343717241b4c4802a694c38fee66fb1693c897fe4ea5a957fa9b3b07caf6394"}, - {file = "connexion-2.14.1.tar.gz", hash = "sha256:99aa5781e70a7b94f8ffae8cf89f309d49cdb811bbd65a8e2f2546f3b19a01e6"}, -] - -[package.dependencies] -clickclick = ">=1.2,<21" -flask = ">=1.0.4,<3" -inflection = ">=0.3.1,<0.6" -itsdangerous = 
">=0.24" -jsonschema = ">=2.5.1,<5" -packaging = ">=20" -PyYAML = ">=5.1,<7" -requests = ">=2.9.1,<3" -werkzeug = ">=1.0,<3" - -[package.extras] -aiohttp = ["MarkupSafe (>=0.23)", "aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14.0,<2)"] -docs = ["sphinx-autoapi (==1.8.1)"] -flask = ["flask (>=1.0.4,<3)", "itsdangerous (>=0.24)"] -swagger-ui = ["swagger-ui-bundle (>=0.0.2,<0.1)"] -tests = ["MarkupSafe (>=0.23)", "aiohttp (>=2.3.10,<4)", "aiohttp-jinja2 (>=0.14.0,<2)", "aiohttp-remotes", "decorator (>=5,<6)", "flask (>=1.0.4,<3)", "itsdangerous (>=0.24)", "pytest (>=6,<7)", "pytest-aiohttp", "pytest-cov (>=2,<3)", "swagger-ui-bundle (>=0.0.2,<0.1)", "testfixtures (>=6,<7)"] - [[package]] name = "contourpy" version = "1.2.1" @@ -1493,35 +793,6 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] -[[package]] -name = "cron-descriptor" -version = "1.4.3" -description = "A Python library that converts cron expressions into human readable strings." 
-optional = false -python-versions = "*" -files = [ - {file = "cron_descriptor-1.4.3-py3-none-any.whl", hash = "sha256:a67ba21804983b1427ed7f3e1ec27ee77bf24c652b0430239c268c5ddfbf9dc0"}, - {file = "cron_descriptor-1.4.3.tar.gz", hash = "sha256:7b1a00d7d25d6ae6896c0da4457e790b98cba778398a3d48e341e5e0d33f0488"}, -] - -[package.extras] -dev = ["polib"] - -[[package]] -name = "croniter" -version = "2.0.5" -description = "croniter provides iteration for datetime object with cron like format" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.6" -files = [ - {file = "croniter-2.0.5-py2.py3-none-any.whl", hash = "sha256:fdbb44920944045cc323db54599b321325141d82d14fa7453bc0699826bbe9ed"}, - {file = "croniter-2.0.5.tar.gz", hash = "sha256:f1f8ca0af64212fbe99b1bee125ee5a1b53a9c1b433968d8bca8817b79d237f3"}, -] - -[package.dependencies] -python-dateutil = "*" -pytz = ">2021.1" - [[package]] name = "cryptography" version = "41.0.7" @@ -1717,26 +988,6 @@ files = [ {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, ] -[[package]] -name = "dnspython" -version = "2.6.1" -description = "DNS toolkit" -optional = false -python-versions = ">=3.8" -files = [ - {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, - {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, -] - -[package.extras] -dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "sphinx (>=7.2.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] -dnssec = ["cryptography (>=41)"] -doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] -doq = ["aioquic (>=0.9.25)"] -idna = ["idna (>=3.6)"] -trio = ["trio (>=0.23)"] -wmi = ["wmi (>=1.5.1)"] - [[package]] name = "docker-pycreds" version = "0.4.0" @@ -1751,17 +1002,6 @@ files 
= [ [package.dependencies] six = ">=1.4.0" -[[package]] -name = "docstring-parser" -version = "0.16" -description = "Parse Python docstrings in reST, Google and Numpydoc format" -optional = false -python-versions = ">=3.6,<4.0" -files = [ - {file = "docstring_parser-0.16-py3-none-any.whl", hash = "sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637"}, - {file = "docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e"}, -] - [[package]] name = "docstring-parser-fork" version = "0.0.8" @@ -1774,49 +1014,23 @@ files = [ ] [[package]] -name = "docutils" -version = "0.21.2" -description = "Docutils -- Python Documentation Utilities" +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" optional = false -python-versions = ">=3.9" +python-versions = ">=3.7" files = [ - {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, - {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, ] +[package.extras] +test = ["pytest (>=6)"] + [[package]] -name = "email-validator" -version = "2.2.0" -description = "A robust email address syntax and deliverability validation library." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631"}, - {file = "email_validator-2.2.0.tar.gz", hash = "sha256:cb690f344c617a714f22e66ae771445a1ceb46821152df8e165c5f9a364582b7"}, -] - -[package.dependencies] -dnspython = ">=2.0.0" -idna = ">=2.0.0" - -[[package]] -name = "exceptiongroup" -version = "1.2.1" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, -] - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "execnet" -version = "2.1.1" -description = "execnet: rapid multi-Python deployment" +name = "execnet" +version = "2.1.1" +description = "execnet: rapid multi-Python deployment" optional = false python-versions = ">=3.8" files = [ @@ -1873,208 +1087,6 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" -[[package]] -name = "flask" -version = "2.2.5" -description = "A simple framework for building complex web applications." -optional = false -python-versions = ">=3.7" -files = [ - {file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"}, - {file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"}, -] - -[package.dependencies] -click = ">=8.0" -itsdangerous = ">=2.0" -Jinja2 = ">=3.0" -Werkzeug = ">=2.2.2" - -[package.extras] -async = ["asgiref (>=3.2)"] -dotenv = ["python-dotenv"] - -[[package]] -name = "flask-appbuilder" -version = "4.4.1" -description = "Simple and rapid application development framework, built on top of Flask. 
includes detailed security, auto CRUD generation for your models, google charts and much more." -optional = false -python-versions = "~=3.7" -files = [ - {file = "Flask-AppBuilder-4.4.1.tar.gz", hash = "sha256:a64d4c3b5197547744c7c41f7eb0fe0206ba1677369ce47903dd08c3c9b753bd"}, - {file = "Flask_AppBuilder-4.4.1-py3-none-any.whl", hash = "sha256:6ebdb384a23c0e111736ac36f6de04f02d40ac2976feedbdd473d8ba0201dd92"}, -] - -[package.dependencies] -apispec = {version = ">=6.0.0,<7", extras = ["yaml"]} -click = ">=8,<9" -colorama = ">=0.3.9,<1" -email-validator = ">=1.0.5" -Flask = ">=2,<3.0.0" -Flask-Babel = ">=1,<3" -Flask-JWT-Extended = ">=4.0.0,<5.0.0" -Flask-Limiter = ">3,<4" -Flask-Login = ">=0.3,<0.7" -Flask-SQLAlchemy = ">=2.4,<3" -Flask-WTF = ">=0.14.2,<2" -jsonschema = ">=3,<5" -marshmallow = ">=3.18.0,<4" -marshmallow-sqlalchemy = ">=0.22.0,<0.29.0" -prison = ">=0.2.1,<1.0.0" -PyJWT = ">=2.0.0,<3.0.0" -python-dateutil = ">=2.3,<3" -SQLAlchemy = "<1.5" -sqlalchemy-utils = ">=0.32.21,<1" -werkzeug = "<4" -WTForms = "<4" - -[package.extras] -jmespath = ["jmespath (>=0.9.5)"] -oauth = ["Authlib (>=0.14,<2.0.0)"] -openid = ["Flask-OpenID (>=1.2.5,<2)"] -talisman = ["flask-talisman (>=1.0.0,<2.0)"] - -[[package]] -name = "flask-babel" -version = "2.0.0" -description = "Adds i18n/l10n support to Flask applications" -optional = false -python-versions = "*" -files = [ - {file = "Flask-Babel-2.0.0.tar.gz", hash = "sha256:f9faf45cdb2e1a32ea2ec14403587d4295108f35017a7821a2b1acb8cfd9257d"}, - {file = "Flask_Babel-2.0.0-py3-none-any.whl", hash = "sha256:e6820a052a8d344e178cdd36dd4bb8aea09b4bda3d5f9fa9f008df2c7f2f5468"}, -] - -[package.dependencies] -Babel = ">=2.3" -Flask = "*" -Jinja2 = ">=2.5" -pytz = "*" - -[package.extras] -dev = ["Pallets-Sphinx-Themes", "bumpversion", "ghp-import", "pytest", "pytest-mock", "sphinx"] - -[[package]] -name = "flask-caching" -version = "2.3.0" -description = "Adds caching support to Flask applications." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "Flask_Caching-2.3.0-py3-none-any.whl", hash = "sha256:51771c75682e5abc1483b78b96d9131d7941dc669b073852edfa319dd4e29b6e"}, - {file = "flask_caching-2.3.0.tar.gz", hash = "sha256:d7e4ca64a33b49feb339fcdd17e6ba25f5e01168cf885e53790e885f83a4d2cf"}, -] - -[package.dependencies] -cachelib = ">=0.9.0,<0.10.0" -Flask = "*" - -[[package]] -name = "flask-jwt-extended" -version = "4.6.0" -description = "Extended JWT integration with Flask" -optional = false -python-versions = ">=3.7,<4" -files = [ - {file = "Flask-JWT-Extended-4.6.0.tar.gz", hash = "sha256:9215d05a9413d3855764bcd67035e75819d23af2fafb6b55197eb5a3313fdfb2"}, - {file = "Flask_JWT_Extended-4.6.0-py2.py3-none-any.whl", hash = "sha256:63a28fc9731bcc6c4b8815b6f954b5904caa534fc2ae9b93b1d3ef12930dca95"}, -] - -[package.dependencies] -Flask = ">=2.0,<4.0" -PyJWT = ">=2.0,<3.0" -Werkzeug = ">=0.14" - -[package.extras] -asymmetric-crypto = ["cryptography (>=3.3.1)"] - -[[package]] -name = "flask-limiter" -version = "3.7.0" -description = "Rate limiting for flask applications" -optional = false -python-versions = ">=3.8" -files = [ - {file = "Flask_Limiter-3.7.0-py3-none-any.whl", hash = "sha256:4318382f17ecb09848bc6d0f7bc4bb1bf89bcf162200bf47b7b969126693bfda"}, - {file = "flask_limiter-3.7.0.tar.gz", hash = "sha256:e474462505f6dd0d776db16c46092e9a065ebcb30b10aed0caf54c6b9a4a471a"}, -] - -[package.dependencies] -Flask = ">=2" -limits = ">=2.8" -ordered-set = ">4,<5" -rich = ">=12,<14" -typing-extensions = ">=4" - -[package.extras] -memcached = ["limits[memcached]"] -mongodb = ["limits[mongodb]"] -redis = ["limits[redis]"] - -[[package]] -name = "flask-login" -version = "0.6.3" -description = "User authentication and session management for Flask." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "Flask-Login-0.6.3.tar.gz", hash = "sha256:5e23d14a607ef12806c699590b89d0f0e0d67baeec599d75947bf9c147330333"}, - {file = "Flask_Login-0.6.3-py3-none-any.whl", hash = "sha256:849b25b82a436bf830a054e74214074af59097171562ab10bfa999e6b78aae5d"}, -] - -[package.dependencies] -Flask = ">=1.0.4" -Werkzeug = ">=1.0.1" - -[[package]] -name = "flask-session" -version = "0.5.0" -description = "Server-side session support for Flask" -optional = false -python-versions = ">=3.7" -files = [ - {file = "Flask-Session-0.5.0.tar.gz", hash = "sha256:190875e6aebf2953c6803d42379ef3b934bc209ef8ef006f97aecb08f5aaeb86"}, - {file = "flask_session-0.5.0-py3-none-any.whl", hash = "sha256:1619bcbc16f04f64e90f8e0b17145ba5c9700090bb1294e889956c1282d58631"}, -] - -[package.dependencies] -cachelib = "*" -flask = ">=2.2" - -[[package]] -name = "flask-sqlalchemy" -version = "2.5.1" -description = "Adds SQLAlchemy support to your Flask application." -optional = false -python-versions = ">= 2.7, != 3.0.*, != 3.1.*, != 3.2.*, != 3.3.*" -files = [ - {file = "Flask-SQLAlchemy-2.5.1.tar.gz", hash = "sha256:2bda44b43e7cacb15d4e05ff3cc1f8bc97936cc464623424102bfc2c35e95912"}, - {file = "Flask_SQLAlchemy-2.5.1-py2.py3-none-any.whl", hash = "sha256:f12c3d4cc5cc7fdcc148b9527ea05671718c3ea45d50c7e732cceb33f574b390"}, -] - -[package.dependencies] -Flask = ">=0.10" -SQLAlchemy = ">=0.8.0" - -[[package]] -name = "flask-wtf" -version = "1.2.1" -description = "Form rendering, validation, and CSRF protection for Flask with WTForms." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "flask_wtf-1.2.1-py3-none-any.whl", hash = "sha256:fa6793f2fb7e812e0fe9743b282118e581fb1b6c45d414b8af05e659bd653287"}, - {file = "flask_wtf-1.2.1.tar.gz", hash = "sha256:8bb269eb9bb46b87e7c8233d7e7debdf1f8b74bf90cc1789988c29b37a97b695"}, -] - -[package.dependencies] -flask = "*" -itsdangerous = "*" -wtforms = "*" - -[package.extras] -email = ["email-validator"] - [[package]] name = "frozenlist" version = "1.4.1" @@ -2200,56 +1212,6 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] -[[package]] -name = "gcloud-aio-auth" -version = "4.2.3" -description = "Python Client for Google Cloud Auth" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "gcloud_aio_auth-4.2.3-py3-none-any.whl", hash = "sha256:e4adadd36e35eeeb8537b926840372c3080c2f5a6909d44aa1bacbced2260bb1"}, - {file = "gcloud_aio_auth-4.2.3.tar.gz", hash = "sha256:8e12297c5b45cfc20d629b83e1233f83a1c7d5f830f24f31bc5bb8816c0cda1b"}, -] - -[package.dependencies] -aiohttp = ">=3.3.0,<4.0.0" -backoff = ">=1.0.0,<3.0.0" -chardet = ">=2.0,<6.0" -cryptography = ">=2.0.0,<42.0.0" -pyjwt = ">=1.5.3,<3.0.0" -setuptools = ">=66.0.0,<67.0.0" - -[[package]] -name = "gcloud-aio-bigquery" -version = "7.1.0" -description = "Python Client for Google Cloud BigQuery" -optional = false -python-versions = ">=3.8,<4.0" -files = [ - {file = 
"gcloud_aio_bigquery-7.1.0-py3-none-any.whl", hash = "sha256:524ae3cc14c1af6977a358829cc673b4471159caa7d62bba7f2d9334262bcd4a"}, - {file = "gcloud_aio_bigquery-7.1.0.tar.gz", hash = "sha256:4a3c775c2677c0588e9caeb2df40d81a54b31c174e562a527cb08e023c4408a3"}, -] - -[package.dependencies] -gcloud-aio-auth = ">=3.1.0,<6.0.0" - -[[package]] -name = "gcloud-aio-storage" -version = "9.2.0" -description = "Python Client for Google Cloud Storage" -optional = false -python-versions = ">=3.8,<4.0" -files = [ - {file = "gcloud_aio_storage-9.2.0-py3-none-any.whl", hash = "sha256:0a8cc27223cea05ad27117fa574ce8697ec32a252382cbfdfe54df0678e92d03"}, - {file = "gcloud_aio_storage-9.2.0.tar.gz", hash = "sha256:47be865222d22fdf873cd5a4af9a285d7b64b176d4d92f1773ecacd52a6a8c6f"}, -] - -[package.dependencies] -aiofiles = ">=0.6.0,<24.0.0" -gcloud-aio-auth = ">=3.6.0,<6.0.0" -pyasn1-modules = ">=0.2.1,<0.4.0" -rsa = ">=3.1.4,<5.0.0" - [[package]] name = "gcsfs" version = "2024.6.0" @@ -2324,944 +1286,191 @@ doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphi test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] [[package]] -name = "google" -version = "3.0.0" -description = "Python bindings to the Google search engine." 
-optional = false -python-versions = "*" -files = [ - {file = "google-3.0.0-py2.py3-none-any.whl", hash = "sha256:889cf695f84e4ae2c55fbc0cfdaf4c1e729417fa52ab1db0485202ba173e4935"}, - {file = "google-3.0.0.tar.gz", hash = "sha256:143530122ee5130509ad5e989f0512f7cb218b2d4eddbafbad40fd10e8d8ccbe"}, -] - -[package.dependencies] -beautifulsoup4 = "*" - -[[package]] -name = "google-ads" -version = "22.1.0" -description = "Client library for the Google Ads API" -optional = false -python-versions = ">=3.7, <3.12" -files = [ - {file = "google-ads-22.1.0.tar.gz", hash = "sha256:cfab38b40eb8424a4a514823bd8b911a57ef55dd64e2112cfa46a70d8090de98"}, - {file = "google_ads-22.1.0-py3-none-any.whl", hash = "sha256:6fdd3fb635678fbb3c8f87271afc81f0e139882b83b48505160fc4daacf33ad0"}, -] - -[package.dependencies] -google-api-core = ">=2.8.0,<=3.0.0" -google-auth-oauthlib = ">=0.3.0,<2.0.0" -googleapis-common-protos = ">=1.56.0,<2.0.0" -grpcio = ">=1.38.1,<2.0.0" -grpcio-status = ">=1.38.1,<2.0.0" -proto-plus = ">=1.19.6,<2.0.0" -protobuf = ">=3.12.0,<3.18.dev0 || >=3.20.dev0,<5.0.0" -PyYAML = ">=5.1,<7.0" -setuptools = ">=40.3.0" - -[package.extras] -tests = ["nox (>=2020.12.31,<2022.6)"] - -[[package]] -name = "google-analytics-admin" -version = "0.22.7" -description = "Google Analytics Admin API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-analytics-admin-0.22.7.tar.gz", hash = "sha256:9546afaddf7ee275ec4729de6da8b15f27d4d245ee8896f80800e22572fc7987"}, - {file = "google_analytics_admin-0.22.7-py2.py3-none-any.whl", hash = "sha256:09463653ecb42306c34fb07c9ff27193737aae8e63c52b8f695bf705e8589b46"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || 
>4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-api-core" -version = "2.19.0" -description = "Google API client core library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-api-core-2.19.0.tar.gz", hash = "sha256:cf1b7c2694047886d2af1128a03ae99e391108a08804f87cfd35970e49c9cd10"}, - {file = "google_api_core-2.19.0-py3-none-any.whl", hash = "sha256:8661eec4078c35428fd3f69a2c7ee29e342896b70f01d1a1cbcb334372dd6251"}, -] - -[package.dependencies] -google-auth = ">=2.14.1,<3.0.dev0" -googleapis-common-protos = ">=1.56.2,<2.0.dev0" -grpcio = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} -grpcio-status = {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""} -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" -requests = ">=2.18.0,<3.0.0.dev0" - -[package.extras] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] -grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] - -[[package]] -name = "google-api-python-client" -version = "2.134.0" -description = "Google API Client Library for Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-api-python-client-2.134.0.tar.gz", hash = "sha256:4a8f0bea651a212997cc83c0f271fc86f80ef93d1cee9d84de7dfaeef2a858b6"}, - {file = "google_api_python_client-2.134.0-py2.py3-none-any.whl", hash = "sha256:ba05d60f6239990b7994f6328f17bb154c602d31860fb553016dc9f8ce886945"}, -] - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" -google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0.dev0" -google-auth-httplib2 = ">=0.2.0,<1.0.0" 
-httplib2 = ">=0.19.0,<1.dev0" -uritemplate = ">=3.0.1,<5" - -[[package]] -name = "google-auth" -version = "2.30.0" -description = "Google Authentication Library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-auth-2.30.0.tar.gz", hash = "sha256:ab630a1320f6720909ad76a7dbdb6841cdf5c66b328d690027e4867bdfb16688"}, - {file = "google_auth-2.30.0-py2.py3-none-any.whl", hash = "sha256:8df7da660f62757388b8a7f249df13549b3373f24388cb5d2f1dd91cc18180b5"}, -] - -[package.dependencies] -cachetools = ">=2.0.0,<6.0" -pyasn1-modules = ">=0.2.1" -rsa = ">=3.1.4,<5" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] -enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] -pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] -reauth = ["pyu2f (>=0.1.5)"] -requests = ["requests (>=2.20.0,<3.0.0.dev0)"] - -[[package]] -name = "google-auth-httplib2" -version = "0.2.0" -description = "Google Authentication Library: httplib2 transport" -optional = false -python-versions = "*" -files = [ - {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"}, - {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"}, -] - -[package.dependencies] -google-auth = "*" -httplib2 = ">=0.19.0" - -[[package]] -name = "google-auth-oauthlib" -version = "0.8.0" -description = "Google Authentication Library" -optional = false -python-versions = ">=3.6" -files = [ - {file = "google-auth-oauthlib-0.8.0.tar.gz", hash = "sha256:81056a310fb1c4a3e5a7e1a443e1eb96593c6bbc55b26c0261e4d3295d3e6593"}, - {file = "google_auth_oauthlib-0.8.0-py2.py3-none-any.whl", hash = "sha256:40cc612a13c3336d5433e94e2adb42a0c88f6feb6c55769e44500fc70043a576"}, -] - -[package.dependencies] -google-auth = ">=2.15.0" -requests-oauthlib = ">=0.7.0" - -[package.extras] -tool = ["click 
(>=6.0.0)"] - -[[package]] -name = "google-cloud-aiplatform" -version = "1.56.0" -description = "Vertex AI API client library" -optional = false -python-versions = ">=3.8" -files = [ - {file = "google-cloud-aiplatform-1.56.0.tar.gz", hash = "sha256:d4cfb085427dac01142915f523949ac2955d6c7f148d95017d3286a77caf5d5e"}, - {file = "google_cloud_aiplatform-1.56.0-py2.py3-none-any.whl", hash = "sha256:ee1ab3bd115c3caebf8ddfd3e47eeb8396a3ec2fc5f5baf1a5c295c8d64333ab"}, -] - -[package.dependencies] -docstring-parser = "<1" -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.8.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<3.0.0dev" -google-cloud-bigquery = ">=1.15.0,<3.20.0 || >3.20.0,<4.0.0dev" -google-cloud-resource-manager = ">=1.3.3,<3.0.0dev" -google-cloud-storage = ">=1.32.0,<3.0.0dev" -packaging = ">=14.3" -proto-plus = ">=1.22.0,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" -pydantic = "<3" -shapely = "<3.0.0dev" - -[package.extras] -autologging = ["mlflow (>=1.27.0,<=2.1.1)"] -cloud-profiler = ["tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"] -datasets = ["pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)"] -endpoint = ["requests (>=2.28.1)"] -full = ["cloudpickle (<3.0)", "docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.109.1)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-cloud-logging (<4.0)", "google-vizier (>=0.1.6)", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.1.1)", "nest-asyncio (>=1.0.0,<1.6.0)", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml (>=5.3.1,<7)", "ray[default] 
(>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<=2.9.3)", "ray[default] (>=2.5,<=2.9.3)", "requests (>=2.28.1)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)"] -langchain = ["langchain (>=0.1.16,<0.3)", "langchain-core (<0.2)", "langchain-google-vertexai (<2)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "tenacity (<=8.3)"] -langchain-testing = ["absl-py", "cloudpickle (>=3.0,<4.0)", "langchain (>=0.1.16,<0.3)", "langchain-core (<0.2)", "langchain-google-vertexai (<2)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)", "pytest-xdist", "tenacity (<=8.3)"] -lit = ["explainable-ai-sdk (>=1.0.0)", "lit-nlp (==0.4.0)", "pandas (>=1.0.0)", "tensorflow (>=2.3.0,<3.0.0dev)"] -metadata = ["numpy (>=1.15.0)", "pandas (>=1.0.0)"] -pipelines = ["pyyaml (>=5.3.1,<7)"] -prediction = ["docker (>=5.0.3)", "fastapi (>=0.71.0,<=0.109.1)", "httpx (>=0.23.0,<0.25.0)", "starlette (>=0.17.1)", "uvicorn[standard] (>=0.16.0)"] -preview = ["cloudpickle (<3.0)", "google-cloud-logging (<4.0)"] -private-endpoints = ["requests (>=2.28.1)", "urllib3 (>=1.21.1,<1.27)"] -rapid-evaluation = ["nest-asyncio (>=1.0.0,<1.6.0)", "pandas (>=1.0.0,<2.2.0)"] -ray = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<=2.9.3)", "ray[default] (>=2.5,<=2.9.3)", "setuptools (<70.0.0)"] -ray-testing = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pytest-xdist", "ray[default] (>=2.4,<2.5.dev0 || 
>2.9.0,!=2.9.1,!=2.9.2,<=2.9.3)", "ray[default] (>=2.5,<=2.9.3)", "ray[train] (==2.9.3)", "scikit-learn", "setuptools (<70.0.0)", "tensorflow", "torch (>=2.0.0,<2.1.0)", "xgboost", "xgboost-ray"] -reasoningengine = ["cloudpickle (>=3.0,<4.0)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)"] -tensorboard = ["tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"] -testing = ["bigframes", "cloudpickle (<3.0)", "docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.109.1)", "google-api-core (>=2.11,<3.0.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-cloud-logging (<4.0)", "google-vizier (>=0.1.6)", "grpcio-testing", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "ipython", "kfp (>=2.6.0,<3.0.0)", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.1.1)", "nest-asyncio (>=1.0.0,<1.6.0)", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pandas (>=1.0.0,<2.2.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyfakefs", "pytest-asyncio", "pytest-xdist", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<=2.9.3)", "ray[default] (>=2.5,<=2.9.3)", "requests (>=2.28.1)", "requests-toolbelt (<1.0.0)", "scikit-learn", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (==2.13.0)", "tensorflow (==2.16.1)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "torch (>=2.0.0,<2.1.0)", "torch (>=2.2.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)", "xgboost"] -vizier = ["google-vizier (>=0.1.6)"] -xai = ["tensorflow (>=2.3.0,<3.0.0dev)"] - -[[package]] -name = "google-cloud-appengine-logging" -version = "1.4.3" -description = "Google Cloud Appengine Logging API client 
library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-appengine-logging-1.4.3.tar.gz", hash = "sha256:fb504e6199fe8de85baa9d31cecf6776877851fe58867de603317ec7cc739987"}, - {file = "google_cloud_appengine_logging-1.4.3-py2.py3-none-any.whl", hash = "sha256:8e30af51d853f219caf29e8b8b342b9ce8214b29f334dafae38d39aaaff7d372"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-audit-log" -version = "0.2.5" -description = "Google Cloud Audit Protos" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-audit-log-0.2.5.tar.gz", hash = "sha256:86e2faba3383adc8fd04a5bd7fd4f960b3e4aedaa7ed950f2f891ce16902eb6b"}, - {file = "google_cloud_audit_log-0.2.5-py2.py3-none-any.whl", hash = "sha256:18b94d4579002a450b7902cd2e8b8fdcb1ea2dd4df3b41f8f82be6d9f7fcd746"}, -] - -[package.dependencies] -googleapis-common-protos = ">=1.56.2,<2.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-automl" -version = "2.13.3" -description = "Google Cloud Automl API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-automl-2.13.3.tar.gz", hash = "sha256:891a9082eaedeb17bc5bb724fc150b7702c684d7420f9f22ae3f7c5fc611c71a"}, - {file = "google_cloud_automl-2.13.3-py2.py3-none-any.whl", hash = "sha256:8117943ad1534f1d11d40e0937ade7965c56a6d7fd2c2314e56942361727a6a2"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || 
>=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[package.extras] -libcst = ["libcst (>=0.2.5)"] -pandas = ["pandas (>=1.0.5)"] -storage = ["google-cloud-storage (>=1.18.0,<3.0.0dev)"] - -[[package]] -name = "google-cloud-batch" -version = "0.17.21" -description = "Google Cloud Batch API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-batch-0.17.21.tar.gz", hash = "sha256:ead5fd10553280a2fa9a05d892ff0446198ba381ad2895c88a80803e34e38a0a"}, - {file = "google_cloud_batch-0.17.21-py2.py3-none-any.whl", hash = "sha256:a202defeb82ed30479c80d13e6ef2d3952cc6f47d28bdf8376d12fa36ad407ac"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-bigquery" -version = "3.25.0" -description = "Google BigQuery API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-bigquery-3.25.0.tar.gz", hash = "sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509"}, - {file = "google_cloud_bigquery-3.25.0-py2.py3-none-any.whl", hash = "sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<3.0.0dev" -google-cloud-core = ">=1.6.0,<3.0.0dev" 
-google-resumable-media = ">=0.6.0,<3.0dev" -packaging = ">=20.0.0" -python-dateutil = ">=2.7.2,<3.0dev" -requests = ">=2.21.0,<3.0.0dev" - -[package.extras] -all = ["Shapely (>=1.8.4,<3.0.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "importlib-metadata (>=1.0.0)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] -bigquery-v2 = ["proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)"] -bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] -geopandas = ["Shapely (>=1.8.4,<3.0.0dev)", "geopandas (>=0.9.0,<1.0dev)"] -ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] -ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] -opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] -pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "importlib-metadata (>=1.0.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] -tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] - -[[package]] -name = "google-cloud-bigquery-datatransfer" -version = "3.15.3" -description = "Google Cloud Bigquery Datatransfer API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-bigquery-datatransfer-3.15.3.tar.gz", hash = "sha256:e4c84eed31209c43b43f08dda7307eec0666a2431d8c087acf4a682ad41f3ab1"}, - {file = 
"google_cloud_bigquery_datatransfer-3.15.3-py2.py3-none-any.whl", hash = "sha256:cb8a0bf980d7d9386af638c42c006093f948147fc4bca3f11f724969517651dd"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-bigtable" -version = "2.24.0" -description = "Google Cloud Bigtable API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-bigtable-2.24.0.tar.gz", hash = "sha256:ace75f62ca3c52d6619d4ff7aed982129cae508baf776d81b33313f4f9ea5ed4"}, - {file = "google_cloud_bigtable-2.24.0-py2.py3-none-any.whl", hash = "sha256:09f35c1afcd57fec405ca5713919f09122a9beaf60a4f6952af6d4ac03065c91"}, -] - -[package.dependencies] -google-api-core = {version = ">=2.16.0,<3.0.0dev", extras = ["grpc"]} -google-cloud-core = ">=1.4.4,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.0,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[package.extras] -libcst = ["libcst (>=0.2.5)"] - -[[package]] -name = "google-cloud-build" -version = "3.24.0" -description = "Google Cloud Build API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-build-3.24.0.tar.gz", hash = "sha256:dbaf1c0df4bd8579a16b985332412847a3fd6f58bd2b72b38fbd791b80d0a900"}, - {file = "google_cloud_build-3.24.0-py2.py3-none-any.whl", hash = "sha256:1ba689730b5e27b8e226f3c7ba38e5af006321dcfbae211f995f26f9294fb19a"}, -] - -[package.dependencies] -google-api-core = {version = 
">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-compute" -version = "1.19.0" -description = "Google Cloud Compute API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-compute-1.19.0.tar.gz", hash = "sha256:a07b3408b3f77786dcb196669ff3767bc51080718a7c2d3fd779defc8d817b4d"}, - {file = "google_cloud_compute-1.19.0-py2.py3-none-any.whl", hash = "sha256:48d5582dd257fc817113556ab40990ec7a50fb6b2e341bc8a55ef90e2e66501d"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-container" -version = "2.47.0" -description = "Google Cloud Container API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-container-2.47.0.tar.gz", hash = "sha256:b6fcce293bb69f96fd243a3d109c38f0152b26e3b0391f492ba3d0ca2e381df2"}, - {file = "google_cloud_container-2.47.0-py2.py3-none-any.whl", hash = "sha256:327e7b3ff68dbaca367c1840e8c2b0fc8a3d922264faf96dd0233c98785ee42a"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = 
">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-core" -version = "2.4.1" -description = "Google Cloud API client core library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-core-2.4.1.tar.gz", hash = "sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073"}, - {file = "google_cloud_core-2.4.1-py2.py3-none-any.whl", hash = "sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61"}, -] - -[package.dependencies] -google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" -google-auth = ">=1.25.0,<3.0dev" - -[package.extras] -grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] - -[[package]] -name = "google-cloud-datacatalog" -version = "3.19.0" -description = "Google Cloud Datacatalog API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-datacatalog-3.19.0.tar.gz", hash = "sha256:3a8d0ef787f0edd4703e7e27c91a1b5f85bdbe04d61609132ec77269f7d52692"}, - {file = "google_cloud_datacatalog-3.19.0-py2.py3-none-any.whl", hash = "sha256:c76ccf0d98d150c2feb1552518f6efde441c820664742ccbe4daf749e009f58d"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-dataflow-client" -version = "0.8.10" -description = "Google Cloud Dataflow Client API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = 
"google-cloud-dataflow-client-0.8.10.tar.gz", hash = "sha256:6aa8178743bce4b6c9142b492e649db84f55db70a88ec29a3d889054c4752144"}, - {file = "google_cloud_dataflow_client-0.8.10-py2.py3-none-any.whl", hash = "sha256:4b0dce886ec65fbf3195f87e956d559f6d4bd88a03b527aa8498c303e2d4d39a"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-dataform" -version = "0.5.9" -description = "Google Cloud Dataform API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-dataform-0.5.9.tar.gz", hash = "sha256:2733c3fbda4f0134629e2d6554db93e1840683567043906268718b1a874eb9ac"}, - {file = "google_cloud_dataform-0.5.9-py2.py3-none-any.whl", hash = "sha256:c94e61358a68de389803bb06b936f815108211b3324e1f143db9a8bd2bb6a081"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-dataplex" -version = "2.0.1" -description = "Google Cloud Dataplex API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-dataplex-2.0.1.tar.gz", hash = "sha256:f4ccb1f76eb7b8a2ae01cdcb2041bb613045d262b57bd65d2c7522e766923c15"}, - {file = "google_cloud_dataplex-2.0.1-py2.py3-none-any.whl", hash = 
"sha256:ebe732dcf54b372c4af8ee40e2f43e3eff329b98d2060b1a803a0e9e656f5da6"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-dataproc" -version = "5.10.1" -description = "Google Cloud Dataproc API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-dataproc-5.10.1.tar.gz", hash = "sha256:f3f0f0f3933328e80273774540368432550e296c255928657069a31a2de01c39"}, - {file = "google_cloud_dataproc-5.10.1-py2.py3-none-any.whl", hash = "sha256:28b763c9b019ca7d7c3e917ade04647c00494e77d4e682ca221d53e8d36f70af"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" - -[[package]] -name = "google-cloud-dataproc-metastore" -version = "1.15.3" -description = "Google Cloud Dataproc Metastore API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-dataproc-metastore-1.15.3.tar.gz", hash = "sha256:1f8a5a66e43a4d1c119e9be225ba66b041f432d046b5c4da6717fbd56fcfb4f5"}, - {file = "google_cloud_dataproc_metastore-1.15.3-py2.py3-none-any.whl", hash = "sha256:871dcea73ae2ab2ecfb6da1fe1bc82206eee817b744755f7b24aaef554b8d896"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 
|| >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-dlp" -version = "3.18.0" -description = "Google Cloud Dlp API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-dlp-3.18.0.tar.gz", hash = "sha256:47408cc75e8398c7dce93c3f62a3ed8582b20f125cc8e0dd331015ae3eb62f7e"}, - {file = "google_cloud_dlp-3.18.0-py2.py3-none-any.whl", hash = "sha256:68c5c3767b9d96597b6c51c97fe4d824439ca71622048b3d44b89ed7d6eecf0d"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-kms" -version = "2.23.0" -description = "Google Cloud Kms API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-kms-2.23.0.tar.gz", hash = "sha256:52b5fc6f70b163516fad610aafc088fbe788f501d5bbe1d5f458cdc3fcbafd2c"}, - {file = "google_cloud_kms-2.23.0-py2.py3-none-any.whl", hash = "sha256:cbf68592a5626ac4de43d8149f7933e9b9bd311e1c88d0d16d9e56a49d7a21c0"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || 
>3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-language" -version = "2.13.3" -description = "Google Cloud Language API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-language-2.13.3.tar.gz", hash = "sha256:569d35260af906de25b8e2a76b6364e05809b8453afd89da738d4a4fcb90846f"}, - {file = "google_cloud_language-2.13.3-py2.py3-none-any.whl", hash = "sha256:b060de20d8ed2b20b7b7ebd8a46ae240df36c9977db383db5155276d2b1f6b9a"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-logging" -version = "3.10.0" -description = "Stackdriver Logging API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-logging-3.10.0.tar.gz", hash = "sha256:d93d347351240ddb14cfe201987a2d32cf9d7f478b8b2fabed3015b425b3274f"}, - {file = "google_cloud_logging-3.10.0-py2.py3-none-any.whl", hash = "sha256:132192beb45731130a2ffbcd4b2b5cbd87370e7dcfa7397ae4002154f542bd20"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -google-cloud-appengine-logging = ">=0.1.0,<2.0.0dev" -google-cloud-audit-log = ">=0.1.0,<1.0.0dev" -google-cloud-core = ">=2.0.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.0,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || 
>4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-memcache" -version = "1.9.3" -description = "Google Cloud Memcache API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-memcache-1.9.3.tar.gz", hash = "sha256:ee5ff95afe47efc1a0746ba13b2f56996bc018f158f6b0cad69d835b0447d6e8"}, - {file = "google_cloud_memcache-1.9.3-py2.py3-none-any.whl", hash = "sha256:d1574670dcdb861cc772e2b797f7e5fb83b9dd7ef2ce34fbe4ff54d454a816d3"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-monitoring" -version = "2.21.0" -description = "Google Cloud Monitoring API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-monitoring-2.21.0.tar.gz", hash = "sha256:e7b1c8758fc3563ffb9a347bc5172e2782f44c121bc80fc15283e289cff675bf"}, - {file = "google_cloud_monitoring-2.21.0-py2.py3-none-any.whl", hash = "sha256:1b174e656a3bfd767c269bf2ba023b40e0a80a85e36ed0b75bb272be65e76904"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[package.extras] -pandas = ["pandas (>=0.23.2)"] - -[[package]] -name = "google-cloud-orchestration-airflow" -version = "1.12.1" 
-description = "Google Cloud Orchestration Airflow API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-orchestration-airflow-1.12.1.tar.gz", hash = "sha256:659a72f506a9441f1938664e98cdced9a4a852dc5d8c533a5c2f822cefe72eb1"}, - {file = "google_cloud_orchestration_airflow-1.12.1-py2.py3-none-any.whl", hash = "sha256:9dab139528860c3d10b9affe6a7e34848f10e15f5800554792c1bab37f713e3d"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-os-login" -version = "2.14.3" -description = "Google Cloud Os Login API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-os-login-2.14.3.tar.gz", hash = "sha256:b1a15ec7ee306de890750822dc4aab04c64e5bcf76447590f45551bf1afcf1e8"}, - {file = "google_cloud_os_login-2.14.3-py2.py3-none-any.whl", hash = "sha256:de19bb2ade36e982256050972b5f68be34a0f4c70365c76a7cd91de3f0e7985f"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-pubsub" -version = "2.21.5" -description = "Google Cloud Pub/Sub API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-pubsub-2.21.5.tar.gz", hash = 
"sha256:4fa96e7f200359ccc49cf6657e31ac35f5e6e55d00fbb3cedfa672903cf75b24"}, - {file = "google_cloud_pubsub-2.21.5-py2.py3-none-any.whl", hash = "sha256:fbd6b00a1e28ea47609b2a5562aeecbaf31ad9cf4f7a83f91c3605e869c6447c"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -grpcio = ">=1.51.3,<2.0dev" -grpcio-status = ">=1.33.2" -proto-plus = ">=1.22.0,<2.0.0dev" -protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" - -[package.extras] -libcst = ["libcst (>=0.3.10)"] - -[[package]] -name = "google-cloud-redis" -version = "2.15.3" -description = "Google Cloud Redis API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-redis-2.15.3.tar.gz", hash = "sha256:e6a231e45107038cfe498dfad1f6dad2ca7bdca3a931323730be03ab7a000a8f"}, - {file = "google_cloud_redis-2.15.3-py2.py3-none-any.whl", hash = "sha256:df79f6f8a81f948d2d5343a76e7c9108ab56eab9279be0763da6968f497b2bdb"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-resource-manager" -version = "1.12.3" -description = "Google Cloud Resource Manager API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-resource-manager-1.12.3.tar.gz", hash = "sha256:809851824119834e4f2310b2c4f38621c1d16b2bb14d5b9f132e69c79d355e7f"}, - {file = "google_cloud_resource_manager-1.12.3-py2.py3-none-any.whl", hash = 
"sha256:92be7d6959927b76d90eafc4028985c37975a46ded5466a018f02e8649e113d4"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-run" -version = "0.10.5" -description = "Google Cloud Run API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-run-0.10.5.tar.gz", hash = "sha256:d6c546693ad1232794b75c617871f2e46283c0cd0c4cc3886b85b8f0a3ba575f"}, - {file = "google_cloud_run-0.10.5-py2.py3-none-any.whl", hash = "sha256:0f68125f13d1a87f74e1e78c37fa6afb4b094d76dad2b9f951d5526648e8939c"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-secret-manager" -version = "2.20.0" -description = "Google Cloud Secret Manager API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google-cloud-secret-manager-2.20.0.tar.gz", hash = "sha256:a086a7413aaf4fffbd1c4fe9229ef0ce9bcf48f5a8df5b449c4a32deb5a2cfde"}, - {file = "google_cloud_secret_manager-2.20.0-py2.py3-none-any.whl", hash = "sha256:c20bf22e59d220c51aa84a1db3411b14b83aa71f788fae8d273c03a4bf3e77ed"}, -] - -[package.dependencies] -google-api-core = {version = 
">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" - -[[package]] -name = "google-cloud-spanner" -version = "3.47.0" -description = "Google Cloud Spanner API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_cloud_spanner-3.47.0-py2.py3-none-any.whl", hash = "sha256:b05fa4ffccf08af3f32c3c5c77edd1486a76ddbd42db8ad8865830fab3cfda80"}, -] - -[package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-cloud-core = ">=1.4.4,<3.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -grpc-interceptor = ">=0.15.4" -proto-plus = ">=1.22.0,<2.0.0dev" -protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" -sqlparse = ">=0.4.4" - -[package.extras] -libcst = ["libcst (>=0.2.5)"] -tracing = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0,<0.23dev)", "opentelemetry-sdk (>=1.1.0)"] - -[[package]] -name = "google-cloud-speech" -version = "2.26.0" -description = "Google Cloud Speech API client library" +name = "google" +version = "3.0.0" +description = "Python bindings to the Google search engine." 
optional = false -python-versions = ">=3.7" +python-versions = "*" files = [ - {file = "google-cloud-speech-2.26.0.tar.gz", hash = "sha256:d3156a78496aeacff403429408a1b13efe996da6f0544a25567904ad801671d5"}, - {file = "google_cloud_speech-2.26.0-py2.py3-none-any.whl", hash = "sha256:8b61aebcbcc9bd5450933c94c431584a07667e022d12834f9037bb2c0e673c87"}, + {file = "google-3.0.0-py2.py3-none-any.whl", hash = "sha256:889cf695f84e4ae2c55fbc0cfdaf4c1e729417fa52ab1db0485202ba173e4935"}, + {file = "google-3.0.0.tar.gz", hash = "sha256:143530122ee5130509ad5e989f0512f7cb218b2d4eddbafbad40fd10e8d8ccbe"}, ] [package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +beautifulsoup4 = "*" [[package]] -name = "google-cloud-storage" -version = "2.17.0" -description = "Google Cloud Storage API client library" +name = "google-api-core" +version = "2.19.0" +description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, - {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = "sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, + {file = "google-api-core-2.19.0.tar.gz", hash = "sha256:cf1b7c2694047886d2af1128a03ae99e391108a08804f87cfd35970e49c9cd10"}, + {file = "google_api_core-2.19.0-py3-none-any.whl", hash = "sha256:8661eec4078c35428fd3f69a2c7ee29e342896b70f01d1a1cbcb334372dd6251"}, ] [package.dependencies] -google-api-core = ">=2.15.0,<3.0.0dev" -google-auth = ">=2.26.1,<3.0dev" -google-cloud-core = ">=2.3.0,<3.0dev" -google-crc32c = 
">=1.0,<2.0dev" -google-resumable-media = ">=2.6.0" -requests = ">=2.18.0,<3.0.0dev" +google-auth = ">=2.14.1,<3.0.dev0" +googleapis-common-protos = ">=1.56.2,<2.0.dev0" +grpcio = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} +grpcio-status = {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""} +proto-plus = ">=1.22.3,<2.0.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +requests = ">=2.18.0,<3.0.0.dev0" [package.extras] -protobuf = ["protobuf (<5.0.0dev)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] -name = "google-cloud-storage-transfer" -version = "1.11.3" -description = "Google Cloud Storage Transfer API client library" +name = "google-auth" +version = "2.30.0" +description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-transfer-1.11.3.tar.gz", hash = "sha256:c4564749492f41957f44c41b76f61f72c12b519424b841895bfe2d932c6f1490"}, - {file = "google_cloud_storage_transfer-1.11.3-py2.py3-none-any.whl", hash = "sha256:698e1b21c9dc710f543b2e6dc3c70334b872d4a80fafc0b2718fc91b6f45858e"}, + {file = "google-auth-2.30.0.tar.gz", hash = "sha256:ab630a1320f6720909ad76a7dbdb6841cdf5c66b328d690027e4867bdfb16688"}, + {file = "google_auth-2.30.0-py2.py3-none-any.whl", hash = "sha256:8df7da660f62757388b8a7f249df13549b3373f24388cb5d2f1dd91cc18180b5"}, ] [package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 
|| >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] -name = "google-cloud-tasks" -version = "2.16.3" -description = "Google Cloud Tasks API client library" +name = "google-auth-oauthlib" +version = "0.8.0" +description = "Google Authentication Library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.6" files = [ - {file = "google-cloud-tasks-2.16.3.tar.gz", hash = "sha256:d891fe7006db4d6122838aa6de4aca6e0077bab224edcae684666ae3e303c45f"}, - {file = "google_cloud_tasks-2.16.3-py2.py3-none-any.whl", hash = "sha256:a1a8473f5c76907525b51ae4c20182a42333b5c3e1dc5782d7af6c2f58241a0c"}, + {file = "google-auth-oauthlib-0.8.0.tar.gz", hash = "sha256:81056a310fb1c4a3e5a7e1a443e1eb96593c6bbc55b26c0261e4d3295d3e6593"}, + {file = "google_auth_oauthlib-0.8.0-py2.py3-none-any.whl", hash = "sha256:40cc612a13c3336d5433e94e2adb42a0c88f6feb6c55769e44500fc70043a576"}, ] [package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +google-auth = ">=2.15.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] [[package]] -name = "google-cloud-texttospeech" 
-version = "2.16.3" -description = "Google Cloud Texttospeech API client library" +name = "google-cloud-bigquery" +version = "3.25.0" +description = "Google BigQuery API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-texttospeech-2.16.3.tar.gz", hash = "sha256:fabc315032d137da0710bb4c268734d336212d8fa8316b23b277dd3a84ce721c"}, - {file = "google_cloud_texttospeech-2.16.3-py2.py3-none-any.whl", hash = "sha256:5d1e23f9270908a5d7ecf2af04105fbd3a7ddde60fe48506e397bd18c1ece499"}, + {file = "google-cloud-bigquery-3.25.0.tar.gz", hash = "sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509"}, + {file = "google_cloud_bigquery-3.25.0-py2.py3-none-any.whl", hash = "sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9"}, ] [package.dependencies] google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +google-auth = ">=2.14.1,<3.0.0dev" +google-cloud-core = ">=1.6.0,<3.0.0dev" +google-resumable-media = ">=0.6.0,<3.0dev" +packaging = ">=20.0.0" +python-dateutil = ">=2.7.2,<3.0dev" +requests = ">=2.21.0,<3.0.0dev" + +[package.extras] +all = ["Shapely (>=1.8.4,<3.0.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "importlib-metadata (>=1.0.0)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.15.0,<2.0.0dev)", "protobuf 
(>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] +bigquery-v2 = ["proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)"] +bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] +geopandas = ["Shapely (>=1.8.4,<3.0.0dev)", "geopandas (>=0.9.0,<1.0dev)"] +ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] +ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] +opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] +pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "importlib-metadata (>=1.0.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] +tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] [[package]] -name = "google-cloud-translate" -version = "3.15.3" -description = "Google Cloud Translate API client library" +name = "google-cloud-core" +version = "2.4.1" +description = "Google Cloud API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-translate-3.15.3.tar.gz", hash = "sha256:ed587a1e60cf847c3b1add6b28256e2d1722fa73a4285334f5ec5ca933e616f2"}, - {file = "google_cloud_translate-3.15.3-py2.py3-none-any.whl", hash = "sha256:2e92cd286fb25d5cf9fc0e8710f524fdf80656b6b64947d244e91696e69346ad"}, + {file = "google-cloud-core-2.4.1.tar.gz", hash = "sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073"}, + {file = "google_cloud_core-2.4.1-py2.py3-none-any.whl", hash = "sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61"}, ] [package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -google-cloud-core = ">=1.4.4,<3.0.0dev" 
-proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0,<3.0dev" + +[package.extras] +grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] -name = "google-cloud-videointelligence" -version = "2.13.3" -description = "Google Cloud Videointelligence API client library" +name = "google-cloud-dataproc" +version = "5.10.1" +description = "Google Cloud Dataproc API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-videointelligence-2.13.3.tar.gz", hash = "sha256:1460721d8120661c477c01af0edd3db03845857f52146a4a39f16b4acfb36f6d"}, - {file = "google_cloud_videointelligence-2.13.3-py2.py3-none-any.whl", hash = "sha256:2eddfc42c4ef60378b638a5f4dfe466fadefae5cd6b1134eee377b5937be2c9e"}, + {file = "google-cloud-dataproc-5.10.1.tar.gz", hash = "sha256:f3f0f0f3933328e80273774540368432550e296c255928657069a31a2de01c39"}, + {file = "google_cloud_dataproc-5.10.1-py2.py3-none-any.whl", hash = "sha256:28b763c9b019ca7d7c3e917ade04647c00494e77d4e682ca221d53e8d36f70af"}, ] [package.dependencies] google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" +grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" [[package]] -name = "google-cloud-vision" -version = "3.7.2" -description = "Google Cloud Vision API client 
library" +name = "google-cloud-secret-manager" +version = "2.20.0" +description = "Google Cloud Secret Manager API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-vision-3.7.2.tar.gz", hash = "sha256:044330ad618c810333ff2296cd27ffd145f249638d1b35b270de6b460b00e8d2"}, - {file = "google_cloud_vision-3.7.2-py2.py3-none-any.whl", hash = "sha256:a313088fcd9c016af0427c2447eea1ad01bc192ca4cc33babaace1be587bbf75"}, + {file = "google-cloud-secret-manager-2.20.0.tar.gz", hash = "sha256:a086a7413aaf4fffbd1c4fe9229ef0ce9bcf48f5a8df5b449c4a32deb5a2cfde"}, + {file = "google_cloud_secret_manager-2.20.0-py2.py3-none-any.whl", hash = "sha256:c20bf22e59d220c51aa84a1db3411b14b83aa71f788fae8d273c03a4bf3e77ed"}, ] [package.dependencies] google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" +grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" [[package]] -name = "google-cloud-workflows" -version = "1.14.3" -description = "Google Cloud Workflows API client library" +name = "google-cloud-storage" +version = "2.17.0" +description = "Google Cloud Storage API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-workflows-1.14.3.tar.gz", hash = "sha256:2140b97f78037b967d4185576886772fe17be77d2ae9996d207a334ed5a77ffe"}, - {file = "google_cloud_workflows-1.14.3-py2.py3-none-any.whl", hash = "sha256:8e700e1e6e9e9ca6375d16cad6230c52cd123beca4597c86a956940bcce996d1"}, + {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, + {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = 
"sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, ] [package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} -google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" -proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +google-api-core = ">=2.15.0,<3.0.0dev" +google-auth = ">=2.26.1,<3.0dev" +google-cloud-core = ">=2.3.0,<3.0dev" +google-crc32c = ">=1.0,<2.0dev" +google-resumable-media = ">=2.6.0" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +protobuf = ["protobuf (<5.0.0dev)"] [[package]] name = "google-crc32c" @@ -3343,129 +1552,6 @@ files = [ [package.extras] testing = ["pytest"] -[[package]] -name = "google-re2" -version = "1.1.20240601" -description = "RE2 Python bindings" -optional = false -python-versions = "~=3.8" -files = [ - {file = "google_re2-1.1.20240601-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:7f2034ac5fc866b507a9fd8d0419f190d967b5c19dac11292347ab25ca3a5d09"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:1906dd4a00cb1130eab588a20fa00962e631043bb8ed74474525841730312024"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:88858d1e8855d845590b6199cad9035738de98942fcd9acc26be4de7f076e011"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:f1b312054643fc705743ebd11b083b32575f4bdf11cbe8fb5d7898e0aabf4969"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:44dc3d9f085d5c122d9bb2efb5d0a98b1c0b0e8073b57fb77738496f6bf398d7"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:9f77b0a6ad9e47af3a85fc6f43a7c9316fcd53f04641a73e19d8a15691561773"}, - {file = 
"google_re2-1.1.20240601-1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e50cf20b0ec36210d7550fd28a3713cdac4ac8a8cc57d038194de2b9a8749ff"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0775d9114980a0b78d60be778e3c57c2e2e666ac2ce6621b7cc1cf59f89cf9"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-win32.whl", hash = "sha256:37728529376aeb27b414eba497a70faeed5febf46f1ce06119c34ec6cf7998c8"}, - {file = "google_re2-1.1.20240601-1-cp310-cp310-win_amd64.whl", hash = "sha256:cbb834f68e3a0cbc4ad1a28f6b0b93668ceca9571d2b083e9438128452dd74ca"}, - {file = "google_re2-1.1.20240601-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c6a6f8e769b76089d19a3f87be0f64355d85be6519e5a44f8600ab53dc1afa4b"}, - {file = "google_re2-1.1.20240601-1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7eafa78ab0fbc915173043b898ed9c74d7d55768503d8b2f4e37a7267585b000"}, - {file = "google_re2-1.1.20240601-1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:766a3d241b73f2e678e853212ab61c12d23fec54c4f61c636453b7dee97a3f4d"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:b2633f8db3f6eeaccd52f38fe32690f434e4c41ab1c84735252df11883b1972b"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:389beabe9244f0008a79b5deedb38eaf3c2ae9259785746d8b8a5579ad46bb71"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f9ed2a0e92ed79f944d836c533e69ca6e3e9dd85ac7af6f7adf715ee9793e28b"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:9ec8533a75d4bc8a6d1f6cce3a8489667d6c75e10b64c94f2091ac8eda0d9c26"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:c73266bd88c8cde2af9b183f03f2b8d52587956138c918d9378e66fbe5315744"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-macosx_14_0_x86_64.whl", hash = 
"sha256:68e71346c60486ce9075ea87983c8ab19ec1eae8202dae21e1ba3146ae6a4328"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:376c62ea5c669bf1cf17e5005f73802265eb5b60ba8dfe239f6acd4f3043a1f8"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8fe4d50da0db4e63388a9b8222d73f29d6f40cb6188e28d70c4126c96e194d5"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-win32.whl", hash = "sha256:362a42c407ceae7b76890e8cd40b5428be811282d66e3d962b4181c5c8101c20"}, - {file = "google_re2-1.1.20240601-2-cp310-cp310-win_amd64.whl", hash = "sha256:ae4cce9bd7ec7fc110b6db8dedc43905abf6d3cad357c93dec815e1e6310ef59"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:67ff76bd1cae55720285a4b1889f42690008101e9b6628d8a84080474dceef23"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:f44179b03e5047f1ae1eca321d9fffa2841914cc1e8524cde2c25d3feb93eb99"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:7470e394925926a1b0233dc77971354667292c3c6a75b680952e8242b0ddc6dd"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:edc1c22fc52561615f9347763c19649568f0d984f6a000e3c954243c662c41a2"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:625b0ebbcc64dfcd5b7cacf4c68ee5679361cb7b9d0915df262bceca98744760"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:c40dd618164e285880ce2c56f150fb5fd54d3508ce2fd97b28e503b4e72e8655"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:363632640c8b95d94c5d563fa14bcba793501dac3fe9f18f25ac13b75265cdfb"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8e0a5a5b0f153b348bf121ee4ea2ea51bc2bbdd4bec71ff9ec28b3725a28cfbd"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-win32.whl", hash = "sha256:82b980f6ccfcc23a7c7761be08b49ad6835d8f20f612befb739f28d91fe8290b"}, - {file = "google_re2-1.1.20240601-2-cp311-cp311-win_amd64.whl", hash = "sha256:15d7517727aa3e7d23d6692ca6e44545f09ca8ae7bb3608509ea5729f23179b3"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:087fca3f81eb9dbd411a24ed4dbe4fdd5f64b10a1db3cfd183527e7044ff5835"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:c2a52a88819cdcb38fa9124e5e9ede6fd76945ee4bb408d59e78c6e25012d276"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:9c7114b4481deafaba66b236f54c24b35753228afd1b1ca93aa47e8601d037f7"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:a827b7c8008f5c3b2ae6750a93e5a289bfc52eb66d3db9ca40b6b6ea617ac603"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:aa067e14de6fdd16a2e4832d07445a8d9ef9f888e616c0b246bd6df81b21b66f"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c51f3e66a982927810acb5ad7f2c255595ca67159f3777ab30becf6c39cea9c0"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec277e51b45133d32bfdc6f3ef3a7dcbb835da2e33ce3e4622f9ba734e6e12fa"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:78d9974fb0a7a942277385106dcf887710e3a3c65ce04f21d8e850d8950d5f0e"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-win32.whl", hash = "sha256:c8189aad330cf21ecf822c618ae4bd6156309bcc2d5bfa46471cf18e518e8a88"}, - {file = "google_re2-1.1.20240601-2-cp312-cp312-win_amd64.whl", hash = "sha256:de31b81ab64a91486240442fcbde367129925eb524c02aaa6afdf02856b300fe"}, - {file = 
"google_re2-1.1.20240601-2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:9e2b73edf865a5eb4d7cb9833ae978c535434b0e27303742109f9d4c72006bd4"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:02b25f81f739d9370280c8e949844dd602a72d0ade914ca62c5a658d0c23a87d"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:c6d61c33ff7754dc6054e3dbae8f7fc85aa925f5a7ae4c07d0668627cbca4657"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:544cc5421124def304ab442b326e322454906b0348b3416c728c6f88266a4fbb"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:0013ddbbae8d42bd8ab42213c5da3741630eae98b6eca816d72bc77fdda3d356"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:41638ba091c79af68af29374dc250948ec3c0413012c5a1ba9a0807bb7819641"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:864d16144f2a044107ef0fda304678874483b504605a2f4b53f5c9b01d431cb5"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:48cac1cf44389155e21b601f3d388d9d7cca58d8f45e221a039f877783c48419"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-win32.whl", hash = "sha256:32757eb4da382a7173da3944d2c9473787b82d76a2a122dc30713f4030c5619e"}, - {file = "google_re2-1.1.20240601-2-cp38-cp38-win_amd64.whl", hash = "sha256:bf58d89749a0dcd689d87a56c8ae8604adbb28ad063ae652f03e074057b00012"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:6abc7debc08a51810d1d3cd57031f543e50e402c05235eb7e7f1e464442ca637"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2f19de42708b11aaa91e14dd05dacb9cdcbe6f9579c9a434757a4b6baea22fd0"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-macosx_13_0_arm64.whl", hash = 
"sha256:4c1d4974da79dec3c0b064393d555fb5c185fe29bc30ec4521fa208f5357bcd0"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:515854058ac39685bd8b3dc7543a702ffe855f701fe6ea99e96ba3152a333420"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:69edab9f90761ed5855e2d05cdfcb7d7dd6411f22d8ff4f6ff57c4836ec0aa49"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:e3aae4047f2891450f09073c16408a96b214ce63b6d98a58367685d7bdb7df79"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba3a21de393247ffff3b0b35a1c3916cc486a688626f1baaa8deac6eb4d1a6bd"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:df08589bdbab42e169adc0d860f0b4cc7a64f576587c2b33ae1f028689c18d71"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-win32.whl", hash = "sha256:3f639baa417b64afd9f1c9513975ef3b821678046695eed258a833247c1c6fef"}, - {file = "google_re2-1.1.20240601-2-cp39-cp39-win_amd64.whl", hash = "sha256:f6e0093c5832135cba465c3eb01de2472a933222359a30d2f2119123b563f2aa"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:39867a553b4a521fcb2bccae81130b027b499d229c473e78dfa5ffd3a8b69367"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b568f0817ad4ae701ed2ebadaaa306fe4ea848612ebc731f4cda65e22a64686f"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:84c95604371386081fe90cbb1b0e430cfc46a31d5e18fe4dcbb66af1eafae6f3"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:a44c93eaf651d1b20394d8987bb58b3febcd4be3726438d96146782b031ad9b5"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:77e17daff9f8b1207264098d48b99448966d16fec2b5dd50689c9526de3419e9"}, - {file = 
"google_re2-1.1.20240601-3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:dd70c81ec855d84e0e7af875263d0136a8da8be2f33721f60f74a14db8662d99"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f58bbde74f4329f9ef5d130a47f1786b9c39fcffba4408171c5f03e6fc80afd6"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4388b4d3161e8c7799f425cd894db440ec8ef2046b433b5fe4d97a0f1da708b"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-win32.whl", hash = "sha256:1325738300c4bc30b22e4db17d6248c2da186b52707f76bd3e2e9fc38718460a"}, - {file = "google_re2-1.1.20240601-3-cp310-cp310-win_amd64.whl", hash = "sha256:68862630d86f9c97268f65dd4b485cb18ab755920d0a878d7846d9f64d3a3a60"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:a382f6562c0fb7140cc4a276f3ae1de93652a1a337710d7b643ed8ed11bf24d1"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:c421e929ceb86d954c584868049884cacb43b9388f3488e1029317f4ba9458f8"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:23cf5b38f57f41f76dec335f4847ab0acc2a58452c6cca67fc516db98e10c8ad"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:7e7545db6d053a69c5f82a605aabe9e60293ab59d744c206488f3b0da6ce01da"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:e78ab5ee714c4d912e7c039ac4373d0a1a8dbc08149409db265c8243c01ce852"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:c9b30c50f202ca1618e24ab70a7afcc5b8686e04f12a3c7f1107a06656b1d636"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2854f9f8b475d321342dc91f4b6e30848e93bc0db21b93ac6e93769557022cfc"}, - {file = 
"google_re2-1.1.20240601-3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3ece158c1bdda0fb0d9a297102dc7420f7658090141a298a2d005357e0a767ce"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-win32.whl", hash = "sha256:ae34ffecb4e5d5e7a66187da8d8b07fb4f7d850cbfd327f12ed07ed369d9478e"}, - {file = "google_re2-1.1.20240601-3-cp311-cp311-win_amd64.whl", hash = "sha256:ed68a6548ccdf1af6db731bde1b03e453d0ce183d0ec6733bd4a5c153ab9fa03"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6680c9f2c8b98195ca346d0df03aa41a42c940b4694ca1b393c4fc387de2ebc2"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:933e6ada02c26ff14b47abe78ffc74859019b8723f610a1ddc6e33e2bc6d2d79"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:4fd74967dfed3dc3fbf90bb8398eb88f6381f39bf8acd5ea038814605e527c01"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:50272ba8d5fba4f576c171cab9cd306546121c6155cc525cbf601fb5954dda21"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:83475203103521bd1647adaf7f7592dfc73fad9fbbcfa9756a2dc53ff183ca00"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3174d348683751bd13a12cb998fbb47b2302f2fe6c35d0dc9cdaf430442ea266"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79f3cac915446aa44d3aec612befb09f8aed5cb834ffa3e21542fe174e8333a6"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c960ddee4416d78d50490da660660499a6e5920a65862e3e9e08ffac7ce957fe"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-win32.whl", hash = "sha256:7293bd39e8e69310ae5517221313f94ba0cc06fe1248dbfdd03cf98c46746907"}, - {file = "google_re2-1.1.20240601-3-cp312-cp312-win_amd64.whl", hash = 
"sha256:fb495ac61628b0c8747cb3d4deb0d941f081bcf3d913083475fafd0a4d826582"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:96f7be3e3a3349c99d51078e6e47e691a91d07b86f2c21e12ec6a069f1d2f58e"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:1b1789ca35c46318cbb09adb6030fb5e097a67cc2073f483bd67c7ef8de10357"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:9b818d9eae04c595e58e015255a68a1bc6fc19f47d687d24f76dda6dffc8ecc2"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:f7fa2d737c9968a6738b0b2d06981f02f95b9ca8b9d838c877cbb0f7b1cb4546"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:f03f71abeeb61600027d90c65f6512f0bdea04692e88dd16808014371da4a8ef"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:985a9b29513a3ed116acaa9abe51fcca28776d27dcd71aa62dbe7c025c6f1761"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93d6b28ab695d8ca6a8b2889622ad46ad577f9d304379c3aad42ce3dee93fb08"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d76ce0a1c0ad7a9ed9659df582fc60b7c2a9eb2b837e9db57dd44c77bd9a0dd1"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-win32.whl", hash = "sha256:378daba7d5918d08e2d91ce16f0ae935edbfca739cb31acb699bdb8b40704e8e"}, - {file = "google_re2-1.1.20240601-3-cp38-cp38-win_amd64.whl", hash = "sha256:5719e37dd11b3b5d432439d986a119ea96dc2988908ffe3a493da97f7db5964d"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:5b56c1686271f7e8762a5e23b76faf2071daa7a74d900474790271be1730e858"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:f95caca5de4ac6ead5821b5a17e281721662b8eecc37036f42a5fe8741136e47"}, - {file = 
"google_re2-1.1.20240601-3-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:01297cbe44df033adec9d21a2241a7b50efb77bce625f196bf33bc0a87d6906d"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:c38e71f60ce1de30410790a3036dcb18f0fa3d8fb163218b2e8f4a55342101d1"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:15fa53562218ac897ad1df784cba3625cff2ccfa0f4e18b839ccf1e800e8d335"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:73ec934dcd803b32b3394b15cccab5017c3a403b137794b73cea2cf31d8b3f2e"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96e74a21304caffbfee0afe5291d3180ec65e9ba28f2ea8a32ad58369e3028dc"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22e99029ecea3273fd2213b759aeb01f5b906de2e28d2f4e9316ed2391e9ad40"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-win32.whl", hash = "sha256:868de13b7cf57710bf3ee9f43dd63332a3ed7880f4e5202771056fc0fa3b8fea"}, - {file = "google_re2-1.1.20240601-3-cp39-cp39-win_amd64.whl", hash = "sha256:52beae3d11b15bd3a13d669c728fd423999a77bf7a8f132425322ba4dbe3c61c"}, - {file = "google_re2-1.1.20240601.tar.gz", hash = "sha256:3187f68855307754d49f398e583853d5b08d783d3fe662f6916b999075f7e095"}, -] - [[package]] name = "google-resumable-media" version = "2.7.1" @@ -3502,77 +1588,6 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] -[[package]] -name = "greenlet" -version = "3.0.3" -description = "Lightweight in-process concurrent programming" -optional = false -python-versions = ">=3.7" -files = [ - {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, - {file = 
"greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, - {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, - {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, - {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, - {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, - {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, - {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, - {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, - {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, - {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, - {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, - {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, - {file = 
"greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, - {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, - {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, - {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, - {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, - {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, - {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, - {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, - {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, - {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, - {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, - {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, - {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, - {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, - {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, - {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, - {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, - {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, - {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, - {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, - {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, - {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, - {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, - {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, - {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, - {file = 
"greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, - {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, - {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, - {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, - {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, - {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, - {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, - {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, - {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, - {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, - {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, - {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, - {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, - {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, - {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, - {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, - {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, - {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, - {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, - {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, -] - -[package.extras] -docs = ["Sphinx", "furo"] -test = ["objgraph", "psutil"] - [[package]] name = "griffe" version = "0.47.0" @@ -3603,23 +1618,6 @@ googleapis-common-protos = {version = ">=1.56.0,<2.0.0dev", extras = ["grpc"]} grpcio = ">=1.44.0,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" -[[package]] -name = "grpc-interceptor" -version = "0.15.4" -description = "Simplifies gRPC interceptors" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "grpc-interceptor-0.15.4.tar.gz", hash = "sha256:1f45c0bcb58b6f332f37c637632247c9b02bc6af0fdceb7ba7ce8d2ebbfb0926"}, - {file = "grpc_interceptor-0.15.4-py3-none-any.whl", hash = "sha256:0035f33228693ed3767ee49d937bac424318db173fef4d2d0170b3215f254d9d"}, -] - -[package.dependencies] -grpcio = 
">=1.49.1,<2.0.0" - -[package.extras] -testing = ["protobuf (>=4.21.9)"] - [[package]] name = "grpcio" version = "1.64.1" @@ -3678,20 +1676,6 @@ files = [ [package.extras] protobuf = ["grpcio-tools (>=1.64.1)"] -[[package]] -name = "grpcio-gcp" -version = "0.2.2" -description = "gRPC extensions for Google Cloud Platform" -optional = false -python-versions = "*" -files = [ - {file = "grpcio-gcp-0.2.2.tar.gz", hash = "sha256:e292605effc7da39b7a8734c719afb12ec4b5362add3528d8afad3aa3aa9057c"}, - {file = "grpcio_gcp-0.2.2-py2.py3-none-any.whl", hash = "sha256:1ef8e8531eab11356a3eb4c5b84e79e0d923d6782d19e1b1a45e1cabe4e783d7"}, -] - -[package.dependencies] -grpcio = ">=1.12.0" - [[package]] name = "grpcio-status" version = "1.48.2" @@ -3708,38 +1692,6 @@ googleapis-common-protos = ">=1.5.5" grpcio = ">=1.48.2" protobuf = ">=3.12.0" -[[package]] -name = "gunicorn" -version = "22.0.0" -description = "WSGI HTTP Server for UNIX" -optional = false -python-versions = ">=3.7" -files = [ - {file = "gunicorn-22.0.0-py3-none-any.whl", hash = "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9"}, - {file = "gunicorn-22.0.0.tar.gz", hash = "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63"}, -] - -[package.dependencies] -packaging = "*" - -[package.extras] -eventlet = ["eventlet (>=0.24.1,!=0.36.0)"] -gevent = ["gevent (>=1.4.0)"] -setproctitle = ["setproctitle"] -testing = ["coverage", "eventlet", "gevent", "pytest", "pytest-cov"] -tornado = ["tornado (>=0.2)"] - -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false -python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] - [[package]] name = "hail" version = "0.2.127" @@ -3787,65 
+1739,6 @@ tabulate = ">=0.8.9,<1" typer = ">=0.9.0,<1" uvloop = {version = ">=0.19.0,<1", markers = "sys_platform != \"win32\""} -[[package]] -name = "httpcore" -version = "1.0.5" -description = "A minimal low-level HTTP client." -optional = false -python-versions = ">=3.8" -files = [ - {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, - {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, -] - -[package.dependencies] -certifi = "*" -h11 = ">=0.13,<0.15" - -[package.extras] -asyncio = ["anyio (>=4.0,<5.0)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.26.0)"] - -[[package]] -name = "httplib2" -version = "0.22.0" -description = "A comprehensive HTTP client library." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, - {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, -] - -[package.dependencies] -pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} - -[[package]] -name = "httpx" -version = "0.27.0" -description = "The next generation HTTP client." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, - {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, -] - -[package.dependencies] -anyio = "*" -certifi = "*" -httpcore = "==1.*" -idna = "*" -sniffio = "*" - -[package.extras] -brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] - [[package]] name = "huggingface-hub" version = "0.23.4" @@ -3918,66 +1811,21 @@ optional = false python-versions = ">=3.8" files = [ {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, - {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, -] - -[package.extras] -license = ["ukkonen"] - -[[package]] -name = "idna" -version = "3.7" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, -] - -[[package]] -name = "importlib-metadata" -version = "7.1.0" -description = "Read metadata from Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, - {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, -] - -[package.dependencies] -zipp = ">=0.5" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", 
"rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] - -[[package]] -name = "importlib-resources" -version = "6.4.0" -description = "Read resources from Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, - {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, + {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] +license = ["ukkonen"] [[package]] -name = "inflection" -version = "0.5.1" -description = "A port of Ruby on Rails inflector to Python" +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" files = [ - {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, - {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = 
"sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] [[package]] @@ -4115,17 +1963,6 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] -[[package]] -name = "itsdangerous" -version = "2.2.0" -description = "Safely pass data to untrusted environments and back." -optional = false -python-versions = ">=3.8" -files = [ - {file = "itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef"}, - {file = "itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173"}, -] - [[package]] name = "janus" version = "1.0.0" @@ -4212,51 +2049,6 @@ files = [ [package.dependencies] six = ">=1.13,<2.0" -[[package]] -name = "json-merge-patch" -version = "0.2" -description = "JSON Merge Patch library (https://tools.ietf.org/html/rfc7386)" -optional = false -python-versions = "*" -files = [ - {file = "json-merge-patch-0.2.tar.gz", hash = "sha256:09898b6d427c08754e2a97c709cf2dfd7e28bd10c5683a538914975eab778d39"}, -] - -[[package]] -name = "jsonschema" -version = "4.22.0" -description = "An implementation of JSON Schema validation for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "jsonschema-4.22.0-py3-none-any.whl", hash = "sha256:ff4cfd6b1367a40e7bc6411caec72effadd3db0bbe5017de188f2d6108335802"}, - {file = "jsonschema-4.22.0.tar.gz", hash = "sha256:5b22d434a45935119af990552c862e5d6d564e8f6601206b305a61fdf661a2b7"}, -] - -[package.dependencies] -attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" -referencing = ">=0.28.4" -rpds-py = ">=0.7.1" - -[package.extras] -format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] -format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] - -[[package]] -name = "jsonschema-specifications" -version 
= "2023.12.1" -description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" -optional = false -python-versions = ">=3.8" -files = [ - {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, - {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, -] - -[package.dependencies] -referencing = ">=0.31.0" - [[package]] name = "jupyter-client" version = "8.6.2" @@ -4299,129 +2091,6 @@ traitlets = ">=5.3" docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"] -[[package]] -name = "lazy-object-proxy" -version = "1.10.0" -description = "A fast and thorough lazy object proxy." -optional = false -python-versions = ">=3.8" -files = [ - {file = "lazy-object-proxy-1.10.0.tar.gz", hash = "sha256:78247b6d45f43a52ef35c25b5581459e85117225408a4128a3daf8bf9648ac69"}, - {file = "lazy_object_proxy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:855e068b0358ab916454464a884779c7ffa312b8925c6f7401e952dcf3b89977"}, - {file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab7004cf2e59f7c2e4345604a3e6ea0d92ac44e1c2375527d56492014e690c3"}, - {file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc0d2fc424e54c70c4bc06787e4072c4f3b1aa2f897dfdc34ce1013cf3ceef05"}, - {file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e2adb09778797da09d2b5ebdbceebf7dd32e2c96f79da9052b2e87b6ea495895"}, - {file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1f711e2c6dcd4edd372cf5dec5c5a30d23bba06ee012093267b3376c079ec83"}, - 
{file = "lazy_object_proxy-1.10.0-cp310-cp310-win32.whl", hash = "sha256:76a095cfe6045c7d0ca77db9934e8f7b71b14645f0094ffcd842349ada5c5fb9"}, - {file = "lazy_object_proxy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:b4f87d4ed9064b2628da63830986c3d2dca7501e6018347798313fcf028e2fd4"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fec03caabbc6b59ea4a638bee5fce7117be8e99a4103d9d5ad77f15d6f81020c"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02c83f957782cbbe8136bee26416686a6ae998c7b6191711a04da776dc9e47d4"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:009e6bb1f1935a62889ddc8541514b6a9e1fcf302667dcb049a0be5c8f613e56"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75fc59fc450050b1b3c203c35020bc41bd2695ed692a392924c6ce180c6f1dc9"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:782e2c9b2aab1708ffb07d4bf377d12901d7a1d99e5e410d648d892f8967ab1f"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-win32.whl", hash = "sha256:edb45bb8278574710e68a6b021599a10ce730d156e5b254941754a9cc0b17d03"}, - {file = "lazy_object_proxy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:e271058822765ad5e3bca7f05f2ace0de58a3f4e62045a8c90a0dfd2f8ad8cc6"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e98c8af98d5707dcdecc9ab0863c0ea6e88545d42ca7c3feffb6b4d1e370c7ba"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:952c81d415b9b80ea261d2372d2a4a2332a3890c2b83e0535f263ddfe43f0d43"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:80b39d3a151309efc8cc48675918891b865bdf742a8616a337cb0090791a0de9"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e221060b701e2aa2ea991542900dd13907a5c90fa80e199dbf5a03359019e7a3"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:92f09ff65ecff3108e56526f9e2481b8116c0b9e1425325e13245abfd79bdb1b"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-win32.whl", hash = "sha256:3ad54b9ddbe20ae9f7c1b29e52f123120772b06dbb18ec6be9101369d63a4074"}, - {file = "lazy_object_proxy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:127a789c75151db6af398b8972178afe6bda7d6f68730c057fbbc2e96b08d282"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4ed0518a14dd26092614412936920ad081a424bdcb54cc13349a8e2c6d106a"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ad9e6ed739285919aa9661a5bbed0aaf410aa60231373c5579c6b4801bd883c"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc0a92c02fa1ca1e84fc60fa258458e5bf89d90a1ddaeb8ed9cc3147f417255"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0aefc7591920bbd360d57ea03c995cebc204b424524a5bd78406f6e1b8b2a5d8"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5faf03a7d8942bb4476e3b62fd0f4cf94eaf4618e304a19865abf89a35c0bbee"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-win32.whl", hash = "sha256:e333e2324307a7b5d86adfa835bb500ee70bfcd1447384a822e96495796b0ca4"}, - {file = "lazy_object_proxy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:cb73507defd385b7705c599a94474b1d5222a508e502553ef94114a143ec6696"}, - {file = "lazy_object_proxy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366c32fe5355ef5fc8a232c5436f4cc66e9d3e8967c01fb2e6302fd6627e3d94"}, - {file = 
"lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2297f08f08a2bb0d32a4265e98a006643cd7233fb7983032bd61ac7a02956b3b"}, - {file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18dd842b49456aaa9a7cf535b04ca4571a302ff72ed8740d06b5adcd41fe0757"}, - {file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:217138197c170a2a74ca0e05bddcd5f1796c735c37d0eee33e43259b192aa424"}, - {file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a3a87cf1e133e5b1994144c12ca4aa3d9698517fe1e2ca82977781b16955658"}, - {file = "lazy_object_proxy-1.10.0-cp39-cp39-win32.whl", hash = "sha256:30b339b2a743c5288405aa79a69e706a06e02958eab31859f7f3c04980853b70"}, - {file = "lazy_object_proxy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:a899b10e17743683b293a729d3a11f2f399e8a90c73b089e29f5d0fe3509f0dd"}, - {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, -] - -[[package]] -name = "limits" -version = "3.13.0" -description = "Rate limiting utilities" -optional = false -python-versions = ">=3.8" -files = [ - {file = "limits-3.13.0-py3-none-any.whl", hash = "sha256:9767f7233da4255e9904b79908a728e8ec0984c0b086058b4cbbd309aea553f6"}, - {file = "limits-3.13.0.tar.gz", hash = "sha256:6571b0c567bfa175a35fed9f8a954c0c92f1c3200804282f1b8f1de4ad98a953"}, -] - -[package.dependencies] -deprecated = ">=1.2" -importlib-resources = ">=1.3" -packaging = ">=21,<25" -typing-extensions = "*" - -[package.extras] -all = ["aetcd", "coredis (>=3.4.0,<5)", "emcache (>=0.6.1)", "emcache (>=1)", "etcd3", "motor (>=3,<4)", "pymemcache (>3,<5.0.0)", "pymongo (>4.1,<5)", "redis (>3,!=4.5.2,!=4.5.3,<6.0.0)", "redis (>=4.2.0,!=4.5.2,!=4.5.3)"] -async-etcd = ["aetcd"] -async-memcached = ["emcache (>=0.6.1)", "emcache 
(>=1)"] -async-mongodb = ["motor (>=3,<4)"] -async-redis = ["coredis (>=3.4.0,<5)"] -etcd = ["etcd3"] -memcached = ["pymemcache (>3,<5.0.0)"] -mongodb = ["pymongo (>4.1,<5)"] -redis = ["redis (>3,!=4.5.2,!=4.5.3,<6.0.0)"] -rediscluster = ["redis (>=4.2.0,!=4.5.2,!=4.5.3)"] - -[[package]] -name = "linkify-it-py" -version = "2.0.3" -description = "Links recognition library with FULL unicode support." -optional = false -python-versions = ">=3.7" -files = [ - {file = "linkify-it-py-2.0.3.tar.gz", hash = "sha256:68cda27e162e9215c17d786649d1da0021a451bdc436ef9e0fa0ba5234b9b048"}, - {file = "linkify_it_py-2.0.3-py3-none-any.whl", hash = "sha256:6bcbc417b0ac14323382aef5c5192c0075bf8a9d6b41820a2b66371eac6b6d79"}, -] - -[package.dependencies] -uc-micro-py = "*" - -[package.extras] -benchmark = ["pytest", "pytest-benchmark"] -dev = ["black", "flake8", "isort", "pre-commit", "pyproject-flake8"] -doc = ["myst-parser", "sphinx", "sphinx-book-theme"] -test = ["coverage", "pytest", "pytest-cov"] - -[[package]] -name = "lockfile" -version = "0.12.2" -description = "Platform-independent file locking module" -optional = false -python-versions = "*" -files = [ - {file = "lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa"}, - {file = "lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799"}, -] - -[[package]] -name = "looker-sdk" -version = "24.10.0" -description = "Looker REST API" -optional = false -python-versions = ">=3.6" -files = [ - {file = "looker_sdk-24.10.0-py3-none-any.whl", hash = "sha256:26b4c919a0ec81bfa4d9c5eadddfad4c64ae46a20b0f299e7eabe300feaa1842"}, - {file = "looker_sdk-24.10.0.tar.gz", hash = "sha256:a6fe1e47912d10c90dadba0da33150dd694e24d65b84cb149a025ca26e3046cb"}, -] - -[package.dependencies] -attrs = {version = ">=20.1.0", markers = "python_version >= \"3.7\""} -cattrs = {version = ">=1.3", markers = "python_version >= \"3.7\""} -requests = 
">=2.22" -typing-extensions = ">=4.1.1" - [[package]] name = "lxml" version = "5.3.0" @@ -4576,25 +2245,6 @@ html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] source = ["Cython (>=3.0.11)"] -[[package]] -name = "mako" -version = "1.3.5" -description = "A super-fast templating language that borrows the best ideas from the existing templating languages." -optional = false -python-versions = ">=3.8" -files = [ - {file = "Mako-1.3.5-py3-none-any.whl", hash = "sha256:260f1dbc3a519453a9c856dedfe4beb4e50bd5a26d96386cb6c80856556bb91a"}, - {file = "Mako-1.3.5.tar.gz", hash = "sha256:48dbc20568c1d276a2698b36d968fa76161bf127194907ea6fc594fa81f943bc"}, -] - -[package.dependencies] -MarkupSafe = ">=0.9.2" - -[package.extras] -babel = ["Babel"] -lingua = ["lingua"] -testing = ["pytest"] - [[package]] name = "markdown" version = "3.6" @@ -4610,30 +2260,6 @@ files = [ docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] testing = ["coverage", "pyyaml"] -[[package]] -name = "markdown-it-py" -version = "3.0.0" -description = "Python port of markdown-it. Markdown parsing, done right!" 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, - {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, -] - -[package.dependencies] -mdurl = ">=0.1,<1.0" - -[package.extras] -benchmarking = ["psutil", "pytest", "pytest-benchmark"] -code-style = ["pre-commit (>=3.0,<4.0)"] -compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] -linkify = ["linkify-it-py (>=1,<3)"] -plugins = ["mdit-py-plugins"] -profiling = ["gprof2dot"] -rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] - [[package]] name = "markupsafe" version = "2.1.5" @@ -4703,65 +2329,6 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] -[[package]] -name = "marshmallow" -version = "3.21.3" -description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "marshmallow-3.21.3-py3-none-any.whl", hash = "sha256:86ce7fb914aa865001a4b2092c4c2872d13bc347f3d42673272cabfdbad386f1"}, - {file = "marshmallow-3.21.3.tar.gz", hash = "sha256:4f57c5e050a54d66361e826f94fba213eb10b67b2fdb02c3e0343ce207ba1662"}, -] - -[package.dependencies] -packaging = ">=17.0" - -[package.extras] -dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] -docs = ["alabaster (==0.7.16)", "autodocsumm (==0.2.12)", "sphinx (==7.3.7)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"] -tests = ["pytest", "pytz", "simplejson"] - -[[package]] -name = "marshmallow-oneofschema" -version = "3.1.1" -description = "marshmallow multiplexing schema" -optional = false -python-versions = ">=3.8" -files = [ - {file = "marshmallow_oneofschema-3.1.1-py3-none-any.whl", hash = "sha256:ff4cb2a488785ee8edd521a765682c2c80c78b9dc48894124531bdfa1ec9303b"}, - {file = "marshmallow_oneofschema-3.1.1.tar.gz", hash = "sha256:68b4a57d0281a04ac25d4eb7a4c5865a57090a0a8fd30fd6362c8e833ac6a6d9"}, -] - -[package.dependencies] -marshmallow = ">=3.0.0,<4.0.0" - -[package.extras] -dev = ["marshmallow-oneofschema[tests]", "pre-commit (>=3.5,<4.0)", "tox"] -tests = ["pytest"] - -[[package]] -name = "marshmallow-sqlalchemy" -version = "0.28.2" -description = "SQLAlchemy integration with the marshmallow (de)serialization library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "marshmallow-sqlalchemy-0.28.2.tar.gz", hash = "sha256:2ab0f1280c793e5aec81deab3e63ec23688ddfe05e5f38ac960368a1079520a1"}, - {file = "marshmallow_sqlalchemy-0.28.2-py2.py3-none-any.whl", hash = "sha256:c31b3bdf794de1d78c53e1c495502cbb3eeb06ed216869980c71d6159e7e9e66"}, -] - -[package.dependencies] -marshmallow = ">=3.0.0" -packaging = ">=21.3" -SQLAlchemy = ">=1.3.0,<2.0" - -[package.extras] -dev = ["flake8 (==6.0.0)", "flake8-bugbear (==23.2.13)", "pre-commit (==3.1.0)", "pytest", "pytest-lazy-fixture (>=0.6.2)", 
"tox"] -docs = ["alabaster (==0.7.13)", "sphinx (==6.1.3)", "sphinx-issues (==3.0.1)"] -lint = ["flake8 (==6.0.0)", "flake8-bugbear (==23.2.13)", "pre-commit (==3.1.0)"] -tests = ["pytest", "pytest-lazy-fixture (>=0.6.2)"] - [[package]] name = "matplotlib-inline" version = "0.1.7" @@ -4787,36 +2354,6 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] -[[package]] -name = "mdit-py-plugins" -version = "0.4.1" -description = "Collection of plugins for markdown-it-py" -optional = false -python-versions = ">=3.8" -files = [ - {file = "mdit_py_plugins-0.4.1-py3-none-any.whl", hash = "sha256:1020dfe4e6bfc2c79fb49ae4e3f5b297f5ccd20f010187acc52af2921e27dc6a"}, - {file = "mdit_py_plugins-0.4.1.tar.gz", hash = "sha256:834b8ac23d1cd60cec703646ffd22ae97b7955a6d596eb1d304be1e251ae499c"}, -] - -[package.dependencies] -markdown-it-py = ">=1.0.0,<4.0.0" - -[package.extras] -code-style = ["pre-commit"] -rtd = ["myst-parser", "sphinx-book-theme"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] - -[[package]] -name = "mdurl" -version = "0.1.2" -description = "Markdown URL utilities" -optional = false -python-versions = ">=3.7" -files = [ - {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, - {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, -] - [[package]] name = "mergedeep" version = "1.3.4" @@ -4828,24 +2365,6 @@ files = [ {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, ] -[[package]] -name = "methodtools" -version = "0.4.7" -description = "Expand standard functools to methods" -optional = false -python-versions = "*" -files = [ - {file = "methodtools-0.4.7-py2.py3-none-any.whl", hash = "sha256:5e188c780b236adc12e75b5f078c5afb419ef99eb648569fc6d7071f053a1f11"}, - {file = 
"methodtools-0.4.7.tar.gz", hash = "sha256:e213439dd64cfe60213f7015da6efe5dd4003fd89376db3baa09fe13ec2bb0ba"}, -] - -[package.dependencies] -wirerope = ">=0.4.7" - -[package.extras] -doc = ["sphinx"] -test = ["functools32 (>=3.2.3-2)", "pytest (>=4.6.7)", "pytest-cov (>=2.6.1)"] - [[package]] name = "mkdocs" version = "1.6.0" @@ -5079,17 +2598,6 @@ files = [ griffe = ">=0.47" mkdocstrings = ">=0.25" -[[package]] -name = "more-itertools" -version = "10.3.0" -description = "More routines for operating on iterables, beyond itertools" -optional = false -python-versions = ">=3.8" -files = [ - {file = "more-itertools-10.3.0.tar.gz", hash = "sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463"}, - {file = "more_itertools-10.3.0-py3-none-any.whl", hash = "sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320"}, -] - [[package]] name = "msal" version = "1.29.0" @@ -5415,148 +2923,6 @@ files = [ antlr4-python3-runtime = "==4.9.*" PyYAML = ">=5.1.0" -[[package]] -name = "opentelemetry-api" -version = "1.25.0" -description = "OpenTelemetry Python API" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_api-1.25.0-py3-none-any.whl", hash = "sha256:757fa1aa020a0f8fa139f8959e53dec2051cc26b832e76fa839a6d76ecefd737"}, - {file = "opentelemetry_api-1.25.0.tar.gz", hash = "sha256:77c4985f62f2614e42ce77ee4c9da5fa5f0bc1e1821085e9a47533a9323ae869"}, -] - -[package.dependencies] -deprecated = ">=1.2.6" -importlib-metadata = ">=6.0,<=7.1" - -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.25.0" -description = "OpenTelemetry Collector Exporters" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_exporter_otlp-1.25.0-py3-none-any.whl", hash = "sha256:d67a831757014a3bc3174e4cd629ae1493b7ba8d189e8a007003cacb9f1a6b60"}, - {file = "opentelemetry_exporter_otlp-1.25.0.tar.gz", hash = "sha256:ce03199c1680a845f82e12c0a6a8f61036048c07ec7a0bd943142aca8fa6ced0"}, -] - -[package.dependencies] 
-opentelemetry-exporter-otlp-proto-grpc = "1.25.0" -opentelemetry-exporter-otlp-proto-http = "1.25.0" - -[[package]] -name = "opentelemetry-exporter-otlp-proto-common" -version = "1.25.0" -description = "OpenTelemetry Protobuf encoding" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_exporter_otlp_proto_common-1.25.0-py3-none-any.whl", hash = "sha256:15637b7d580c2675f70246563363775b4e6de947871e01d0f4e3881d1848d693"}, - {file = "opentelemetry_exporter_otlp_proto_common-1.25.0.tar.gz", hash = "sha256:c93f4e30da4eee02bacd1e004eb82ce4da143a2f8e15b987a9f603e0a85407d3"}, -] - -[package.dependencies] -opentelemetry-proto = "1.25.0" - -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.25.0" -description = "OpenTelemetry Collector Protobuf over gRPC Exporter" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_exporter_otlp_proto_grpc-1.25.0-py3-none-any.whl", hash = "sha256:3131028f0c0a155a64c430ca600fd658e8e37043cb13209f0109db5c1a3e4eb4"}, - {file = "opentelemetry_exporter_otlp_proto_grpc-1.25.0.tar.gz", hash = "sha256:c0b1661415acec5af87625587efa1ccab68b873745ca0ee96b69bb1042087eac"}, -] - -[package.dependencies] -deprecated = ">=1.2.6" -googleapis-common-protos = ">=1.52,<2.0" -grpcio = ">=1.0.0,<2.0.0" -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.25.0" -opentelemetry-proto = "1.25.0" -opentelemetry-sdk = ">=1.25.0,<1.26.0" - -[[package]] -name = "opentelemetry-exporter-otlp-proto-http" -version = "1.25.0" -description = "OpenTelemetry Collector Protobuf over HTTP Exporter" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_exporter_otlp_proto_http-1.25.0-py3-none-any.whl", hash = "sha256:2eca686ee11b27acd28198b3ea5e5863a53d1266b91cda47c839d95d5e0541a6"}, - {file = "opentelemetry_exporter_otlp_proto_http-1.25.0.tar.gz", hash = "sha256:9f8723859e37c75183ea7afa73a3542f01d0fd274a5b97487ea24cb683d7d684"}, -] - 
-[package.dependencies] -deprecated = ">=1.2.6" -googleapis-common-protos = ">=1.52,<2.0" -opentelemetry-api = ">=1.15,<2.0" -opentelemetry-exporter-otlp-proto-common = "1.25.0" -opentelemetry-proto = "1.25.0" -opentelemetry-sdk = ">=1.25.0,<1.26.0" -requests = ">=2.7,<3.0" - -[[package]] -name = "opentelemetry-proto" -version = "1.25.0" -description = "OpenTelemetry Python Proto" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_proto-1.25.0-py3-none-any.whl", hash = "sha256:f07e3341c78d835d9b86665903b199893befa5e98866f63d22b00d0b7ca4972f"}, - {file = "opentelemetry_proto-1.25.0.tar.gz", hash = "sha256:35b6ef9dc4a9f7853ecc5006738ad40443701e52c26099e197895cbda8b815a3"}, -] - -[package.dependencies] -protobuf = ">=3.19,<5.0" - -[[package]] -name = "opentelemetry-sdk" -version = "1.25.0" -description = "OpenTelemetry Python SDK" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_sdk-1.25.0-py3-none-any.whl", hash = "sha256:d97ff7ec4b351692e9d5a15af570c693b8715ad78b8aafbec5c7100fe966b4c9"}, - {file = "opentelemetry_sdk-1.25.0.tar.gz", hash = "sha256:ce7fc319c57707ef5bf8b74fb9f8ebdb8bfafbe11898410e0d2a761d08a98ec7"}, -] - -[package.dependencies] -opentelemetry-api = "1.25.0" -opentelemetry-semantic-conventions = "0.46b0" -typing-extensions = ">=3.7.4" - -[[package]] -name = "opentelemetry-semantic-conventions" -version = "0.46b0" -description = "OpenTelemetry Semantic Conventions" -optional = false -python-versions = ">=3.8" -files = [ - {file = "opentelemetry_semantic_conventions-0.46b0-py3-none-any.whl", hash = "sha256:6daef4ef9fa51d51855d9f8e0ccd3a1bd59e0e545abe99ac6203804e36ab3e07"}, - {file = "opentelemetry_semantic_conventions-0.46b0.tar.gz", hash = "sha256:fbc982ecbb6a6e90869b15c1673be90bd18c8a56ff1cffc0864e38e2edffaefa"}, -] - -[package.dependencies] -opentelemetry-api = "1.25.0" - -[[package]] -name = "ordered-set" -version = "4.1.0" -description = "An OrderedSet is a custom MutableSet that remembers 
its order, so that every" -optional = false -python-versions = ">=3.7" -files = [ - {file = "ordered-set-4.1.0.tar.gz", hash = "sha256:694a8e44c87657c59292ede72891eb91d34131f6531463aab3009191c77364a8"}, - {file = "ordered_set-4.1.0-py3-none-any.whl", hash = "sha256:046e1132c71fcf3330438a539928932caf51ddbc582496833e23de611de14562"}, -] - -[package.extras] -dev = ["black", "mypy", "pytest"] - [[package]] name = "orjson" version = "3.10.5" @@ -5773,105 +3139,6 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] -[[package]] -name = "pendulum" -version = "3.0.0" -description = "Python datetimes made easy" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pendulum-3.0.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2cf9e53ef11668e07f73190c805dbdf07a1939c3298b78d5a9203a86775d1bfd"}, - {file = "pendulum-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fb551b9b5e6059377889d2d878d940fd0bbb80ae4810543db18e6f77b02c5ef6"}, - {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c58227ac260d5b01fc1025176d7b31858c9f62595737f350d22124a9a3ad82d"}, - {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60fb6f415fea93a11c52578eaa10594568a6716602be8430b167eb0d730f3332"}, - {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b69f6b4dbcb86f2c2fe696ba991e67347bcf87fe601362a1aba6431454b46bde"}, - {file = "pendulum-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:138afa9c373ee450ede206db5a5e9004fd3011b3c6bbe1e57015395cd076a09f"}, - {file = "pendulum-3.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:83d9031f39c6da9677164241fd0d37fbfc9dc8ade7043b5d6d62f56e81af8ad2"}, - {file = "pendulum-3.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:0c2308af4033fa534f089595bcd40a95a39988ce4059ccd3dc6acb9ef14ca44a"}, - {file = "pendulum-3.0.0-cp310-none-win_amd64.whl", hash = "sha256:9a59637cdb8462bdf2dbcb9d389518c0263799189d773ad5c11db6b13064fa79"}, - {file = "pendulum-3.0.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3725245c0352c95d6ca297193192020d1b0c0f83d5ee6bb09964edc2b5a2d508"}, - {file = "pendulum-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c035f03a3e565ed132927e2c1b691de0dbf4eb53b02a5a3c5a97e1a64e17bec"}, - {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597e66e63cbd68dd6d58ac46cb7a92363d2088d37ccde2dae4332ef23e95cd00"}, - {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99a0f8172e19f3f0c0e4ace0ad1595134d5243cf75985dc2233e8f9e8de263ca"}, - {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:77d8839e20f54706aed425bec82a83b4aec74db07f26acd039905d1237a5e1d4"}, - {file = "pendulum-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afde30e8146292b059020fbc8b6f8fd4a60ae7c5e6f0afef937bbb24880bdf01"}, - {file = "pendulum-3.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:660434a6fcf6303c4efd36713ca9212c753140107ee169a3fc6c49c4711c2a05"}, - {file = "pendulum-3.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dee9e5a48c6999dc1106eb7eea3e3a50e98a50651b72c08a87ee2154e544b33e"}, - {file = "pendulum-3.0.0-cp311-none-win_amd64.whl", hash = "sha256:d4cdecde90aec2d67cebe4042fd2a87a4441cc02152ed7ed8fb3ebb110b94ec4"}, - {file = "pendulum-3.0.0-cp311-none-win_arm64.whl", hash = "sha256:773c3bc4ddda2dda9f1b9d51fe06762f9200f3293d75c4660c19b2614b991d83"}, - {file = "pendulum-3.0.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:409e64e41418c49f973d43a28afe5df1df4f1dd87c41c7c90f1a63f61ae0f1f7"}, - {file = "pendulum-3.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:a38ad2121c5ec7c4c190c7334e789c3b4624798859156b138fcc4d92295835dc"}, - {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fde4d0b2024b9785f66b7f30ed59281bd60d63d9213cda0eb0910ead777f6d37"}, - {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b2c5675769fb6d4c11238132962939b960fcb365436b6d623c5864287faa319"}, - {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8af95e03e066826f0f4c65811cbee1b3123d4a45a1c3a2b4fc23c4b0dff893b5"}, - {file = "pendulum-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2165a8f33cb15e06c67070b8afc87a62b85c5a273e3aaa6bc9d15c93a4920d6f"}, - {file = "pendulum-3.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ad5e65b874b5e56bd942546ea7ba9dd1d6a25121db1c517700f1c9de91b28518"}, - {file = "pendulum-3.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17fe4b2c844bbf5f0ece69cfd959fa02957c61317b2161763950d88fed8e13b9"}, - {file = "pendulum-3.0.0-cp312-none-win_amd64.whl", hash = "sha256:78f8f4e7efe5066aca24a7a57511b9c2119f5c2b5eb81c46ff9222ce11e0a7a5"}, - {file = "pendulum-3.0.0-cp312-none-win_arm64.whl", hash = "sha256:28f49d8d1e32aae9c284a90b6bb3873eee15ec6e1d9042edd611b22a94ac462f"}, - {file = "pendulum-3.0.0-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d4e2512f4e1a4670284a153b214db9719eb5d14ac55ada5b76cbdb8c5c00399d"}, - {file = "pendulum-3.0.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:3d897eb50883cc58d9b92f6405245f84b9286cd2de6e8694cb9ea5cb15195a32"}, - {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e169cc2ca419517f397811bbe4589cf3cd13fca6dc38bb352ba15ea90739ebb"}, - {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f17c3084a4524ebefd9255513692f7e7360e23c8853dc6f10c64cc184e1217ab"}, - {file = 
"pendulum-3.0.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:826d6e258052715f64d05ae0fc9040c0151e6a87aae7c109ba9a0ed930ce4000"}, - {file = "pendulum-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2aae97087872ef152a0c40e06100b3665d8cb86b59bc8471ca7c26132fccd0f"}, - {file = "pendulum-3.0.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ac65eeec2250d03106b5e81284ad47f0d417ca299a45e89ccc69e36130ca8bc7"}, - {file = "pendulum-3.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a5346d08f3f4a6e9e672187faa179c7bf9227897081d7121866358af369f44f9"}, - {file = "pendulum-3.0.0-cp37-none-win_amd64.whl", hash = "sha256:235d64e87946d8f95c796af34818c76e0f88c94d624c268693c85b723b698aa9"}, - {file = "pendulum-3.0.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:6a881d9c2a7f85bc9adafcfe671df5207f51f5715ae61f5d838b77a1356e8b7b"}, - {file = "pendulum-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d7762d2076b9b1cb718a6631ad6c16c23fc3fac76cbb8c454e81e80be98daa34"}, - {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e8e36a8130819d97a479a0e7bf379b66b3b1b520e5dc46bd7eb14634338df8c"}, - {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7dc843253ac373358ffc0711960e2dd5b94ab67530a3e204d85c6e8cb2c5fa10"}, - {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a78ad3635d609ceb1e97d6aedef6a6a6f93433ddb2312888e668365908c7120"}, - {file = "pendulum-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a137e9e0d1f751e60e67d11fc67781a572db76b2296f7b4d44554761049d6"}, - {file = "pendulum-3.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c95984037987f4a457bb760455d9ca80467be792236b69d0084f228a8ada0162"}, - {file = "pendulum-3.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:d29c6e578fe0f893766c0d286adbf0b3c726a4e2341eba0917ec79c50274ec16"}, - {file = "pendulum-3.0.0-cp38-none-win_amd64.whl", hash = "sha256:deaba8e16dbfcb3d7a6b5fabdd5a38b7c982809567479987b9c89572df62e027"}, - {file = "pendulum-3.0.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b11aceea5b20b4b5382962b321dbc354af0defe35daa84e9ff3aae3c230df694"}, - {file = "pendulum-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a90d4d504e82ad236afac9adca4d6a19e4865f717034fc69bafb112c320dcc8f"}, - {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:825799c6b66e3734227756fa746cc34b3549c48693325b8b9f823cb7d21b19ac"}, - {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad769e98dc07972e24afe0cff8d365cb6f0ebc7e65620aa1976fcfbcadc4c6f3"}, - {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6fc26907eb5fb8cc6188cc620bc2075a6c534d981a2f045daa5f79dfe50d512"}, - {file = "pendulum-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c717eab1b6d898c00a3e0fa7781d615b5c5136bbd40abe82be100bb06df7a56"}, - {file = "pendulum-3.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3ddd1d66d1a714ce43acfe337190be055cdc221d911fc886d5a3aae28e14b76d"}, - {file = "pendulum-3.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:822172853d7a9cf6da95d7b66a16c7160cb99ae6df55d44373888181d7a06edc"}, - {file = "pendulum-3.0.0-cp39-none-win_amd64.whl", hash = "sha256:840de1b49cf1ec54c225a2a6f4f0784d50bd47f68e41dc005b7f67c7d5b5f3ae"}, - {file = "pendulum-3.0.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b1f74d1e6ffe5d01d6023870e2ce5c2191486928823196f8575dcc786e107b1"}, - {file = "pendulum-3.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:729e9f93756a2cdfa77d0fc82068346e9731c7e884097160603872686e570f07"}, - {file = 
"pendulum-3.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e586acc0b450cd21cbf0db6bae386237011b75260a3adceddc4be15334689a9a"}, - {file = "pendulum-3.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22e7944ffc1f0099a79ff468ee9630c73f8c7835cd76fdb57ef7320e6a409df4"}, - {file = "pendulum-3.0.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fa30af36bd8e50686846bdace37cf6707bdd044e5cb6e1109acbad3277232e04"}, - {file = "pendulum-3.0.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:440215347b11914ae707981b9a57ab9c7b6983ab0babde07063c6ee75c0dc6e7"}, - {file = "pendulum-3.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:314c4038dc5e6a52991570f50edb2f08c339debdf8cea68ac355b32c4174e820"}, - {file = "pendulum-3.0.0-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5acb1d386337415f74f4d1955c4ce8d0201978c162927d07df8eb0692b2d8533"}, - {file = "pendulum-3.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a789e12fbdefaffb7b8ac67f9d8f22ba17a3050ceaaa635cd1cc4645773a4b1e"}, - {file = "pendulum-3.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:860aa9b8a888e5913bd70d819306749e5eb488e6b99cd6c47beb701b22bdecf5"}, - {file = "pendulum-3.0.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:5ebc65ea033ef0281368217fbf59f5cb05b338ac4dd23d60959c7afcd79a60a0"}, - {file = "pendulum-3.0.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d9fef18ab0386ef6a9ac7bad7e43ded42c83ff7ad412f950633854f90d59afa8"}, - {file = "pendulum-3.0.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1c134ba2f0571d0b68b83f6972e2307a55a5a849e7dac8505c715c531d2a8795"}, - {file = "pendulum-3.0.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:385680812e7e18af200bb9b4a49777418c32422d05ad5a8eb85144c4a285907b"}, - {file = 
"pendulum-3.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9eec91cd87c59fb32ec49eb722f375bd58f4be790cae11c1b70fac3ee4f00da0"}, - {file = "pendulum-3.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4386bffeca23c4b69ad50a36211f75b35a4deb6210bdca112ac3043deb7e494a"}, - {file = "pendulum-3.0.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dfbcf1661d7146d7698da4b86e7f04814221081e9fe154183e34f4c5f5fa3bf8"}, - {file = "pendulum-3.0.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:04a1094a5aa1daa34a6b57c865b25f691848c61583fb22722a4df5699f6bf74c"}, - {file = "pendulum-3.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5b0ec85b9045bd49dd3a3493a5e7ddfd31c36a2a60da387c419fa04abcaecb23"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:0a15b90129765b705eb2039062a6daf4d22c4e28d1a54fa260892e8c3ae6e157"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:bb8f6d7acd67a67d6fedd361ad2958ff0539445ef51cbe8cd288db4306503cd0"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd69b15374bef7e4b4440612915315cc42e8575fcda2a3d7586a0d88192d0c88"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc00f8110db6898360c53c812872662e077eaf9c75515d53ecc65d886eec209a"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:83a44e8b40655d0ba565a5c3d1365d27e3e6778ae2a05b69124db9e471255c4a"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1a3604e9fbc06b788041b2a8b78f75c243021e0f512447806a6d37ee5214905d"}, - {file = "pendulum-3.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:92c307ae7accebd06cbae4729f0ba9fa724df5f7d91a0964b1b972a22baa482b"}, - {file = "pendulum-3.0.0.tar.gz", hash = 
"sha256:5d034998dea404ec31fae27af6b22cff1708f830a1ed7353be4d1019bb9f584e"}, -] - -[package.dependencies] -python-dateutil = ">=2.6" -tzdata = ">=2020.1" - -[package.extras] -test = ["time-machine (>=2.6.0)"] - [[package]] name = "pep8-naming" version = "0.14.1" @@ -6070,33 +3337,16 @@ pyyaml = ">=5.1" virtualenv = ">=20.10.0" [[package]] -name = "prettier" -version = "0.0.7" -description = "Properly pprint of nested objects" -optional = false -python-versions = "*" -files = [ - {file = "prettier-0.0.7-py3-none-any.whl", hash = "sha256:20e76791de41cafe481328dd49552303f29ca192151cee1b120c26f66cae9bfc"}, - {file = "prettier-0.0.7.tar.gz", hash = "sha256:6c34b8cd09fd9c8956c05d6395ea3f575e0122dce494ba57685c07065abed427"}, -] - -[[package]] -name = "prison" -version = "0.2.1" -description = "Rison encoder/decoder" +name = "prettier" +version = "0.0.7" +description = "Properly pprint of nested objects" optional = false python-versions = "*" files = [ - {file = "prison-0.2.1-py2.py3-none-any.whl", hash = "sha256:f90bab63fca497aa0819a852f64fb21a4e181ed9f6114deaa5dc04001a7555c5"}, - {file = "prison-0.2.1.tar.gz", hash = "sha256:e6cd724044afcb1a8a69340cad2f1e3151a5839fd3a8027fd1357571e797c599"}, + {file = "prettier-0.0.7-py3-none-any.whl", hash = "sha256:20e76791de41cafe481328dd49552303f29ca192151cee1b120c26f66cae9bfc"}, + {file = "prettier-0.0.7.tar.gz", hash = "sha256:6c34b8cd09fd9c8956c05d6395ea3f575e0122dce494ba57685c07065abed427"}, ] -[package.dependencies] -six = "*" - -[package.extras] -dev = ["nose", "pipreqs", "twine"] - [[package]] name = "prompt-toolkit" version = "3.0.47" @@ -6396,116 +3646,6 @@ files = [ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] -[[package]] -name = "pydantic" -version = "2.7.4" -description = "Data validation using Python type hints" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pydantic-2.7.4-py3-none-any.whl", hash = 
"sha256:ee8538d41ccb9c0a9ad3e0e5f07bf15ed8015b481ced539a1759d8cc89ae90d0"}, - {file = "pydantic-2.7.4.tar.gz", hash = "sha256:0c84efd9548d545f63ac0060c1e4d39bb9b14db8b3c0652338aecc07b5adec52"}, -] - -[package.dependencies] -annotated-types = ">=0.4.0" -pydantic-core = "2.18.4" -typing-extensions = ">=4.6.1" - -[package.extras] -email = ["email-validator (>=2.0.0)"] - -[[package]] -name = "pydantic-core" -version = "2.18.4" -description = "Core functionality for Pydantic validation and serialization" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"}, - {file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"}, - {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a55b5b16c839df1070bc113c1f7f94a0af4433fcfa1b41799ce7606e5c79ce0a"}, - {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d0dcc59664fcb8974b356fe0a18a672d6d7cf9f54746c05f43275fc48636851"}, - {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8951eee36c57cd128f779e641e21eb40bc5073eb28b2d23f33eb0ef14ffb3f5d"}, - {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4701b19f7e3a06ea655513f7938de6f108123bf7c86bbebb1196eb9bd35cf724"}, - {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00a3f196329e08e43d99b79b286d60ce46bed10f2280d25a1718399457e06be"}, - {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:97736815b9cc893b2b7f663628e63f436018b75f44854c8027040e05230eeddb"}, - {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:6891a2ae0e8692679c07728819b6e2b822fb30ca7445f67bbf6509b25a96332c"}, - {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bc4ff9805858bd54d1a20efff925ccd89c9d2e7cf4986144b30802bf78091c3e"}, - {file = "pydantic_core-2.18.4-cp310-none-win32.whl", hash = "sha256:1b4de2e51bbcb61fdebd0ab86ef28062704f62c82bbf4addc4e37fa4b00b7cbc"}, - {file = "pydantic_core-2.18.4-cp310-none-win_amd64.whl", hash = "sha256:6a750aec7bf431517a9fd78cb93c97b9b0c496090fee84a47a0d23668976b4b0"}, - {file = "pydantic_core-2.18.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:942ba11e7dfb66dc70f9ae66b33452f51ac7bb90676da39a7345e99ffb55402d"}, - {file = "pydantic_core-2.18.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2ebef0e0b4454320274f5e83a41844c63438fdc874ea40a8b5b4ecb7693f1c4"}, - {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642295cd0c8df1b86fc3dced1d067874c353a188dc8e0f744626d49e9aa51c4"}, - {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f09baa656c904807e832cf9cce799c6460c450c4ad80803517032da0cd062e2"}, - {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98906207f29bc2c459ff64fa007afd10a8c8ac080f7e4d5beff4c97086a3dabd"}, - {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19894b95aacfa98e7cb093cd7881a0c76f55731efad31073db4521e2b6ff5b7d"}, - {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fbbdc827fe5e42e4d196c746b890b3d72876bdbf160b0eafe9f0334525119c8"}, - {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f85d05aa0918283cf29a30b547b4df2fbb56b45b135f9e35b6807cb28bc47951"}, - {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:e85637bc8fe81ddb73fda9e56bab24560bdddfa98aa64f87aaa4e4b6730c23d2"}, - {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2f5966897e5461f818e136b8451d0551a2e77259eb0f73a837027b47dc95dab9"}, - {file = "pydantic_core-2.18.4-cp311-none-win32.whl", hash = "sha256:44c7486a4228413c317952e9d89598bcdfb06399735e49e0f8df643e1ccd0558"}, - {file = "pydantic_core-2.18.4-cp311-none-win_amd64.whl", hash = "sha256:8a7164fe2005d03c64fd3b85649891cd4953a8de53107940bf272500ba8a788b"}, - {file = "pydantic_core-2.18.4-cp311-none-win_arm64.whl", hash = "sha256:4e99bc050fe65c450344421017f98298a97cefc18c53bb2f7b3531eb39bc7805"}, - {file = "pydantic_core-2.18.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6f5c4d41b2771c730ea1c34e458e781b18cc668d194958e0112455fff4e402b2"}, - {file = "pydantic_core-2.18.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fdf2156aa3d017fddf8aea5adfba9f777db1d6022d392b682d2a8329e087cef"}, - {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4748321b5078216070b151d5271ef3e7cc905ab170bbfd27d5c83ee3ec436695"}, - {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:847a35c4d58721c5dc3dba599878ebbdfd96784f3fb8bb2c356e123bdcd73f34"}, - {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c40d4eaad41f78e3bbda31b89edc46a3f3dc6e171bf0ecf097ff7a0ffff7cb1"}, - {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21a5e440dbe315ab9825fcd459b8814bb92b27c974cbc23c3e8baa2b76890077"}, - {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01dd777215e2aa86dfd664daed5957704b769e726626393438f9c87690ce78c3"}, - {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:4b06beb3b3f1479d32befd1f3079cc47b34fa2da62457cdf6c963393340b56e9"}, - {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:564d7922e4b13a16b98772441879fcdcbe82ff50daa622d681dd682175ea918c"}, - {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0eb2a4f660fcd8e2b1c90ad566db2b98d7f3f4717c64fe0a83e0adb39766d5b8"}, - {file = "pydantic_core-2.18.4-cp312-none-win32.whl", hash = "sha256:8b8bab4c97248095ae0c4455b5a1cd1cdd96e4e4769306ab19dda135ea4cdb07"}, - {file = "pydantic_core-2.18.4-cp312-none-win_amd64.whl", hash = "sha256:14601cdb733d741b8958224030e2bfe21a4a881fb3dd6fbb21f071cabd48fa0a"}, - {file = "pydantic_core-2.18.4-cp312-none-win_arm64.whl", hash = "sha256:c1322d7dd74713dcc157a2b7898a564ab091ca6c58302d5c7b4c07296e3fd00f"}, - {file = "pydantic_core-2.18.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:823be1deb01793da05ecb0484d6c9e20baebb39bd42b5d72636ae9cf8350dbd2"}, - {file = "pydantic_core-2.18.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ebef0dd9bf9b812bf75bda96743f2a6c5734a02092ae7f721c048d156d5fabae"}, - {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae1d6df168efb88d7d522664693607b80b4080be6750c913eefb77e34c12c71a"}, - {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9899c94762343f2cc2fc64c13e7cae4c3cc65cdfc87dd810a31654c9b7358cc"}, - {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99457f184ad90235cfe8461c4d70ab7dd2680e28821c29eca00252ba90308c78"}, - {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18f469a3d2a2fdafe99296a87e8a4c37748b5080a26b806a707f25a902c040a8"}, - {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cdf28938ac6b8b49ae5e92f2735056a7ba99c9b110a474473fd71185c1af5d"}, - {file = 
"pydantic_core-2.18.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:938cb21650855054dc54dfd9120a851c974f95450f00683399006aa6e8abb057"}, - {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:44cd83ab6a51da80fb5adbd9560e26018e2ac7826f9626bc06ca3dc074cd198b"}, - {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:972658f4a72d02b8abfa2581d92d59f59897d2e9f7e708fdabe922f9087773af"}, - {file = "pydantic_core-2.18.4-cp38-none-win32.whl", hash = "sha256:1d886dc848e60cb7666f771e406acae54ab279b9f1e4143babc9c2258213daa2"}, - {file = "pydantic_core-2.18.4-cp38-none-win_amd64.whl", hash = "sha256:bb4462bd43c2460774914b8525f79b00f8f407c945d50881568f294c1d9b4443"}, - {file = "pydantic_core-2.18.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:44a688331d4a4e2129140a8118479443bd6f1905231138971372fcde37e43528"}, - {file = "pydantic_core-2.18.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a2fdd81edd64342c85ac7cf2753ccae0b79bf2dfa063785503cb85a7d3593223"}, - {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86110d7e1907ab36691f80b33eb2da87d780f4739ae773e5fc83fb272f88825f"}, - {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:46387e38bd641b3ee5ce247563b60c5ca098da9c56c75c157a05eaa0933ed154"}, - {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:123c3cec203e3f5ac7b000bd82235f1a3eced8665b63d18be751f115588fea30"}, - {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1803ac5c32ec324c5261c7209e8f8ce88e83254c4e1aebdc8b0a39f9ddb443"}, - {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53db086f9f6ab2b4061958d9c276d1dbe3690e8dd727d6abf2321d6cce37fa94"}, - {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash 
= "sha256:abc267fa9837245cc28ea6929f19fa335f3dc330a35d2e45509b6566dc18be23"}, - {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a0d829524aaefdebccb869eed855e2d04c21d2d7479b6cada7ace5448416597b"}, - {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:509daade3b8649f80d4e5ff21aa5673e4ebe58590b25fe42fac5f0f52c6f034a"}, - {file = "pydantic_core-2.18.4-cp39-none-win32.whl", hash = "sha256:ca26a1e73c48cfc54c4a76ff78df3727b9d9f4ccc8dbee4ae3f73306a591676d"}, - {file = "pydantic_core-2.18.4-cp39-none-win_amd64.whl", hash = "sha256:c67598100338d5d985db1b3d21f3619ef392e185e71b8d52bceacc4a7771ea7e"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:574d92eac874f7f4db0ca653514d823a0d22e2354359d0759e3f6a406db5d55d"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1f4d26ceb5eb9eed4af91bebeae4b06c3fb28966ca3a8fb765208cf6b51102ab"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77450e6d20016ec41f43ca4a6c63e9fdde03f0ae3fe90e7c27bdbeaece8b1ed4"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d323a01da91851a4f17bf592faf46149c9169d68430b3146dcba2bb5e5719abc"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43d447dd2ae072a0065389092a231283f62d960030ecd27565672bd40746c507"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:578e24f761f3b425834f297b9935e1ce2e30f51400964ce4801002435a1b41ef"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:81b5efb2f126454586d0f40c4d834010979cb80785173d1586df845a632e4e6d"}, - {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ab86ce7c8f9bea87b9d12c7f0af71102acbf5ecbc66c17796cff45dae54ef9a5"}, - 
{file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:90afc12421df2b1b4dcc975f814e21bc1754640d502a2fbcc6d41e77af5ec312"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:51991a89639a912c17bef4b45c87bd83593aee0437d8102556af4885811d59f5"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:293afe532740370aba8c060882f7d26cfd00c94cae32fd2e212a3a6e3b7bc15e"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48ece5bde2e768197a2d0f6e925f9d7e3e826f0ad2271120f8144a9db18d5c8"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eae237477a873ab46e8dd748e515c72c0c804fb380fbe6c85533c7de51f23a8f"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:834b5230b5dfc0c1ec37b2fda433b271cbbc0e507560b5d1588e2cc1148cf1ce"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e858ac0a25074ba4bce653f9b5d0a85b7456eaddadc0ce82d3878c22489fa4ee"}, - {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2fd41f6eff4c20778d717af1cc50eca52f5afe7805ee530a4fbd0bae284f16e9"}, - {file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"}, -] - -[package.dependencies] -typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" - [[package]] name = "pydata-google-auth" version = "1.8.2" @@ -6615,24 +3755,6 @@ pyyaml = "*" [package.extras] extra = ["pygments (>=2.12)"] -[[package]] -name = "pyopenssl" -version = "24.1.0" -description = "Python wrapper module around the OpenSSL library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pyOpenSSL-24.1.0-py3-none-any.whl", hash = "sha256:17ed5be5936449c5418d1cd269a1a9e9081bc54c17aed272b45856a3d3dc86ad"}, - {file = 
"pyOpenSSL-24.1.0.tar.gz", hash = "sha256:cabed4bfaa5df9f1a16c0ef64a0cb65318b5cd077a7eda7d6970131ca2f41a6f"}, -] - -[package.dependencies] -cryptography = ">=41.0.5,<43" - -[package.extras] -docs = ["sphinx (!=5.2.0,!=5.2.0.post0,!=7.2.5)", "sphinx-rtd-theme"] -test = ["pretend", "pytest (>=3.0.1)", "pytest-rerunfailures"] - [[package]] name = "pyparsing" version = "3.1.2" @@ -6745,26 +3867,6 @@ psutil = ["psutil (>=3.0)"] setproctitle = ["setproctitle"] testing = ["filelock"] -[[package]] -name = "python-daemon" -version = "3.0.1" -description = "Library to implement a well-behaved Unix daemon process." -optional = false -python-versions = ">=3" -files = [ - {file = "python-daemon-3.0.1.tar.gz", hash = "sha256:6c57452372f7eaff40934a1c03ad1826bf5e793558e87fef49131e6464b4dae5"}, - {file = "python_daemon-3.0.1-py3-none-any.whl", hash = "sha256:42bb848a3260a027fa71ad47ecd959e471327cb34da5965962edd5926229f341"}, -] - -[package.dependencies] -docutils = "*" -lockfile = ">=0.10" -setuptools = ">=62.4.0" - -[package.extras] -devel = ["coverage", "docutils", "isort", "testscenarios (>=0.4)", "testtools", "twine"] -test = ["coverage", "docutils", "testscenarios (>=0.4)", "testtools"] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -6790,37 +3892,6 @@ files = [ {file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"}, ] -[[package]] -name = "python-nvd3" -version = "0.16.0" -description = "Python NVD3 - Chart Library for d3.js" -optional = false -python-versions = "*" -files = [ - {file = "python-nvd3-0.16.0.tar.gz", hash = "sha256:0115887289b3f751716ddd05c7b53ac5f05e71201e52496decdac453a50dcf7e"}, -] - -[package.dependencies] -Jinja2 = ">=2.8" -python-slugify = ">=1.2.5" - -[[package]] -name = "python-slugify" -version = "8.0.4" -description = "A Python slugify application that also handles Unicode" -optional = false -python-versions = ">=3.7" -files = [ - {file = 
"python-slugify-8.0.4.tar.gz", hash = "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856"}, - {file = "python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8"}, -] - -[package.dependencies] -text-unidecode = ">=1.3" - -[package.extras] -unidecode = ["Unidecode (>=1.1.1)"] - [[package]] name = "pytz" version = "2024.1" @@ -7029,21 +4100,6 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} -[[package]] -name = "referencing" -version = "0.35.1" -description = "JSON Referencing + Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, - {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, -] - -[package.dependencies] -attrs = ">=22.2.0" -rpds-py = ">=0.7.0" - [[package]] name = "regex" version = "2024.5.15" @@ -7171,34 +4227,6 @@ requests = ">=2.0.0" [package.extras] rsa = ["oauthlib[signedtoken] (>=3.0.0)"] -[[package]] -name = "requests-toolbelt" -version = "1.0.0" -description = "A utility belt for advanced users of python-requests" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, - {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, -] - -[package.dependencies] -requests = ">=2.0.1,<3.0.0" - -[[package]] -name = "rfc3339-validator" -version = "0.1.4" -description = "A pure python RFC3339 validator" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = 
"sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, - {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, -] - -[package.dependencies] -six = "*" - [[package]] name = "rich" version = "12.6.0" @@ -7217,128 +4245,6 @@ pygments = ">=2.6.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] -[[package]] -name = "rich-argparse" -version = "1.5.2" -description = "Rich help formatters for argparse and optparse" -optional = false -python-versions = ">=3.8" -files = [ - {file = "rich_argparse-1.5.2-py3-none-any.whl", hash = "sha256:7027503d5849e27fc7cc85fb58504363606f2ec1c8b3c27d9a8ad28788faf877"}, - {file = "rich_argparse-1.5.2.tar.gz", hash = "sha256:84d348d5b6dafe99fffe2c7ea1ca0afe14096c921693445b9eee65ee4fcbfd2c"}, -] - -[package.dependencies] -rich = ">=11.0.0" - -[[package]] -name = "rpds-py" -version = "0.18.1" -description = "Python bindings to Rust's persistent data structures (rpds)" -optional = false -python-versions = ">=3.8" -files = [ - {file = "rpds_py-0.18.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:d31dea506d718693b6b2cffc0648a8929bdc51c70a311b2770f09611caa10d53"}, - {file = "rpds_py-0.18.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:732672fbc449bab754e0b15356c077cc31566df874964d4801ab14f71951ea80"}, - {file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a98a1f0552b5f227a3d6422dbd61bc6f30db170939bd87ed14f3c339aa6c7c9"}, - {file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7f1944ce16401aad1e3f7d312247b3d5de7981f634dc9dfe90da72b87d37887d"}, - {file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38e14fb4e370885c4ecd734f093a2225ee52dc384b86fa55fe3f74638b2cfb09"}, - {file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:08d74b184f9ab6289b87b19fe6a6d1a97fbfea84b8a3e745e87a5de3029bf944"}, - {file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d70129cef4a8d979caa37e7fe957202e7eee8ea02c5e16455bc9808a59c6b2f0"}, - {file = "rpds_py-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0bb20e3a11bd04461324a6a798af34d503f8d6f1aa3d2aa8901ceaf039176d"}, - {file = "rpds_py-0.18.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81c5196a790032e0fc2464c0b4ab95f8610f96f1f2fa3d4deacce6a79852da60"}, - {file = "rpds_py-0.18.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f3027be483868c99b4985fda802a57a67fdf30c5d9a50338d9db646d590198da"}, - {file = "rpds_py-0.18.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d44607f98caa2961bab4fa3c4309724b185b464cdc3ba6f3d7340bac3ec97cc1"}, - {file = "rpds_py-0.18.1-cp310-none-win32.whl", hash = "sha256:c273e795e7a0f1fddd46e1e3cb8be15634c29ae8ff31c196debb620e1edb9333"}, - {file = "rpds_py-0.18.1-cp310-none-win_amd64.whl", hash = "sha256:8352f48d511de5f973e4f2f9412736d7dea76c69faa6d36bcf885b50c758ab9a"}, - {file = "rpds_py-0.18.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6b5ff7e1d63a8281654b5e2896d7f08799378e594f09cf3674e832ecaf396ce8"}, - {file = "rpds_py-0.18.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8927638a4d4137a289e41d0fd631551e89fa346d6dbcfc31ad627557d03ceb6d"}, - {file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:154bf5c93d79558b44e5b50cc354aa0459e518e83677791e6adb0b039b7aa6a7"}, - {file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07f2139741e5deb2c5154a7b9629bc5aa48c766b643c1a6750d16f865a82c5fc"}, - {file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c7672e9fba7425f79019db9945b16e308ed8bc89348c23d955c8c0540da0a07"}, - {file = 
"rpds_py-0.18.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:489bdfe1abd0406eba6b3bb4fdc87c7fa40f1031de073d0cfb744634cc8fa261"}, - {file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c20f05e8e3d4fc76875fc9cb8cf24b90a63f5a1b4c5b9273f0e8225e169b100"}, - {file = "rpds_py-0.18.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:967342e045564cef76dfcf1edb700b1e20838d83b1aa02ab313e6a497cf923b8"}, - {file = "rpds_py-0.18.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cc7c1a47f3a63282ab0f422d90ddac4aa3034e39fc66a559ab93041e6505da7"}, - {file = "rpds_py-0.18.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f7afbfee1157e0f9376c00bb232e80a60e59ed716e3211a80cb8506550671e6e"}, - {file = "rpds_py-0.18.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e6934d70dc50f9f8ea47081ceafdec09245fd9f6032669c3b45705dea096b88"}, - {file = "rpds_py-0.18.1-cp311-none-win32.whl", hash = "sha256:c69882964516dc143083d3795cb508e806b09fc3800fd0d4cddc1df6c36e76bb"}, - {file = "rpds_py-0.18.1-cp311-none-win_amd64.whl", hash = "sha256:70a838f7754483bcdc830444952fd89645569e7452e3226de4a613a4c1793fb2"}, - {file = "rpds_py-0.18.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3dd3cd86e1db5aadd334e011eba4e29d37a104b403e8ca24dcd6703c68ca55b3"}, - {file = "rpds_py-0.18.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05f3d615099bd9b13ecf2fc9cf2d839ad3f20239c678f461c753e93755d629ee"}, - {file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35b2b771b13eee8729a5049c976197ff58a27a3829c018a04341bcf1ae409b2b"}, - {file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee17cd26b97d537af8f33635ef38be873073d516fd425e80559f4585a7b90c43"}, - {file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b646bf655b135ccf4522ed43d6902af37d3f5dbcf0da66c769a2b3938b9d8184"}, - {file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19ba472b9606c36716062c023afa2484d1e4220548751bda14f725a7de17b4f6"}, - {file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e30ac5e329098903262dc5bdd7e2086e0256aa762cc8b744f9e7bf2a427d3f8"}, - {file = "rpds_py-0.18.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d58ad6317d188c43750cb76e9deacf6051d0f884d87dc6518e0280438648a9ac"}, - {file = "rpds_py-0.18.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e1735502458621921cee039c47318cb90b51d532c2766593be6207eec53e5c4c"}, - {file = "rpds_py-0.18.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f5bab211605d91db0e2995a17b5c6ee5edec1270e46223e513eaa20da20076ac"}, - {file = "rpds_py-0.18.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2fc24a329a717f9e2448f8cd1f960f9dac4e45b6224d60734edeb67499bab03a"}, - {file = "rpds_py-0.18.1-cp312-none-win32.whl", hash = "sha256:1805d5901779662d599d0e2e4159d8a82c0b05faa86ef9222bf974572286b2b6"}, - {file = "rpds_py-0.18.1-cp312-none-win_amd64.whl", hash = "sha256:720edcb916df872d80f80a1cc5ea9058300b97721efda8651efcd938a9c70a72"}, - {file = "rpds_py-0.18.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:c827576e2fa017a081346dce87d532a5310241648eb3700af9a571a6e9fc7e74"}, - {file = "rpds_py-0.18.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:aa3679e751408d75a0b4d8d26d6647b6d9326f5e35c00a7ccd82b78ef64f65f8"}, - {file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0abeee75434e2ee2d142d650d1e54ac1f8b01e6e6abdde8ffd6eeac6e9c38e20"}, - {file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed402d6153c5d519a0faf1bb69898e97fb31613b49da27a84a13935ea9164dfc"}, - {file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:338dee44b0cef8b70fd2ef54b4e09bb1b97fc6c3a58fea5db6cc083fd9fc2724"}, - {file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7750569d9526199c5b97e5a9f8d96a13300950d910cf04a861d96f4273d5b104"}, - {file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:607345bd5912aacc0c5a63d45a1f73fef29e697884f7e861094e443187c02be5"}, - {file = "rpds_py-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:207c82978115baa1fd8d706d720b4a4d2b0913df1c78c85ba73fe6c5804505f0"}, - {file = "rpds_py-0.18.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6d1e42d2735d437e7e80bab4d78eb2e459af48c0a46e686ea35f690b93db792d"}, - {file = "rpds_py-0.18.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5463c47c08630007dc0fe99fb480ea4f34a89712410592380425a9b4e1611d8e"}, - {file = "rpds_py-0.18.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:06d218939e1bf2ca50e6b0ec700ffe755e5216a8230ab3e87c059ebb4ea06afc"}, - {file = "rpds_py-0.18.1-cp38-none-win32.whl", hash = "sha256:312fe69b4fe1ffbe76520a7676b1e5ac06ddf7826d764cc10265c3b53f96dbe9"}, - {file = "rpds_py-0.18.1-cp38-none-win_amd64.whl", hash = "sha256:9437ca26784120a279f3137ee080b0e717012c42921eb07861b412340f85bae2"}, - {file = "rpds_py-0.18.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:19e515b78c3fc1039dd7da0a33c28c3154458f947f4dc198d3c72db2b6b5dc93"}, - {file = "rpds_py-0.18.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7b28c5b066bca9a4eb4e2f2663012debe680f097979d880657f00e1c30875a0"}, - {file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:673fdbbf668dd958eff750e500495ef3f611e2ecc209464f661bc82e9838991e"}, - {file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d960de62227635d2e61068f42a6cb6aae91a7fe00fca0e3aeed17667c8a34611"}, - {file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:352a88dc7892f1da66b6027af06a2e7e5d53fe05924cc2cfc56495b586a10b72"}, - {file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e0ee01ad8260184db21468a6e1c37afa0529acc12c3a697ee498d3c2c4dcaf3"}, - {file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4c39ad2f512b4041343ea3c7894339e4ca7839ac38ca83d68a832fc8b3748ab"}, - {file = "rpds_py-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aaa71ee43a703c321906813bb252f69524f02aa05bf4eec85f0c41d5d62d0f4c"}, - {file = "rpds_py-0.18.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6cd8098517c64a85e790657e7b1e509b9fe07487fd358e19431cb120f7d96338"}, - {file = "rpds_py-0.18.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4adec039b8e2928983f885c53b7cc4cda8965b62b6596501a0308d2703f8af1b"}, - {file = "rpds_py-0.18.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:32b7daaa3e9389db3695964ce8e566e3413b0c43e3394c05e4b243a4cd7bef26"}, - {file = "rpds_py-0.18.1-cp39-none-win32.whl", hash = "sha256:2625f03b105328729f9450c8badda34d5243231eef6535f80064d57035738360"}, - {file = "rpds_py-0.18.1-cp39-none-win_amd64.whl", hash = "sha256:bf18932d0003c8c4d51a39f244231986ab23ee057d235a12b2684ea26a353590"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cbfbea39ba64f5e53ae2915de36f130588bba71245b418060ec3330ebf85678e"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3d456ff2a6a4d2adcdf3c1c960a36f4fd2fec6e3b4902a42a384d17cf4e7a65"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7700936ef9d006b7ef605dc53aa364da2de5a3aa65516a1f3ce73bf82ecfc7ae"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:51584acc5916212e1bf45edd17f3a6b05fe0cbb40482d25e619f824dccb679de"}, - {file = 
"rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:942695a206a58d2575033ff1e42b12b2aece98d6003c6bc739fbf33d1773b12f"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b906b5f58892813e5ba5c6056d6a5ad08f358ba49f046d910ad992196ea61397"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f8e3fecca256fefc91bb6765a693d96692459d7d4c644660a9fff32e517843"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7732770412bab81c5a9f6d20aeb60ae943a9b36dcd990d876a773526468e7163"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:bd1105b50ede37461c1d51b9698c4f4be6e13e69a908ab7751e3807985fc0346"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:618916f5535784960f3ecf8111581f4ad31d347c3de66d02e728de460a46303c"}, - {file = "rpds_py-0.18.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:17c6d2155e2423f7e79e3bb18151c686d40db42d8645e7977442170c360194d4"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6c4c4c3f878df21faf5fac86eda32671c27889e13570645a9eea0a1abdd50922"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:fab6ce90574645a0d6c58890e9bcaac8d94dff54fb51c69e5522a7358b80ab64"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:531796fb842b53f2695e94dc338929e9f9dbf473b64710c28af5a160b2a8927d"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:740884bc62a5e2bbb31e584f5d23b32320fd75d79f916f15a788d527a5e83644"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:998125738de0158f088aef3cb264a34251908dd2e5d9966774fdab7402edfab7"}, - {file = 
"rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e2be6e9dd4111d5b31ba3b74d17da54a8319d8168890fbaea4b9e5c3de630ae5"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0cee71bc618cd93716f3c1bf56653740d2d13ddbd47673efa8bf41435a60daa"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2c3caec4ec5cd1d18e5dd6ae5194d24ed12785212a90b37f5f7f06b8bedd7139"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:27bba383e8c5231cd559affe169ca0b96ec78d39909ffd817f28b166d7ddd4d8"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:a888e8bdb45916234b99da2d859566f1e8a1d2275a801bb8e4a9644e3c7e7909"}, - {file = "rpds_py-0.18.1-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6031b25fb1b06327b43d841f33842b383beba399884f8228a6bb3df3088485ff"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48c2faaa8adfacefcbfdb5f2e2e7bdad081e5ace8d182e5f4ade971f128e6bb3"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:d85164315bd68c0806768dc6bb0429c6f95c354f87485ee3593c4f6b14def2bd"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6afd80f6c79893cfc0574956f78a0add8c76e3696f2d6a15bca2c66c415cf2d4"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa242ac1ff583e4ec7771141606aafc92b361cd90a05c30d93e343a0c2d82a89"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21be4770ff4e08698e1e8e0bce06edb6ea0626e7c8f560bc08222880aca6a6f"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c45a639e93a0c5d4b788b2613bd637468edd62f8f95ebc6fcc303d58ab3f0a8"}, - {file = 
"rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:910e71711d1055b2768181efa0a17537b2622afeb0424116619817007f8a2b10"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b9bb1f182a97880f6078283b3505a707057c42bf55d8fca604f70dedfdc0772a"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1d54f74f40b1f7aaa595a02ff42ef38ca654b1469bef7d52867da474243cc633"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:8d2e182c9ee01135e11e9676e9a62dfad791a7a467738f06726872374a83db49"}, - {file = "rpds_py-0.18.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:636a15acc588f70fda1661234761f9ed9ad79ebed3f2125d44be0862708b666e"}, - {file = "rpds_py-0.18.1.tar.gz", hash = "sha256:dc48b479d540770c811fbd1eb9ba2bb66951863e448efec2e2c102625328e92f"}, -] - [[package]] name = "rsa" version = "4.9" @@ -7650,63 +4556,6 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-g testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] -[[package]] -name = "shapely" -version = "2.0.4" -description = "Manipulation and analysis of geometric objects" -optional = false -python-versions = ">=3.7" -files = [ - {file = "shapely-2.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:011b77153906030b795791f2fdfa2d68f1a8d7e40bce78b029782ade3afe4f2f"}, - {file = "shapely-2.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9831816a5d34d5170aa9ed32a64982c3d6f4332e7ecfe62dc97767e163cb0b17"}, - {file = "shapely-2.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5c4849916f71dc44e19ed370421518c0d86cf73b26e8656192fcfcda08218fbd"}, - {file = "shapely-2.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841f93a0e31e4c64d62ea570d81c35de0f6cea224568b2430d832967536308e6"}, - {file = "shapely-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b4431f522b277c79c34b65da128029a9955e4481462cbf7ebec23aab61fc58"}, - {file = "shapely-2.0.4-cp310-cp310-win32.whl", hash = "sha256:92a41d936f7d6743f343be265ace93b7c57f5b231e21b9605716f5a47c2879e7"}, - {file = "shapely-2.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:30982f79f21bb0ff7d7d4a4e531e3fcaa39b778584c2ce81a147f95be1cd58c9"}, - {file = "shapely-2.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de0205cb21ad5ddaef607cda9a3191eadd1e7a62a756ea3a356369675230ac35"}, - {file = "shapely-2.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7d56ce3e2a6a556b59a288771cf9d091470116867e578bebced8bfc4147fbfd7"}, - {file = "shapely-2.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:58b0ecc505bbe49a99551eea3f2e8a9b3b24b3edd2a4de1ac0dc17bc75c9ec07"}, - {file = "shapely-2.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:790a168a808bd00ee42786b8ba883307c0e3684ebb292e0e20009588c426da47"}, - {file = "shapely-2.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4310b5494271e18580d61022c0857eb85d30510d88606fa3b8314790df7f367d"}, - {file = "shapely-2.0.4-cp311-cp311-win32.whl", hash = "sha256:63f3a80daf4f867bd80f5c97fbe03314348ac1b3b70fb1c0ad255a69e3749879"}, - {file = "shapely-2.0.4-cp311-cp311-win_amd64.whl", hash = 
"sha256:c52ed79f683f721b69a10fb9e3d940a468203f5054927215586c5d49a072de8d"}, - {file = "shapely-2.0.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5bbd974193e2cc274312da16b189b38f5f128410f3377721cadb76b1e8ca5328"}, - {file = "shapely-2.0.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:41388321a73ba1a84edd90d86ecc8bfed55e6a1e51882eafb019f45895ec0f65"}, - {file = "shapely-2.0.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0776c92d584f72f1e584d2e43cfc5542c2f3dd19d53f70df0900fda643f4bae6"}, - {file = "shapely-2.0.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c75c98380b1ede1cae9a252c6dc247e6279403fae38c77060a5e6186c95073ac"}, - {file = "shapely-2.0.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3e700abf4a37b7b8b90532fa6ed5c38a9bfc777098bc9fbae5ec8e618ac8f30"}, - {file = "shapely-2.0.4-cp312-cp312-win32.whl", hash = "sha256:4f2ab0faf8188b9f99e6a273b24b97662194160cc8ca17cf9d1fb6f18d7fb93f"}, - {file = "shapely-2.0.4-cp312-cp312-win_amd64.whl", hash = "sha256:03152442d311a5e85ac73b39680dd64a9892fa42bb08fd83b3bab4fe6999bfa0"}, - {file = "shapely-2.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:994c244e004bc3cfbea96257b883c90a86e8cbd76e069718eb4c6b222a56f78b"}, - {file = "shapely-2.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05ffd6491e9e8958b742b0e2e7c346635033d0a5f1a0ea083547fcc854e5d5cf"}, - {file = "shapely-2.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbdc1140a7d08faa748256438291394967aa54b40009f54e8d9825e75ef6113"}, - {file = "shapely-2.0.4-cp37-cp37m-win32.whl", hash = "sha256:5af4cd0d8cf2912bd95f33586600cac9c4b7c5053a036422b97cfe4728d2eb53"}, - {file = "shapely-2.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:464157509ce4efa5ff285c646a38b49f8c5ef8d4b340f722685b09bb033c5ccf"}, - {file = "shapely-2.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:489c19152ec1f0e5c5e525356bcbf7e532f311bff630c9b6bc2db6f04da6a8b9"}, - {file = "shapely-2.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b79bbd648664aa6f44ef018474ff958b6b296fed5c2d42db60078de3cffbc8aa"}, - {file = "shapely-2.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:674d7baf0015a6037d5758496d550fc1946f34bfc89c1bf247cabdc415d7747e"}, - {file = "shapely-2.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cd4ccecc5ea5abd06deeaab52fcdba372f649728050c6143cc405ee0c166679"}, - {file = "shapely-2.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb5cdcbbe3080181498931b52a91a21a781a35dcb859da741c0345c6402bf00c"}, - {file = "shapely-2.0.4-cp38-cp38-win32.whl", hash = "sha256:55a38dcd1cee2f298d8c2ebc60fc7d39f3b4535684a1e9e2f39a80ae88b0cea7"}, - {file = "shapely-2.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:ec555c9d0db12d7fd777ba3f8b75044c73e576c720a851667432fabb7057da6c"}, - {file = "shapely-2.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9103abd1678cb1b5f7e8e1af565a652e036844166c91ec031eeb25c5ca8af0"}, - {file = "shapely-2.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:263bcf0c24d7a57c80991e64ab57cba7a3906e31d2e21b455f493d4aab534aaa"}, - {file = "shapely-2.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ddf4a9bfaac643e62702ed662afc36f6abed2a88a21270e891038f9a19bc08fc"}, - {file = "shapely-2.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:485246fcdb93336105c29a5cfbff8a226949db37b7473c89caa26c9bae52a242"}, - {file = "shapely-2.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8de4578e838a9409b5b134a18ee820730e507b2d21700c14b71a2b0757396acc"}, - {file = "shapely-2.0.4-cp39-cp39-win32.whl", hash = "sha256:9dab4c98acfb5fb85f5a20548b5c0abe9b163ad3525ee28822ffecb5c40e724c"}, - {file = "shapely-2.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:31c19a668b5a1eadab82ff070b5a260478ac6ddad3a5b62295095174a8d26398"}, - 
{file = "shapely-2.0.4.tar.gz", hash = "sha256:5dc736127fac70009b8d309a0eeb74f3e08979e530cf7017f2f507ef62e6cfb8"}, -] - -[package.dependencies] -numpy = ">=1.14,<3" - -[package.extras] -docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] -test = ["pytest", "pytest-cov"] - [[package]] name = "shellingham" version = "1.5.4" @@ -7762,17 +4611,6 @@ files = [ {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, ] -[[package]] -name = "sniffio" -version = "1.3.1" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, - {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, -] - [[package]] name = "sortedcontainers" version = "2.4.0" @@ -7795,186 +4633,6 @@ files = [ {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, ] -[[package]] -name = "sqlalchemy" -version = "1.4.52" -description = "Database Abstraction Library" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "SQLAlchemy-1.4.52-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f68016f9a5713684c1507cc37133c28035f29925c75c0df2f9d0f7571e23720a"}, - {file = "SQLAlchemy-1.4.52-cp310-cp310-manylinux1_x86_64.manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_5_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24bb0f81fbbb13d737b7f76d1821ec0b117ce8cbb8ee5e8641ad2de41aa916d3"}, - {file = "SQLAlchemy-1.4.52-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e93983cc0d2edae253b3f2141b0a3fb07e41c76cd79c2ad743fc27eb79c3f6db"}, - {file = 
"SQLAlchemy-1.4.52-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:84e10772cfc333eb08d0b7ef808cd76e4a9a30a725fb62a0495877a57ee41d81"}, - {file = "SQLAlchemy-1.4.52-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:427988398d2902de042093d17f2b9619a5ebc605bf6372f7d70e29bde6736842"}, - {file = "SQLAlchemy-1.4.52-cp310-cp310-win32.whl", hash = "sha256:1296f2cdd6db09b98ceb3c93025f0da4835303b8ac46c15c2136e27ee4d18d94"}, - {file = "SQLAlchemy-1.4.52-cp310-cp310-win_amd64.whl", hash = "sha256:80e7f697bccc56ac6eac9e2df5c98b47de57e7006d2e46e1a3c17c546254f6ef"}, - {file = "SQLAlchemy-1.4.52-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2f251af4c75a675ea42766880ff430ac33291c8d0057acca79710f9e5a77383d"}, - {file = "SQLAlchemy-1.4.52-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb8f9e4c4718f111d7b530c4e6fb4d28f9f110eb82e7961412955b3875b66de0"}, - {file = "SQLAlchemy-1.4.52-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afb1672b57f58c0318ad2cff80b384e816735ffc7e848d8aa51e0b0fc2f4b7bb"}, - {file = "SQLAlchemy-1.4.52-cp311-cp311-win32.whl", hash = "sha256:6e41cb5cda641f3754568d2ed8962f772a7f2b59403b95c60c89f3e0bd25f15e"}, - {file = "SQLAlchemy-1.4.52-cp311-cp311-win_amd64.whl", hash = "sha256:5bed4f8c3b69779de9d99eb03fd9ab67a850d74ab0243d1be9d4080e77b6af12"}, - {file = "SQLAlchemy-1.4.52-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:49e3772eb3380ac88d35495843daf3c03f094b713e66c7d017e322144a5c6b7c"}, - {file = "SQLAlchemy-1.4.52-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:618827c1a1c243d2540314c6e100aee7af09a709bd005bae971686fab6723554"}, - {file = "SQLAlchemy-1.4.52-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:de9acf369aaadb71a725b7e83a5ef40ca3de1cf4cdc93fa847df6b12d3cd924b"}, - {file = "SQLAlchemy-1.4.52-cp312-cp312-win32.whl", hash = "sha256:763bd97c4ebc74136ecf3526b34808c58945023a59927b416acebcd68d1fc126"}, - {file = "SQLAlchemy-1.4.52-cp312-cp312-win_amd64.whl", hash = "sha256:f12aaf94f4d9679ca475975578739e12cc5b461172e04d66f7a3c39dd14ffc64"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:853fcfd1f54224ea7aabcf34b227d2b64a08cbac116ecf376907968b29b8e763"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-manylinux1_x86_64.manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_5_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f98dbb8fcc6d1c03ae8ec735d3c62110949a3b8bc6e215053aa27096857afb45"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e135fff2e84103bc15c07edd8569612ce317d64bdb391f49ce57124a73f45c5"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5b5de6af8852500d01398f5047d62ca3431d1e29a331d0b56c3e14cb03f8094c"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3491c85df263a5c2157c594f54a1a9c72265b75d3777e61ee13c556d9e43ffc9"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-win32.whl", hash = "sha256:427c282dd0deba1f07bcbf499cbcc9fe9a626743f5d4989bfdfd3ed3513003dd"}, - {file = "SQLAlchemy-1.4.52-cp36-cp36m-win_amd64.whl", hash = "sha256:ca5ce82b11731492204cff8845c5e8ca1a4bd1ade85e3b8fcf86e7601bfc6a39"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:29d4247313abb2015f8979137fe65f4eaceead5247d39603cc4b4a610936cd2b"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-manylinux1_x86_64.manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_5_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a752bff4796bf22803d052d4841ebc3c55c26fb65551f2c96e90ac7c62be763a"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7ea11727feb2861deaa293c7971a4df57ef1c90e42cb53f0da40c3468388000"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d913f8953e098ca931ad7f58797f91deed26b435ec3756478b75c608aa80d139"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a251146b921725547ea1735b060a11e1be705017b568c9f8067ca61e6ef85f20"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-win32.whl", hash = "sha256:1f8e1c6a6b7f8e9407ad9afc0ea41c1f65225ce505b79bc0342159de9c890782"}, - {file = "SQLAlchemy-1.4.52-cp37-cp37m-win_amd64.whl", hash = "sha256:346ed50cb2c30f5d7a03d888e25744154ceac6f0e6e1ab3bc7b5b77138d37710"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4dae6001457d4497736e3bc422165f107ecdd70b0d651fab7f731276e8b9e12d"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-manylinux1_x86_64.manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_5_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5d2e08d79f5bf250afb4a61426b41026e448da446b55e4770c2afdc1e200fce"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bbce5dd7c7735e01d24f5a60177f3e589078f83c8a29e124a6521b76d825b85"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:bdb7b4d889631a3b2a81a3347c4c3f031812eb4adeaa3ee4e6b0d028ad1852b5"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c294ae4e6bbd060dd79e2bd5bba8b6274d08ffd65b58d106394cb6abbf35cf45"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-win32.whl", hash = 
"sha256:bcdfb4b47fe04967669874fb1ce782a006756fdbebe7263f6a000e1db969120e"}, - {file = "SQLAlchemy-1.4.52-cp38-cp38-win_amd64.whl", hash = "sha256:7d0dbc56cb6af5088f3658982d3d8c1d6a82691f31f7b0da682c7b98fa914e91"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:a551d5f3dc63f096ed41775ceec72fdf91462bb95abdc179010dc95a93957800"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-manylinux1_x86_64.manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_5_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab773f9ad848118df7a9bbabca53e3f1002387cdbb6ee81693db808b82aaab0"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2de46f5d5396d5331127cfa71f837cca945f9a2b04f7cb5a01949cf676db7d1"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7027be7930a90d18a386b25ee8af30514c61f3852c7268899f23fdfbd3107181"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99224d621affbb3c1a4f72b631f8393045f4ce647dd3262f12fe3576918f8bf3"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-win32.whl", hash = "sha256:c124912fd4e1bb9d1e7dc193ed482a9f812769cb1e69363ab68e01801e859821"}, - {file = "SQLAlchemy-1.4.52-cp39-cp39-win_amd64.whl", hash = "sha256:2c286fab42e49db23c46ab02479f328b8bdb837d3e281cae546cc4085c83b680"}, - {file = "SQLAlchemy-1.4.52.tar.gz", hash = "sha256:80e63bbdc5217dad3485059bdf6f65a7d43f33c8bde619df5c220edf03d87296"}, -] - -[package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} - -[package.extras] -aiomysql = ["aiomysql (>=0.2.0)", 
"greenlet (!=0.4.17)"] -aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] -asyncio = ["greenlet (!=0.4.17)"] -asyncmy = ["asyncmy (>=0.2.3,!=0.2.4)", "greenlet (!=0.4.17)"] -mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2)"] -mssql = ["pyodbc"] -mssql-pymssql = ["pymssql"] -mssql-pyodbc = ["pyodbc"] -mypy = ["mypy (>=0.910)", "sqlalchemy2-stubs"] -mysql = ["mysqlclient (>=1.4.0)", "mysqlclient (>=1.4.0,<2)"] -mysql-connector = ["mysql-connector-python"] -oracle = ["cx_oracle (>=7)", "cx_oracle (>=7,<8)"] -postgresql = ["psycopg2 (>=2.7)"] -postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] -postgresql-pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] -postgresql-psycopg2binary = ["psycopg2-binary"] -postgresql-psycopg2cffi = ["psycopg2cffi"] -pymysql = ["pymysql", "pymysql (<1)"] -sqlcipher = ["sqlcipher3_binary"] - -[[package]] -name = "sqlalchemy-bigquery" -version = "1.11.0" -description = "SQLAlchemy dialect for BigQuery" -optional = false -python-versions = "<3.13,>=3.8" -files = [ - {file = "sqlalchemy-bigquery-1.11.0.tar.gz", hash = "sha256:09a2b99b8591d441eef66d34d13057d0f09423fe259fef98c0502df61419d242"}, - {file = "sqlalchemy_bigquery-1.11.0-py2.py3-none-any.whl", hash = "sha256:99f868cfdd103b13f921ec1c1b748826b4b1187457dda48040da5ab5ba63c705"}, -] - -[package.dependencies] -google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" -google-auth = ">=1.25.0,<3.0.0dev" -google-cloud-bigquery = ">=3.3.6,<4.0.0dev" -packaging = "*" -sqlalchemy = ">=1.4.16,<3.0.0dev" - -[package.extras] -alembic = ["alembic"] -all = ["GeoAlchemy2", "alembic", "google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "packaging", "pyarrow (>=3.0.0)", "pytz", "shapely"] -bqstorage = ["google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] -geography = ["GeoAlchemy2", "shapely"] -tests = ["packaging", "pytz"] - -[[package]] 
-name = "sqlalchemy-jsonfield" -version = "1.0.2" -description = "SQLALchemy JSONField implementation for storing dicts at SQL" -optional = false -python-versions = ">=3.7.0" -files = [ - {file = "SQLAlchemy-JSONField-1.0.2.tar.gz", hash = "sha256:dab3abc9d75a1640e7f3d4875564a4199f665d27863da8d5a089e4eaca5e67f2"}, - {file = "SQLAlchemy_JSONField-1.0.2-py3-none-any.whl", hash = "sha256:b2945fa1e60b07d5764a7c73b18da427948b35dd4c07c0e94939001dc2dacf77"}, -] - -[package.dependencies] -sqlalchemy = "*" - -[[package]] -name = "sqlalchemy-spanner" -version = "1.7.0" -description = "SQLAlchemy dialect integrated into Cloud Spanner database" -optional = false -python-versions = "*" -files = [ - {file = "sqlalchemy_spanner-1.7.0-py3-none-any.whl", hash = "sha256:9925037a50b94eda358d462b694aebde61bcbeca1bb7c5a0cf247f672a340d7c"}, - {file = "sqlalchemy_spanner-1.7.0.tar.gz", hash = "sha256:c5a184710d289a3ab8e20f10998928d08b5b89838cf39c7ed0b5f52e8ee6e13b"}, -] - -[package.dependencies] -alembic = "*" -google-cloud-spanner = ">=3.12.0" -sqlalchemy = ">=1.1.13" - -[package.extras] -tracing = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] - -[[package]] -name = "sqlalchemy-utils" -version = "0.41.2" -description = "Various utility functions for SQLAlchemy." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "SQLAlchemy-Utils-0.41.2.tar.gz", hash = "sha256:bc599c8c3b3319e53ce6c5c3c471120bd325d0071fb6f38a10e924e3d07b9990"}, - {file = "SQLAlchemy_Utils-0.41.2-py3-none-any.whl", hash = "sha256:85cf3842da2bf060760f955f8467b87983fb2e30f1764fd0e24a48307dc8ec6e"}, -] - -[package.dependencies] -SQLAlchemy = ">=1.3" - -[package.extras] -arrow = ["arrow (>=0.3.4)"] -babel = ["Babel (>=1.3)"] -color = ["colour (>=0.0.4)"] -encrypted = ["cryptography (>=0.6)"] -intervals = ["intervals (>=0.7.1)"] -password = ["passlib (>=1.6,<2.0)"] -pendulum = ["pendulum (>=2.0.5)"] -phone = ["phonenumbers (>=5.9.2)"] -test = ["Jinja2 (>=2.3)", "Pygments (>=1.2)", "backports.zoneinfo", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "isort (>=4.2.2)", "pg8000 (>=1.12.4)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (==7.4.4)", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] -test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3.4)", "backports.zoneinfo", "colour (>=0.0.4)", "cryptography (>=0.6)", "docutils (>=0.10)", "flake8 (>=2.4.0)", "flexmock (>=0.9.7)", "furl (>=0.4.1)", "intervals (>=0.7.1)", "isort (>=4.2.2)", "passlib (>=1.6,<2.0)", "pendulum (>=2.0.5)", "pg8000 (>=1.12.4)", "phonenumbers (>=5.9.2)", "psycopg (>=3.1.8)", "psycopg2 (>=2.5.1)", "psycopg2cffi (>=2.8.1)", "pymysql", "pyodbc", "pytest (==7.4.4)", "python-dateutil", "python-dateutil (>=2.6)", "pytz (>=2014.2)"] -timezone = ["python-dateutil"] -url = ["furl (>=0.4.1)"] - -[[package]] -name = "sqlparse" -version = "0.5.0" -description = "A non-validating SQL parser." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "sqlparse-0.5.0-py3-none-any.whl", hash = "sha256:c204494cd97479d0e39f28c93d46c0b2d5959c7b9ab904762ea6c7af211c8663"}, - {file = "sqlparse-0.5.0.tar.gz", hash = "sha256:714d0a4932c059d16189f58ef5411ec2287a4360f17cdd0edd2d09d4c5087c93"}, -] - -[package.extras] -dev = ["build", "hatch"] -doc = ["sphinx"] - [[package]] name = "stack-data" version = "0.6.3" @@ -8037,17 +4695,6 @@ files = [ [package.extras] tests = ["pytest", "pytest-cov"] -[[package]] -name = "text-unidecode" -version = "1.3" -description = "The most basic Text::Unidecode port" -optional = false -python-versions = "*" -files = [ - {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, - {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, -] - [[package]] name = "threadpoolctl" version = "3.5.0" @@ -8164,59 +4811,6 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] -[[package]] -name = "uc-micro-py" -version = "1.0.3" -description = "Micro subset of unicode data files for linkify-it-py projects." -optional = false -python-versions = ">=3.7" -files = [ - {file = "uc-micro-py-1.0.3.tar.gz", hash = "sha256:d321b92cff673ec58027c04015fcaa8bb1e005478643ff4a500882eaab88c48a"}, - {file = "uc_micro_py-1.0.3-py3-none-any.whl", hash = "sha256:db1dffff340817673d7b466ec86114a9dc0e9d4d9b5ba229d9d60e5c12600cd5"}, -] - -[package.extras] -test = ["coverage", "pytest", "pytest-cov"] - -[[package]] -name = "unicodecsv" -version = "0.14.1" -description = "Python2's stdlib csv module is nice, but it doesn't support unicode. This module is a drop-in replacement which *does*." 
-optional = false -python-versions = "*" -files = [ - {file = "unicodecsv-0.14.1.tar.gz", hash = "sha256:018c08037d48649a0412063ff4eda26eaa81eff1546dbffa51fa5293276ff7fc"}, -] - -[[package]] -name = "universal-pathlib" -version = "0.2.2" -description = "pathlib api extended to use fsspec backends" -optional = false -python-versions = ">=3.8" -files = [ - {file = "universal_pathlib-0.2.2-py3-none-any.whl", hash = "sha256:9bc176112d593348bb29806a47e409eda78dff8d95391d66dd6f85e443aaa75d"}, - {file = "universal_pathlib-0.2.2.tar.gz", hash = "sha256:6bc215548792ad5db3553708b1c19bafd9e2fa1667dc925ed404c95e52ae2f13"}, -] - -[package.dependencies] -fsspec = ">=2022.1.0" - -[package.extras] -dev = ["adlfs", "aiohttp", "cheroot", "gcsfs", "moto[s3,server] (<5)", "mypy (==1.8.0)", "packaging", "pydantic", "pydantic-settings", "pylint (==2.17.4)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-mock (==3.12.0)", "pytest-sugar (==0.9.7)", "requests", "s3fs", "webdav4[fsspec]", "wsgidav"] -tests = ["mypy (==1.8.0)", "packaging", "pylint (==2.17.4)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-mock (==3.12.0)", "pytest-sugar (==0.9.7)"] - -[[package]] -name = "uritemplate" -version = "4.1.1" -description = "Implementation of RFC 6570 URI Templates" -optional = false -python-versions = ">=3.6" -files = [ - {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, - {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, -] - [[package]] name = "urllib3" version = "2.2.2" @@ -8409,41 +5003,6 @@ files = [ {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, ] -[[package]] -name = "werkzeug" -version = "2.3.8" -description = "The comprehensive WSGI web application library." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "werkzeug-2.3.8-py3-none-any.whl", hash = "sha256:bba1f19f8ec89d4d607a3bd62f1904bd2e609472d93cd85e9d4e178f472c3748"}, - {file = "werkzeug-2.3.8.tar.gz", hash = "sha256:554b257c74bbeb7a0d254160a4f8ffe185243f52a52035060b761ca62d977f03"}, -] - -[package.dependencies] -MarkupSafe = ">=2.1.1" - -[package.extras] -watchdog = ["watchdog (>=2.3)"] - -[[package]] -name = "wirerope" -version = "0.4.7" -description = "'Turn functions and methods into fully controllable objects'" -optional = false -python-versions = "*" -files = [ - {file = "wirerope-0.4.7-py2.py3-none-any.whl", hash = "sha256:332973a3be6898f02fd0e73b2e20414c5102cc6c811d75856a938206677495c8"}, - {file = "wirerope-0.4.7.tar.gz", hash = "sha256:f3961039218276283c5037da0fa164619def0327595f10892d562a61a8603990"}, -] - -[package.dependencies] -six = ">=1.11.0" - -[package.extras] -doc = ["sphinx"] -test = ["pytest (>=4.6.7)", "pytest-cov (>=2.6.1)"] - [[package]] name = "wrapt" version = "1.16.0" @@ -8523,23 +5082,6 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] -[[package]] -name = "wtforms" -version = "3.1.2" -description = "Form validation and rendering for Python web development." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "wtforms-3.1.2-py3-none-any.whl", hash = "sha256:bf831c042829c8cdbad74c27575098d541d039b1faa74c771545ecac916f2c07"}, - {file = "wtforms-3.1.2.tar.gz", hash = "sha256:f8d76180d7239c94c6322f7990ae1216dae3659b7aa1cee94b6318bdffb474b9"}, -] - -[package.dependencies] -markupsafe = "*" - -[package.extras] -email = ["email-validator"] - [[package]] name = "xyzservices" version = "2024.6.0" @@ -8672,22 +5214,7 @@ files = [ idna = ">=2.0" multidict = ">=4.0" -[[package]] -name = "zipp" -version = "3.19.2" -description = "Backport of pathlib-compatible object wrapper for zip files" -optional = false -python-versions = ">=3.8" -files = [ - {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, - {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, -] - -[package.extras] -doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] - [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "489d535e828faa827aa6cedc3bb739eff1c3b841770d3b9e94bdde44e572e621" +content-hash = "5b7e79eb2ca58918d786e61b6331115376a24705da5478c6feef85d37f24685e" diff --git a/pyproject.toml b/pyproject.toml index d91629ab3..094ca1f6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,8 +71,6 @@ pytest-xdist = "^3.5.0" ipython = "^8.19.0" ipykernel = "^6.28.0" google-cloud-dataproc = "^5.8.0" -apache-airflow = "^2.8.0" -apache-airflow-providers-google = "^10.13.1" pydoclint = ">=0.3.8,<0.6.0" prettier = "^0.0.7" deptry = ">=0.12,<0.21" @@ -113,7 +111,7 @@ 
requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" [tool.deptry] -extend_exclude = ["src/conftest.py", "src/airflow", "src/utils"] +extend_exclude = ["src/conftest.py", "src/utils"] [tool.deptry.per_rule_ignores] DEP001 = ["gentropy"] @@ -126,8 +124,8 @@ exclude = ["dist"] [tool.pytest.ini_options] addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml" -pythonpath = [".", "./src/airflow/dags"] -testpaths = ["tests/gentropy", "src/gentropy/"] +pythonpath = ["."] +testpaths = ["tests/gentropy", "src/gentropy"] # Semi-strict mode for mypy [tool.mypy] diff --git a/src/airflow/.env b/src/airflow/.env deleted file mode 100644 index 2bcd8244f..000000000 --- a/src/airflow/.env +++ /dev/null @@ -1,6 +0,0 @@ -AIRFLOW_IMAGE_NAME=extending_airflow:latest -GOOGLE_LOCAL_CREDENTIALS_PATH=~/.config/gcloud -GOOGLE_DOCKER_CREDENTIALS_PATH=/.config/gcloud -GOOGLE_APPLICATION_CREDENTIALS=/.config/gcloud/service_account_credentials.json -AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT='google-cloud-platform://?extra__google_cloud_platform__key_path=/.config/gcloud/service_account_credentials.json' -GCP_PROJECT_ID=open-targets-genetics-dev diff --git a/src/airflow/Dockerfile b/src/airflow/Dockerfile deleted file mode 100644 index 9ca7c9193..000000000 --- a/src/airflow/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -FROM apache/airflow:slim-2.7.3-python3.10 - -# Install additional Python requirements. -# --no-cache-dir is a good practice when installing packages using pip, because it helps to keep the image lightweight. 
-COPY requirements.txt /requirements.txt -RUN pip install --quiet --user --no-cache-dir --upgrade pip setuptools && \ - pip install --quiet --user --no-cache-dir -r /requirements.txt - -# Source: https://airflow.apache.org/docs/docker-stack/recipes.html -# Installing the GCP CLI in the container -SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] - -USER 0 -ARG CLOUD_SDK_VERSION=452.0.0 -ENV GCLOUD_HOME=/home/google-cloud-sdk - -ENV PATH="${GCLOUD_HOME}/bin/:${PATH}" - -RUN DOWNLOAD_URL="https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${CLOUD_SDK_VERSION}-linux-x86_64.tar.gz" \ - && TMP_DIR="$(mktemp -d)" \ - && curl -fL "${DOWNLOAD_URL}" --output "${TMP_DIR}/google-cloud-sdk.tar.gz" \ - && mkdir -p "${GCLOUD_HOME}" \ - && tar xzf "${TMP_DIR}/google-cloud-sdk.tar.gz" -C "${GCLOUD_HOME}" --strip-components=1 \ - && "${GCLOUD_HOME}/install.sh" \ - --bash-completion=false \ - --path-update=false \ - --usage-reporting=false \ - --quiet \ - && rm -rf "${TMP_DIR}" \ - && gcloud --version - -# Switch back to a non-root user for security purposes -USER airflow diff --git a/src/airflow/config/.gitkeep b/src/airflow/config/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/airflow/dags/.gitkeep b/src/airflow/dags/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/airflow/dags/common_airflow.py b/src/airflow/dags/common_airflow.py deleted file mode 100644 index 9c3c2f91c..000000000 --- a/src/airflow/dags/common_airflow.py +++ /dev/null @@ -1,490 +0,0 @@ -"""Airflow boilerplate code which can be shared by several DAGs.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Optional - -import pendulum -import yaml -from google.cloud import batch_v1, dataproc_v1, storage - -from airflow.providers.google.cloud.operators.dataproc import ( - ClusterGenerator, - DataprocCreateClusterOperator, - DataprocDeleteClusterOperator, - DataprocSubmitJobOperator, -) -from 
airflow.utils.trigger_rule import TriggerRule - -if TYPE_CHECKING: - from pathlib import Path - -# Code version. It has to be repeated here as well as in `pyproject.toml`, because Airflow isn't able to look at files outside of its `dags/` directory. -GENTROPY_VERSION = "0.0.0" - -# Cloud configuration. -GCP_PROJECT = "open-targets-genetics-dev" -GCP_REGION = "europe-west1" -GCP_ZONE = "europe-west1-d" -GCP_DATAPROC_IMAGE = "2.1" -GCP_AUTOSCALING_POLICY = "otg-etl" - -# Cluster init configuration. -INITIALISATION_BASE_PATH = ( - f"gs://genetics_etl_python_playground/initialisation/{GENTROPY_VERSION}" -) -CONFIG_TAG = f"{INITIALISATION_BASE_PATH}/config.tar.gz" -PACKAGE_WHEEL = ( - f"{INITIALISATION_BASE_PATH}/gentropy-{GENTROPY_VERSION}-py3-none-any.whl" -) -INITIALISATION_EXECUTABLE_FILE = [ - f"{INITIALISATION_BASE_PATH}/install_dependencies_on_cluster.sh" -] - -# CLI configuration. -CLUSTER_CONFIG_DIR = "/config" -CONFIG_NAME = "ot_config" -PYTHON_CLI = "cli.py" - -# Shared DAG construction parameters. -shared_dag_args = { - "owner": "Open Targets Data Team", - "retries": 0, -} - -shared_dag_kwargs = { - "tags": ["genetics_etl", "experimental"], - "start_date": pendulum.now(tz="Europe/London").subtract(days=1), - "schedule": "@once", - "catchup": False, -} - -MACHINES = { - "VEPMACHINE": { - "machine_type": "e2-standard-4", - "cpu_milli": 2000, - "memory_mib": 2000, - "boot_disk_mib": 10000, - }, -} - - -def check_gcp_folder_exists(bucket_name: str, folder_path: str) -> bool: - """Check if a folder exists in a Google Cloud bucket. - - Args: - bucket_name (str): The name of the Google Cloud bucket. - folder_path (str): The path of the folder to check. - - Returns: - bool: True if the folder exists, False otherwise. 
- """ - client = storage.Client() - bucket = client.get_bucket(bucket_name) - blobs = bucket.list_blobs(prefix=folder_path) - return any(blobs) - - -def create_cluster( - cluster_name: str, - master_machine_type: str = "n1-highmem-16", - worker_machine_type: str = "n1-standard-16", - num_workers: int = 2, - num_preemptible_workers: int = 0, - num_local_ssds: int = 1, - autoscaling_policy: str = GCP_AUTOSCALING_POLICY, - master_disk_size: int = 500, -) -> DataprocCreateClusterOperator: - """Generate an Airflow task to create a Dataproc cluster. Common parameters are reused, and varying parameters can be specified as needed. - - Args: - cluster_name (str): Name of the cluster. - master_machine_type (str): Machine type for the master node. Defaults to "n1-highmem-8". - worker_machine_type (str): Machine type for the worker nodes. Defaults to "n1-standard-16". - num_workers (int): Number of worker nodes. Defaults to 2. - num_preemptible_workers (int): Number of preemptible worker nodes. Defaults to 0. - num_local_ssds (int): How many local SSDs to attach to each worker node, both primary and secondary. Defaults to 1. - autoscaling_policy (str): Name of the autoscaling policy to use. Defaults to GCP_AUTOSCALING_POLICY. - master_disk_size (int): Size of the master node's boot disk in GB. Defaults to 500. - - Returns: - DataprocCreateClusterOperator: Airflow task to create a Dataproc cluster. - """ - # Create base cluster configuration. 
- cluster_config = ClusterGenerator( - project_id=GCP_PROJECT, - zone=GCP_ZONE, - master_machine_type=master_machine_type, - worker_machine_type=worker_machine_type, - master_disk_size=master_disk_size, - worker_disk_size=500, - num_preemptible_workers=num_preemptible_workers, - num_workers=num_workers, - image_version=GCP_DATAPROC_IMAGE, - enable_component_gateway=True, - optional_components=["JUPYTER"], - init_actions_uris=INITIALISATION_EXECUTABLE_FILE, - metadata={ - "CONFIGTAR": CONFIG_TAG, - "PACKAGE": PACKAGE_WHEEL, - }, - idle_delete_ttl=30 * 60, # In seconds. - autoscaling_policy=f"projects/{GCP_PROJECT}/regions/{GCP_REGION}/autoscalingPolicies/{autoscaling_policy}", - ).make() - - # If specified, amend the configuration to include local SSDs for worker nodes. - if num_local_ssds: - for worker_section in ("worker_config", "secondary_worker_config"): - # Create a disk config section if it does not exist. - cluster_config[worker_section].setdefault("disk_config", {}) - # Specify the number of local SSDs. - cluster_config[worker_section]["disk_config"]["num_local_ssds"] = ( - num_local_ssds - ) - - # Return the cluster creation operator. - return DataprocCreateClusterOperator( - task_id="create_cluster", - project_id=GCP_PROJECT, - cluster_config=cluster_config, - region=GCP_REGION, - cluster_name=cluster_name, - trigger_rule=TriggerRule.ALL_SUCCESS, - ) - - -def submit_job( - cluster_name: str, - task_id: str, - job_type: str, - job_specification: dict[str, Any], - trigger_rule: TriggerRule = TriggerRule.ALL_SUCCESS, -) -> DataprocSubmitJobOperator: - """Submit an arbitrary job to a Dataproc cluster. - - Args: - cluster_name (str): Name of the cluster. - task_id (str): Name of the task. - job_type (str): Type of the job to submit. - job_specification (dict[str, Any]): Specification of the job to submit. - trigger_rule (TriggerRule): Trigger rule for the task. Defaults to TriggerRule.ALL_SUCCESS. 
- - Returns: - DataprocSubmitJobOperator: Airflow task to submit an arbitrary job to a Dataproc cluster. - """ - return DataprocSubmitJobOperator( - task_id=task_id, - region=GCP_REGION, - project_id=GCP_PROJECT, - job={ - "job_uuid": f"airflow-{task_id}", - "reference": {"project_id": GCP_PROJECT}, - "placement": {"cluster_name": cluster_name}, - job_type: job_specification, - }, - trigger_rule=trigger_rule, - ) - - -def submit_pyspark_job( - cluster_name: str, - task_id: str, - python_module_path: str, - args: list[str], - trigger_rule: TriggerRule = TriggerRule.ALL_SUCCESS, -) -> DataprocSubmitJobOperator: - """Submit a PySpark job to a Dataproc cluster. - - Args: - cluster_name (str): Name of the cluster. - task_id (str): Name of the task. - python_module_path (str): Path to the Python module to run. - args (list[str]): Arguments to pass to the Python module. - trigger_rule (TriggerRule): Trigger rule for the task. Defaults to TriggerRule.ALL_SUCCESS. - - Returns: - DataprocSubmitJobOperator: Airflow task to submit a PySpark job to a Dataproc cluster. - """ - return submit_job( - cluster_name=cluster_name, - task_id=task_id, - job_type="pyspark_job", - trigger_rule=trigger_rule, - job_specification={ - "main_python_file_uri": python_module_path, - "args": args, - "properties": { - "spark.jars": "/opt/conda/miniconda3/lib/python3.10/site-packages/hail/backend/hail-all-spark.jar", - "spark.driver.extraClassPath": "/opt/conda/miniconda3/lib/python3.10/site-packages/hail/backend/hail-all-spark.jar", - "spark.executor.extraClassPath": "./hail-all-spark.jar", - "spark.serializer": "org.apache.spark.serializer.KryoSerializer", - "spark.kryo.registrator": "is.hail.kryo.HailKryoRegistrator", - }, - }, - ) - - -def submit_step( - cluster_name: str, - step_id: str, - task_id: str = "", - trigger_rule: TriggerRule = TriggerRule.ALL_SUCCESS, - other_args: Optional[list[str]] = None, -) -> DataprocSubmitJobOperator: - """Submit a PySpark job to execute a specific CLI step. 
- - Args: - cluster_name (str): Name of the cluster. - step_id (str): Name of the step in gentropy. - task_id (str): Name of the task. Defaults to step_id. - trigger_rule (TriggerRule): Trigger rule for the task. Defaults to TriggerRule.ALL_SUCCESS. - other_args (Optional[list[str]]): Other arguments to pass to the CLI step. Defaults to None. - - Returns: - DataprocSubmitJobOperator: Airflow task to submit a PySpark job to execute a specific CLI step. - """ - if task_id == "": - task_id = step_id - return submit_pyspark_job( - cluster_name=cluster_name, - task_id=task_id, - python_module_path=f"{INITIALISATION_BASE_PATH}/{PYTHON_CLI}", - trigger_rule=trigger_rule, - args=[f"step={step_id}"] - + (other_args if other_args is not None else []) - + [ - f"--config-dir={CLUSTER_CONFIG_DIR}", - f"--config-name={CONFIG_NAME}", - ], - ) - - -def install_dependencies(cluster_name: str) -> DataprocSubmitJobOperator: - """Install dependencies on a Dataproc cluster. - - Args: - cluster_name (str): Name of the cluster. - - Returns: - DataprocSubmitJobOperator: Airflow task to install dependencies on a Dataproc cluster. - """ - return submit_job( - cluster_name=cluster_name, - task_id="install_dependencies", - job_type="pig_job", - job_specification={ - "jar_file_uris": [ - f"gs://genetics_etl_python_playground/initialisation/{GENTROPY_VERSION}/install_dependencies_on_cluster.sh" - ], - "query_list": { - "queries": [ - "sh chmod 750 ${PWD}/install_dependencies_on_cluster.sh", - "sh ${PWD}/install_dependencies_on_cluster.sh", - ] - }, - }, - ) - - -def delete_cluster(cluster_name: str) -> DataprocDeleteClusterOperator: - """Generate an Airflow task to delete a Dataproc cluster. - - Args: - cluster_name (str): Name of the cluster. - - Returns: - DataprocDeleteClusterOperator: Airflow task to delete a Dataproc cluster. 
- """ - return DataprocDeleteClusterOperator( - task_id="delete_cluster", - project_id=GCP_PROJECT, - cluster_name=cluster_name, - region=GCP_REGION, - trigger_rule=TriggerRule.ALL_DONE, - ) - - -def read_yaml_config(config_path: Path) -> Any: - """Parse a YAMl config file and do all necessary checks. - - Args: - config_path (Path): Path to the YAML config file. - - Returns: - Any: Parsed YAML config file. - """ - assert config_path.exists(), f"YAML config path {config_path} does not exist." - with open(config_path) as config_file: - return yaml.safe_load(config_file) - - -def generate_dag(cluster_name: str, tasks: list[DataprocSubmitJobOperator]) -> Any: - """For a list of tasks, generate a complete DAG. - - Args: - cluster_name (str): Name of the cluster. - tasks (list[DataprocSubmitJobOperator]): List of tasks to execute. - - Returns: - Any: Airflow DAG. - """ - return ( - create_cluster(cluster_name) - >> install_dependencies(cluster_name) - >> tasks - >> delete_cluster(cluster_name) - ) - - -def submit_pyspark_job_no_operator( - cluster_name: str, - step_id: str, - other_args: Optional[list[str]] = None, -) -> None: - """Submits the Pyspark job to the cluster. - - Args: - cluster_name (str): Cluster name - step_id (str): Step id - other_args (Optional[list[str]]): Other arguments to pass to the CLI step. Defaults to None. - """ - # Create the job client. - job_client = dataproc_v1.JobControllerClient( - client_options={"api_endpoint": f"{GCP_REGION}-dataproc.googleapis.com:443"} - ) - - python_uri = f"{INITIALISATION_BASE_PATH}/{PYTHON_CLI}" - # Create the job config. 'main_jar_file_uri' can also be a - # Google Cloud Storage URL. 
- job_description = { - "placement": {"cluster_name": cluster_name}, - "pyspark_job": { - "main_python_file_uri": python_uri, - "args": [f"step={step_id}"] - + (other_args if other_args is not None else []) - + [ - f"--config-dir={CLUSTER_CONFIG_DIR}", - f"--config-name={CONFIG_NAME}", - ], - "properties": { - "spark.jars": "/opt/conda/miniconda3/lib/python3.10/site-packages/hail/backend/hail-all-spark.jar", - "spark.driver.extraClassPath": "/opt/conda/miniconda3/lib/python3.10/site-packages/hail/backend/hail-all-spark.jar", - "spark.executor.extraClassPath": "./hail-all-spark.jar", - "spark.serializer": "org.apache.spark.serializer.KryoSerializer", - "spark.kryo.registrator": "is.hail.kryo.HailKryoRegistrator", - }, - }, - } - job_client.submit_job( - project_id=GCP_PROJECT, region=GCP_REGION, job=job_description - ) - - -def create_container_runnable( - image: str, commands: list[str], **kwargs: Any -) -> batch_v1.Runnable: - """Create a container runnable for a Batch job with additional optional parameters. - - Args: - image (str): The Docker image to use. - commands (list[str]): The commands to run in the container. - **kwargs (Any): Additional optional parameters to set on the container. - - Returns: - batch_v1.Runnable: The container runnable. - """ - container = batch_v1.Runnable.Container( - image_uri=image, entrypoint="/bin/sh", commands=commands, **kwargs - ) - return batch_v1.Runnable(container=container) - - -def create_task_spec( - image: str, commands: list[str], **kwargs: Any -) -> batch_v1.TaskSpec: - """Create a task for a Batch job. - - Args: - image (str): The Docker image to use. - commands (list[str]): The commands to run in the container. - **kwargs (Any): Any additional parameter to pass to the container runnable - - Returns: - batch_v1.TaskSpec: The task specification. 
- """ - task = batch_v1.TaskSpec() - task.runnables = [create_container_runnable(image, commands, **kwargs)] - return task - - -def set_up_mounting_points( - mounting_points: list[dict[str, str]], -) -> list[batch_v1.Volume]: - """Set up the mounting points for the container. - - Args: - mounting_points (list[dict[str, str]]): The mounting points. - - Returns: - list[batch_v1.Volume]: The volumes. - """ - volumes = [] - for mount in mounting_points: - gcs_bucket = batch_v1.GCS() - gcs_bucket.remote_path = mount["remote_path"] - gcs_volume = batch_v1.Volume() - gcs_volume.gcs = gcs_bucket - gcs_volume.mount_path = mount["mount_point"] - volumes.append(gcs_volume) - return volumes - - -def create_batch_job( - task: batch_v1.TaskSpec, - machine: str, - task_env: list[batch_v1.Environment], - mounting_points: list[dict[str, str]] | None = None, -) -> batch_v1.Job: - """Create a Google Batch job. - - Args: - task (batch_v1.TaskSpec): The task specification. - machine (str): The machine type to use. - task_env (list[batch_v1.Environment]): The environment variables for the task. - mounting_points (list[dict[str, str]] | None): List of mounting points. - - Returns: - batch_v1.Job: The Batch job. 
- """ - resources = batch_v1.ComputeResource() - resources.cpu_milli = MACHINES[machine]["cpu_milli"] - resources.memory_mib = MACHINES[machine]["memory_mib"] - resources.boot_disk_mib = MACHINES[machine]["boot_disk_mib"] - task.compute_resource = resources - - task.max_retry_count = 3 - task.max_run_duration = "43200s" - - # The mounting points are set up and assigned to the task: - task.volumes = set_up_mounting_points(mounting_points) if mounting_points else None - - group = batch_v1.TaskGroup() - group.task_spec = task - group.task_environments = task_env - - policy = batch_v1.AllocationPolicy.InstancePolicy() - policy.machine_type = MACHINES[machine]["machine_type"] - policy.provisioning_model = "SPOT" - - instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate() - instances.policy = policy - allocation_policy = batch_v1.AllocationPolicy() - allocation_policy.instances = [instances] - - job = batch_v1.Job() - job.task_groups = [group] - job.allocation_policy = allocation_policy - job.logs_policy = batch_v1.LogsPolicy() - job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING - - return job diff --git a/src/airflow/dags/configs/dag.yaml b/src/airflow/dags/configs/dag.yaml deleted file mode 100644 index 24d185c1e..000000000 --- a/src/airflow/dags/configs/dag.yaml +++ /dev/null @@ -1,17 +0,0 @@ -- id: "ot_gene_index" -- id: "ot_variant_to_gene" - prerequisites: - - "ot_gene_index" -- id: "ot_colocalisation_ecaviar" -- id: "ot_colocalisation_coloc" -- id: "ot_locus_to_gene_train" - prerequisites: - - "ot_variant_to_gene" - - "ot_colocalisation_ecaviar" - - "ot_colocalisation_coloc" -- id: "ot_locus_to_gene_predict" - prerequisites: - - "ot_locus_to_gene_train" - - "ot_variant_to_gene" - - "ot_colocalisation_ecaviar" - - "ot_colocalisation_coloc" diff --git a/src/airflow/dags/configs/variant_sources.yaml b/src/airflow/dags/configs/variant_sources.yaml deleted file mode 100644 index 233eb0ccf..000000000 --- 
a/src/airflow/dags/configs/variant_sources.yaml +++ /dev/null @@ -1,13 +0,0 @@ -sources_inclusion_list: - - name: uniprot - location: gs://open-targets-pre-data-releases/24.09/input/evidence-files/uniprot.json.gz ## input - format: json - - name: clinvar - location: gs://open-targets-pre-data-releases/24.09/input/evidence-files/eva.json.gz - format: json - - name: pharmgkb - location: gs://open-targets-pre-data-releases/24.09/input/pharmacogenomics-inputs/pharmacogenomics.json.gz - format: json - - name: gentropy_credible_sets - location: gs://genetics_etl_python_playground/releases/24.06/credible_set - format: parquet diff --git a/src/airflow/dags/data_validation.py b/src/airflow/dags/data_validation.py deleted file mode 100644 index 875a169dc..000000000 --- a/src/airflow/dags/data_validation.py +++ /dev/null @@ -1,96 +0,0 @@ -"""DAG to validate study locus and study index datasets.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG - -CLUSTER_NAME = "otg-validation" - -# Input datasets: -STUDY_INDICES = [ - "gs://gwas_catalog_data/study_index", - "gs://eqtl_catalogue_data/study_index", - "gs://finngen_data/r10/study_index", -] -STUDY_LOCI = [ - "gs://gwas_catalog_data/credible_set_datasets/gwas_catalog_PICSed_curated_associations", - "gs://gwas_catalog_data/credible_set_datasets/gwas_catalog_PICSed_summary_statistics", - "gs://eqtl_catalogue_data/credible_set_datasets/susie", - "gs://finngen_data/r10/credible_set_datasets/finngen_susie_processed", -] -TARGET_INDEX = "gs://genetics_etl_python_playground/releases/24.06/gene_index" -DISEASE_INDEX = "gs://open-targets-pre-data-releases/24.06/output/etl/parquet/diseases" - -# Output datasets: -VALIDATED_STUDY = "gs://ot-team/dsuveges/otg-data/validated_study_index" -INVALID_STUDY = f"{VALIDATED_STUDY}_invalid" -INVALID_STUDY_QC = [ - "UNRESOLVED_TARGET", - "UNRESOLVED_DISEASE", - "UNKNOWN_STUDY_TYPE", - "DUPLICATED_STUDY", - 
"NO_GENE_PROVIDED", -] - -VALIDATED_STUDY_LOCI = "gs://ot-team/dsuveges/otg-data/validated_credible_set" -INVALID_STUDY_LOCI = f"{VALIDATED_STUDY_LOCI}_invalid" -INVALID_STUDY_LOCUS_QC = [ - "DUPLICATED_STUDYLOCUS_ID", - "AMBIGUOUS_STUDY", - "FAILED_STUDY", - "MISSING_STUDY", - "NO_GENOMIC_LOCATION_FLAG", - "COMPOSITE_FLAG", - "INCONSISTENCY_FLAG", - "PALINDROMIC_ALLELE_FLAG", -] - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — Study locus and study index validation", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -) as dag: - # Definition of the study index validation step: - validate_studies = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="study_validation", - task_id="study_validation", - other_args=[ - f"step.study_index_path={STUDY_INDICES}", - f"step.target_index_path={TARGET_INDEX}", - f"step.disease_index_path={DISEASE_INDEX}", - f"step.valid_study_index_path={VALIDATED_STUDY}", - f"step.invalid_study_index_path={INVALID_STUDY_LOCI}", - f"step.invalid_qc_reasons={INVALID_STUDY_QC}", - ], - ) - - # Definition of the study locus validation step: - validate_study_loci = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="credible_set_validation", - task_id="credible_set_validation", - other_args=[ - f"step.study_index_path={VALIDATED_STUDY}", - f"step.study_locus_path={STUDY_LOCI}", - f"step.valid_study_locus_path={VALIDATED_STUDY_LOCI}", - f"step.invalid_study_locus_path={INVALID_STUDY_LOCI}", - f"step.invalid_qc_reasons={INVALID_STUDY_LOCUS_QC}", - ], - ) - - ( - common.create_cluster( - CLUSTER_NAME, - master_machine_type="n1-highmem-32", - ) - >> common.install_dependencies(CLUSTER_NAME) - >> validate_studies - >> validate_study_loci - # >> common.delete_cluster(CLUSTER_NAME) - ) diff --git a/src/airflow/dags/eqtl_preprocess.py b/src/airflow/dags/eqtl_preprocess.py deleted file mode 100644 index 309604e09..000000000 --- a/src/airflow/dags/eqtl_preprocess.py +++ /dev/null @@ -1,73 
+0,0 @@ -"""Airflow DAG to extract credible sets and a study index from eQTL Catalogue's finemapping results.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.dataflow import ( - DataflowTemplatedJobStartOperator, -) -from airflow.providers.google.cloud.operators.gcs import GCSDeleteObjectsOperator - -CLUSTER_NAME = "otg-preprocess-eqtl" -AUTOSCALING = "eqtl-preprocess" -PROJECT_ID = "open-targets-genetics-dev" - -EQTL_CATALOGUE_SUSIE_LOCATION = "gs://eqtl_catalogue_data/ebi_ftp/susie" -TEMP_DECOMPRESS_LOCATION = f"{EQTL_CATALOGUE_SUSIE_LOCATION}_decompressed_tmp" -DECOMPRESS_FAILED_LOG = f"{TEMP_DECOMPRESS_LOCATION}/logs.log" -STUDY_INDEX_PATH = "gs://eqtl_catalogue_data/study_index" -CREDIBLE_SET_PATH = "gs://eqtl_catalogue_data/credible_set_datasets/susie" - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — eQTL preprocess", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - # SuSIE fine mapping results are stored as gzipped files in a GCS bucket. - # To improve processing performance, we decompress the files before processing to a temporary location in GCS. 
- decompression_job = DataflowTemplatedJobStartOperator( - task_id="decompress_susie_outputs", - template="gs://dataflow-templates/latest/Bulk_Decompress_GCS_Files", - location="europe-west1", - project_id=PROJECT_ID, - parameters={ - "inputFilePattern": f"{EQTL_CATALOGUE_SUSIE_LOCATION}/**/*.gz", - "outputDirectory": TEMP_DECOMPRESS_LOCATION, - "outputFailureFile": DECOMPRESS_FAILED_LOG, - }, - ) - - ingestion_job = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_eqtl_catalogue", - task_id="ot_eqtl_ingestion", - other_args=[ - f"step.eqtl_catalogue_paths_imported={TEMP_DECOMPRESS_LOCATION}", - f"step.eqtl_catalogue_study_index_out={STUDY_INDEX_PATH}", - f"step.eqtl_catalogue_credible_sets_out={CREDIBLE_SET_PATH}", - ], - ) - - delete_decompressed_job = GCSDeleteObjectsOperator( - task_id="delete_decompressed_files", - bucket_name=TEMP_DECOMPRESS_LOCATION.split("/")[2], - prefix=f"{TEMP_DECOMPRESS_LOCATION.split('/')[-1]}/", - ) - - ( - decompression_job - >> common.create_cluster( - CLUSTER_NAME, - autoscaling_policy=AUTOSCALING, - num_workers=4, - worker_machine_type="n1-highmem-8", - ) - >> common.install_dependencies(CLUSTER_NAME) - >> ingestion_job - >> [delete_decompressed_job, common.delete_cluster(CLUSTER_NAME)] - ) diff --git a/src/airflow/dags/genetics_etl.py b/src/airflow/dags/genetics_etl.py deleted file mode 100644 index aeb87398c..000000000 --- a/src/airflow/dags/genetics_etl.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Test DAG to prototype data transfer.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG -from airflow.operators.python import ShortCircuitOperator -from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator -from airflow.utils.task_group import TaskGroup - -CLUSTER_NAME = "otg-etl" -SOURCE_CONFIG_FILE_PATH = Path(__file__).parent / "configs" / "dag.yaml" - -# Release specific variables: -RELEASE_VERSION = "24.06" 
-RELEASE_BUCKET_NAME = "genetics_etl_python_playground" - -# Datasource paths: -GWAS_CATALOG_BUCKET_NAME = "gwas_catalog_data" -EQTL_BUCKET_NAME = "eqtl_catalogue_data" -FINNGEN_BUCKET_NAME = "finngen_data" -FINNGEN_RELEASE = "r10" - -# Files to move: -DATA_TO_MOVE = { - # GWAS Catalog summary study index: - "gwas_catalog_study_index": { - "source_bucket": GWAS_CATALOG_BUCKET_NAME, - "source_object": "study_index", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/study_index/gwas_catalog", - }, - # PICS credible sets from GWAS Catalog curated associations: - "gwas_catalog_curated_credible_set": { - "source_bucket": GWAS_CATALOG_BUCKET_NAME, - "source_object": "credible_set_datasets/gwas_catalog_PICSed_curated_associations", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/credible_set/gwas_catalog_PICSed_curated_associations", - }, - # PICS credible sets from GWAS Catalog summary statistics: - "gwas_catalog_sumstats_credible_set": { - "source_bucket": GWAS_CATALOG_BUCKET_NAME, - "source_object": "credible_set_datasets/gwas_catalog_PICSed_summary_statistics", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/credible_set/gwas_catalog_PICSed_summary_statistics", - }, - # GWAS Catalog manifest files: - "gwas_catalog_manifests": { - "source_bucket": GWAS_CATALOG_BUCKET_NAME, - "source_object": "manifests", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/manifests", - }, - # eQTL Catalog study index: - "eqtl_catalogue_study_index": { - "source_bucket": EQTL_BUCKET_NAME, - "source_object": "study_index", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/study_index/eqtl_catalogue", - }, - # eQTL Catalog SuSiE credible sets: - "eqtl_catalogue_susie_credible_set": { - "source_bucket": EQTL_BUCKET_NAME, - "source_object": 
"credible_set_datasets/susie", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/credible_set/eqtl_catalogue_susie", - }, - # Finngen study index: - "finngen_study_index": { - "source_bucket": FINNGEN_BUCKET_NAME, - "source_object": f"{FINNGEN_RELEASE}/study_index", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/study_index/finngen", - }, - # Finngen SuSiE credible sets: - "finngen_susie_credible_set": { - "source_bucket": FINNGEN_BUCKET_NAME, - "source_object": f"{FINNGEN_RELEASE}/credible_set_datasets/finngen_susie_processed", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/credible_set/finngen_susie", - }, - # L2G gold standard: - "gold_standard": { - "source_bucket": "genetics_etl_python_playground", - "source_object": "input/l2g/gold_standard/curation.json", - "destination_bucket": RELEASE_BUCKET_NAME, - "destination_object": f"releases/{RELEASE_VERSION}/locus_to_gene_gold_standard.json", - }, -} - - -# This operator meant to fail the DAG if the release folder exists: -ensure_release_folder_not_exists = ShortCircuitOperator( - task_id="test_release_folder_exists", - python_callable=lambda bucket, path: not common.check_gcp_folder_exists( - bucket, path - ), - op_kwargs={ - "bucket": RELEASE_BUCKET_NAME, - "path": f"releases/{RELEASE_VERSION}", - }, -) - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics ETL workflow", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - # Compiling tasks for moving data to the right place: - with TaskGroup(group_id="data_transfer") as data_transfer: - # Defining the tasks to execute in the task group: - [ - GCSToGCSOperator( - task_id=f"move_{data_name}", - source_bucket=data["source_bucket"], - source_object=data["source_object"], - destination_bucket=data["destination_bucket"], - destination_object=data["destination_object"], - ) 
- for data_name, data in DATA_TO_MOVE.items() - ] - - with TaskGroup(group_id="genetics_etl") as genetics_etl: - # Parse and define all steps and their prerequisites. - tasks = {} - steps = common.read_yaml_config(SOURCE_CONFIG_FILE_PATH) - for step in steps: - # Define task for the current step. - step_id = step["id"] - this_task = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id=step_id, - task_id=step_id, - ) - # Chain prerequisites. - tasks[step_id] = this_task - for prerequisite in step.get("prerequisites", []): - this_task.set_upstream(tasks[prerequisite]) - - common.generate_dag(cluster_name=CLUSTER_NAME, tasks=list(tasks.values())) - - # DAG description: - ( - # Test that the release folder doesn't exist: - ensure_release_folder_not_exists - # Run data transfer: - >> data_transfer - # Once datasets are transferred, run the rest of the steps: - >> genetics_etl - ) diff --git a/src/airflow/dags/gnomad_preprocess.py b/src/airflow/dags/gnomad_preprocess.py deleted file mode 100644 index 54e6b6bf4..000000000 --- a/src/airflow/dags/gnomad_preprocess.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Airflow DAG for the Preprocess GnomAD datasets - LD index and GnomAD variant set.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG - -CLUSTER_NAME = "gnomad-preprocess" - -ALL_STEPS = [ - "ot_ld_index", - "ot_gnomad_variants", -] - - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — GnomAD Preprocess", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - all_tasks = [ - common.submit_step(cluster_name=CLUSTER_NAME, step_id=step, task_id=step) - for step in ALL_STEPS - ] - dag = common.generate_dag(cluster_name=CLUSTER_NAME, tasks=all_tasks) diff --git a/src/airflow/dags/gwas_catalog_harmonisation.py b/src/airflow/dags/gwas_catalog_harmonisation.py deleted file mode 100644 index e6399e957..000000000 --- 
a/src/airflow/dags/gwas_catalog_harmonisation.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Airflow DAG for the harmonisation part of the pipeline.""" - -from __future__ import annotations - -import re -import time -from pathlib import Path -from typing import Any - -import common_airflow as common - -from airflow.decorators import task -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator - -CLUSTER_NAME = "otg-gwascatalog-harmonisation" -AUTOSCALING = "gwascatalog-harmonisation" - -SUMMARY_STATS_BUCKET_NAME = "gwas_catalog_data" -RAW_SUMMARY_STATISTICS_PREFIX = "raw_summary_statistics" -HARMONISED_SUMMARY_STATISTICS_PREFIX = "harmonised_summary_statistics" - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — GWAS Catalog harmonisation", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - # List raw harmonised files from GWAS Catalog - list_inputs = GCSListObjectsOperator( - task_id="list_raw_harmonised", - bucket=SUMMARY_STATS_BUCKET_NAME, - prefix=RAW_SUMMARY_STATISTICS_PREFIX, - match_glob="**/*.h.tsv.gz", - ) - # List parquet files that have been previously processed - list_outputs = GCSListObjectsOperator( - task_id="list_harmonised_parquet", - bucket=SUMMARY_STATS_BUCKET_NAME, - prefix=HARMONISED_SUMMARY_STATISTICS_PREFIX, - match_glob="**/_SUCCESS", - ) - - # Create list of pending jobs - @task(task_id="create_to_do_list") - def create_to_do_list(**kwargs: Any) -> Any: - """Create the to-do list of studies. - - Args: - **kwargs (Any): Keyword arguments. - - Returns: - Any: To-do list. 
- """ - ti = kwargs["ti"] - raw_harmonised = ti.xcom_pull( - task_ids="list_raw_harmonised", key="return_value" - ) - print("Number of raw harmonised files: ", len(raw_harmonised)) # noqa: T201 - to_do_list = [] - # Remove the ones that have been processed - parquets = ti.xcom_pull(task_ids="list_harmonised_parquet", key="return_value") - print("Number of parquet files: ", len(parquets)) # noqa: T201 - for path in raw_harmonised: - match_result = re.search( - rf"{RAW_SUMMARY_STATISTICS_PREFIX}/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", - path, - ) - if match_result: - study_id = match_result.group(2) - if ( - f"{HARMONISED_SUMMARY_STATISTICS_PREFIX}/{study_id}.parquet/_SUCCESS" - not in parquets - ): - to_do_list.append(path) - print("Number of jobs to submit: ", len(to_do_list)) # noqa: T201 - ti.xcom_push(key="to_do_list", value=to_do_list) - - # Submit jobs to dataproc - @task(task_id="submit_jobs") - def submit_jobs(**kwargs: Any) -> None: - """Submit jobs to dataproc. - - Args: - **kwargs (Any): Keyword arguments. 
- """ - ti = kwargs["ti"] - todo = ti.xcom_pull(task_ids="create_to_do_list", key="to_do_list") - print("Number of jobs to submit: ", len(todo)) # noqa: T201 - for i in range(len(todo)): - # Not to exceed default quota 400 jobs per minute - if i > 0 and i % 399 == 0: - time.sleep(60) - input_path = todo[i] - match_result = re.search( - rf"{RAW_SUMMARY_STATISTICS_PREFIX}/(.*)/(GCST\d+)/harmonised/(.*)\.h\.tsv\.gz", - input_path, - ) - if match_result: - study_id = match_result.group(2) - print("Submitting job for study: ", study_id) # noqa: T201 - common.submit_pyspark_job_no_operator( - cluster_name=CLUSTER_NAME, - step_id="gwas_catalog_sumstat_preprocess", - other_args=[ - f"step.raw_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{input_path}", - f"step.out_sumstats_path=gs://{SUMMARY_STATS_BUCKET_NAME}/{HARMONISED_SUMMARY_STATISTICS_PREFIX}/{study_id}.parquet", - ], - ) - - ( - [list_inputs, list_outputs] - >> create_to_do_list() - >> common.create_cluster( - CLUSTER_NAME, - autoscaling_policy=AUTOSCALING, - num_workers=8, - num_preemptible_workers=8, - master_machine_type="n1-highmem-64", - worker_machine_type="n1-standard-2", - ) - >> common.install_dependencies(CLUSTER_NAME) - >> submit_jobs() - # >> common.delete_cluster(CLUSTER_NAME) - ) diff --git a/src/airflow/dags/gwas_catalog_preprocess.py b/src/airflow/dags/gwas_catalog_preprocess.py deleted file mode 100644 index 7f6280242..000000000 --- a/src/airflow/dags/gwas_catalog_preprocess.py +++ /dev/null @@ -1,223 +0,0 @@ -"""Airflow DAG for the preprocessing of GWAS Catalog's harmonised summary statistics and curated associations.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG -from airflow.operators.python import PythonOperator -from airflow.providers.google.cloud.hooks.gcs import GCSHook -from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator -from airflow.utils.task_group import TaskGroup 
- -CLUSTER_NAME = "otg-preprocess-gwascatalog" -AUTOSCALING = "otg-preprocess-gwascatalog" - -# Setting up bucket name and output object names: -GWAS_CATALOG_BUCKET_NAME = "gwas_catalog_data" -HARMONISED_SUMSTATS_PREFIX = "harmonised_summary_statistics" - -# Manifest paths: -MANIFESTS_PATH = f"gs://{GWAS_CATALOG_BUCKET_NAME}/manifests/" - -# The name of the manifest files have to be consistent with the config file: -HARMONISED_SUMSTATS_LIST_OBJECT_NAME = ( - "manifests/gwas_catalog_harmonised_summary_statistics_list.txt" -) -HARMONISED_SUMSTATS_LIST_FULL_NAME = ( - f"gs://{GWAS_CATALOG_BUCKET_NAME}/{HARMONISED_SUMSTATS_LIST_OBJECT_NAME}" -) -CURATION_INCLUSION_NAME = f"{MANIFESTS_PATH}/gwas_catalog_curation_included_studies" -CURATION_EXCLUSION_NAME = f"{MANIFESTS_PATH}/gwas_catalog_curation_excluded_studies" -SUMMARY_STATISTICS_INCLUSION_NAME = ( - f"{MANIFESTS_PATH}/gwas_catalog_summary_statistics_included_studies" -) -SUMMARY_STATISTICS_EXCLUSION_NAME = ( - f"{MANIFESTS_PATH}/gwas_catalog_summary_statistics_excluded_studies" -) - -# Study index: -STUDY_INDEX = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_index" - -# Study loci: -CURATED_STUDY_LOCI = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_locus_datasets/gwas_catalog_curated_associations" -CURATED_LD_CLUMPED = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_locus_datasets/gwas_catalog_curated_associations_ld_clumped" -WINDOW_BASED_CLUMPED = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_locus_datasets/gwas_catalog_summary_stats_window_clumped" -LD_BASED_CLUMPED = f"gs://{GWAS_CATALOG_BUCKET_NAME}/study_locus_datasets/gwas_catalog_summary_stats_ld_clumped" -# Credible sets: -CURATED_CREDIBLE_SETS = f"gs://{GWAS_CATALOG_BUCKET_NAME}/credible_set_datasets/gwas_catalog_PICSed_curated_associations" -SUMMARY_STATISTICS_CREDIBLE_SETS = f"gs://{GWAS_CATALOG_BUCKET_NAME}/credible_set_datasets/gwas_catalog_PICSed_summary_statistics" - - -def upload_harmonized_study_list( - concatenated_studies: str, bucket_name: str, object_name: str -) -> 
None: - """This function uploads file to GCP. - - Args: - concatenated_studies (str): Concatenated list of harmonized summary statistics. - bucket_name (str): Bucket name - object_name (str): Name of the object - """ - hook = GCSHook(gcp_conn_id="google_cloud_default") - hook.upload( - bucket_name=bucket_name, - object_name=object_name, - data=concatenated_studies, - encoding="utf-8", - ) - - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — GWAS Catalog preprocess", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - # Getting list of folders (each a gwas study with summary statistics) - list_harmonised_sumstats = GCSListObjectsOperator( - task_id="list_harmonised_parquet", - bucket=GWAS_CATALOG_BUCKET_NAME, - prefix=HARMONISED_SUMSTATS_PREFIX, - match_glob="**/_SUCCESS", - ) - - # Upload resuling list to a bucket: - upload_task = PythonOperator( - task_id="uploader", - python_callable=upload_harmonized_study_list, - op_kwargs={ - "concatenated_studies": '{{ "\n".join(ti.xcom_pull( key="return_value", task_ids="list_harmonised_parquet")) }}', - "bucket_name": GWAS_CATALOG_BUCKET_NAME, - "object_name": HARMONISED_SUMSTATS_LIST_OBJECT_NAME, - }, - ) - - # Processing curated GWAS Catalog top-bottom: - with TaskGroup(group_id="curation_processing") as curation_processing: - # Generate inclusion list: - curation_calculate_inclusion_list = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_gwas_catalog_study_inclusion", - task_id="catalog_curation_inclusion_list", - other_args=[ - "step.criteria=curation", - f"step.inclusion_list_path={CURATION_INCLUSION_NAME}", - f"step.exclusion_list_path={CURATION_EXCLUSION_NAME}", - f"step.harmonised_study_file={HARMONISED_SUMSTATS_LIST_FULL_NAME}", - ], - ) - - # Ingest curated associations from GWAS Catalog: - curation_ingest_data = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_gwas_catalog_ingestion", - task_id="ingest_curated_gwas_catalog_data", - 
other_args=[f"step.inclusion_list_path={CURATION_INCLUSION_NAME}"], - ) - - # Run LD-annotation and clumping on curated data: - curation_ld_clumping = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_ld_based_clumping", - task_id="catalog_curation_ld_clumping", - other_args=[ - f"step.study_locus_input_path={CURATED_STUDY_LOCI}", - f"step.study_index_path={STUDY_INDEX}", - f"step.clumped_study_locus_output_path={CURATED_LD_CLUMPED}", - ], - ) - - # Do PICS based finemapping: - curation_pics = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="pics", - task_id="catalog_curation_pics", - other_args=[ - f"step.study_locus_ld_annotated_in={CURATED_LD_CLUMPED}", - f"step.picsed_study_locus_out={CURATED_CREDIBLE_SETS}", - ], - ) - - # Define order of steps: - ( - curation_calculate_inclusion_list - >> curation_ingest_data - >> curation_ld_clumping - >> curation_pics - ) - - # Processing summary statistics from GWAS Catalog: - with TaskGroup( - group_id="summary_statistics_processing" - ) as summary_statistics_processing: - # Generate inclusion study lists: - summary_stats_calculate_inclusion_list = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_gwas_catalog_study_inclusion", - task_id="catalog_sumstats_inclusion_list", - other_args=[ - "step.criteria=summary_stats", - f"step.inclusion_list_path={SUMMARY_STATISTICS_INCLUSION_NAME}", - f"step.exclusion_list_path={SUMMARY_STATISTICS_EXCLUSION_NAME}", - f"step.harmonised_study_file={HARMONISED_SUMSTATS_LIST_FULL_NAME}", - ], - ) - - # Run window-based clumping: - summary_stats_window_based_clumping = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="window_based_clumping", - task_id="catalog_sumstats_window_clumping", - other_args=[ - f"step.summary_statistics_input_path=gs://{GWAS_CATALOG_BUCKET_NAME}/{HARMONISED_SUMSTATS_PREFIX}", - f"step.inclusion_list_path={SUMMARY_STATISTICS_INCLUSION_NAME}", - f"step.study_locus_output_path={WINDOW_BASED_CLUMPED}", - ], - ) - - # Run LD 
based clumping: - summary_stats_ld_clumping = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_ld_based_clumping", - task_id="catalog_sumstats_ld_clumping", - other_args=[ - f"step.study_locus_input_path={WINDOW_BASED_CLUMPED}", - f"step.study_index_path={STUDY_INDEX}", - f"step.clumped_study_locus_output_path={LD_BASED_CLUMPED}", - ], - ) - - # Run PICS finemapping: - summary_stats_pics = common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="pics", - task_id="catalog_sumstats_pics", - other_args=[ - f"step.study_locus_ld_annotated_in={LD_BASED_CLUMPED}", - f"step.picsed_study_locus_out={SUMMARY_STATISTICS_CREDIBLE_SETS}", - ], - ) - - # Order of steps within the group: - ( - summary_stats_calculate_inclusion_list - >> summary_stats_window_based_clumping - >> summary_stats_ld_clumping - >> summary_stats_pics - ) - - # DAG description: - ( - common.create_cluster( - CLUSTER_NAME, autoscaling_policy=AUTOSCALING, num_workers=5 - ) - >> common.install_dependencies(CLUSTER_NAME) - >> list_harmonised_sumstats - >> upload_task - >> curation_processing - >> summary_statistics_processing - >> common.delete_cluster(CLUSTER_NAME) - ) diff --git a/src/airflow/dags/gwas_curation_update.py b/src/airflow/dags/gwas_curation_update.py deleted file mode 100644 index d5fd38e35..000000000 --- a/src/airflow/dags/gwas_curation_update.py +++ /dev/null @@ -1,34 +0,0 @@ -"""DAG for updating GWAS Catalog curation table.""" -from __future__ import annotations - -from datetime import datetime -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG - -CLUSTER_NAME = "otg-gwascatalog-curation" -RUN_DATE = datetime.today().strftime("%Y-%m-%d") - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — GWAS Catalog curation update", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - update_gwas_curation = common.submit_step( - cluster_name=CLUSTER_NAME, - 
step_id="ot_gwas_catalog_study_curation", - task_id="gwas_catalog_curation_update", - other_args=[ - f"step.gwas_catalog_study_curation_out=gs://genetics_etl_python_playground/input/v2d/GWAS_Catalog_study_curation_{RUN_DATE}.tsv", - ], - ) - - # DAG description: - ( - common.create_cluster(CLUSTER_NAME, num_workers=2) - >> common.install_dependencies(CLUSTER_NAME) - >> update_gwas_curation - ) diff --git a/src/airflow/dags/ukb_ppp_eur.py b/src/airflow/dags/ukb_ppp_eur.py deleted file mode 100644 index c8df8cf5b..000000000 --- a/src/airflow/dags/ukb_ppp_eur.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Airflow DAG to ingest and harmonise UKB PPP (EUR) data.""" - -from __future__ import annotations - -from pathlib import Path - -import common_airflow as common - -from airflow.models.dag import DAG - -CLUSTER_NAME = "otg-ukb-ppp-eur" - -# Input location. -UKB_PPP_EUR_STUDY_INDEX = "gs://gentropy-tmp/batch/output/ukb_ppp_eur/study_index.tsv" -UKB_PPP_EUR_SUMMARY_STATS = "gs://gentropy-tmp/batch/output/ukb_ppp_eur/summary_stats.parquet" -VARIANT_ANNOTATION = "gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/variant_annotation" - -# Output locations. 
-TMP_VARIANT_ANNOTATION = "gs://gentropy-tmp/variant_annotation" -UKB_PPP_EUR_OUTPUT_STUDY_INDEX = "gs://ukb_ppp_eur_data/study_index" -UKB_PPP_EUR_OUTPUT_SUMMARY_STATS = "gs://ukb_ppp_eur_data/summary_stats" - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — Ingest UKB PPP (EUR)", - default_args=common.shared_dag_args, - **common.shared_dag_kwargs, -): - dag = common.generate_dag( - cluster_name=CLUSTER_NAME, - tasks=[ - common.submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_ukb_ppp_eur_sumstat_preprocess", - other_args=[ - f"step.raw_study_index_path_from_tsv={UKB_PPP_EUR_STUDY_INDEX}", - f"step.raw_summary_stats_path={UKB_PPP_EUR_SUMMARY_STATS}", - f"step.variant_annotation_path={VARIANT_ANNOTATION}", - f"step.tmp_variant_annotation_path={TMP_VARIANT_ANNOTATION}", - f"step.study_index_output_path={UKB_PPP_EUR_OUTPUT_STUDY_INDEX}", - f"step.summary_stats_output_path={UKB_PPP_EUR_OUTPUT_SUMMARY_STATS}", - ] - ) - ] - ) diff --git a/src/airflow/dags/variant_index.py b/src/airflow/dags/variant_index.py deleted file mode 100644 index 98ba48198..000000000 --- a/src/airflow/dags/variant_index.py +++ /dev/null @@ -1,321 +0,0 @@ -"""DAG that generates a variant index dataset based on several sources.""" - -from __future__ import annotations - -import os -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Any - -import pandas as pd -from common_airflow import ( - create_batch_job, - create_cluster, - create_task_spec, - delete_cluster, - install_dependencies, - read_yaml_config, - shared_dag_args, - shared_dag_kwargs, - submit_step, -) -from google.cloud import batch_v1 - -from airflow.decorators import task -from airflow.models.dag import DAG -from airflow.providers.google.cloud.operators.cloud_batch import ( - CloudBatchSubmitJobOperator, -) -from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator -from airflow.utils.trigger_rule import TriggerRule - -PROJECT_ID = 
"open-targets-genetics-dev" -REGION = "europe-west1" -GCS_BUCKET = "genetics_etl_python_playground" -CONFIG_FILE_PATH = Path(__file__).parent / "configs" / "variant_sources.yaml" -GENTROPY_DOCKER_IMAGE = "europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/gentropy:il-variant-idx" # TODO: change to dev -VEP_DOCKER_IMAGE = "europe-west1-docker.pkg.dev/open-targets-genetics-dev/gentropy-app/custom_ensembl_vep:dev" -VEP_CACHE_BUCKET = f"gs://{GCS_BUCKET}/vep/cache" - -RELEASE = "XX.XX" # This needs to be updated to the latest release - -VCF_DST_PATH = f"gs://{GCS_BUCKET}/{RELEASE}/variant_vcf" -VCF_MERGED_DST_PATH = f"{VCF_DST_PATH}/merged" -VEP_OUTPUT_BUCKET = f"gs://{GCS_BUCKET}/{RELEASE}/vep_output" -VARIANT_INDEX_BUCKET = f"gs://{GCS_BUCKET}/{RELEASE}/variant_index" -GNOMAD_ANNOTATION_PATH = f"gs://{GCS_BUCKET}/static_assets/gnomad_variants" -# Internal parameters for the docker image: -MOUNT_DIR = "/mnt/disks/share" - -CLUSTER_NAME = "otg-variant-index" -AUTOSCALING = "eqtl-preprocess" - - -@task(task_id="vcf_creation") -def create_vcf(**kwargs: Any) -> None: - """Task that sends the ConvertToVcfStep job to Google Batch. 
- - Args: - **kwargs (Any): Keyword arguments - """ - sources = read_yaml_config(CONFIG_FILE_PATH) - task_env = [ - batch_v1.Environment( - variables={ - "SOURCE_NAME": source["name"], - "SOURCE_PATH": source["location"], - "SOURCE_FORMAT": source["format"], - } - ) - for source in sources["sources_inclusion_list"] - ] - - commands = [ - "-c", - rf"poetry run gentropy step=variant_to_vcf step.source_path=$SOURCE_PATH step.source_format=$SOURCE_FORMAT step.vcf_path={VCF_DST_PATH}/$SOURCE_NAME +step.session.extended_spark_conf={{spark.jars:https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar}}", - ] - task = create_task_spec( - GENTROPY_DOCKER_IMAGE, commands, options="-e HYDRA_FULL_ERROR=1" - ) - - batch_task = CloudBatchSubmitJobOperator( - task_id="vep_batch_job", - project_id=PROJECT_ID, - region=REGION, - job_name=f"vcf-job-{time.strftime('%Y%m%d-%H%M%S')}", - job=create_batch_job( - task, - "VEPMACHINE", - task_env, - ), - deferrable=False, - ) - - batch_task.execute(context=kwargs) - - -@task(task_id="merge_vcfs") -def merge_vcfs(chunk_size: int = 2000, **kwargs: Any) -> None: - """Task that merges the information from all the VCF files into a single one so that we only submit one VEP job. - - Args: - chunk_size (int): Partition size of the merged file. Defaults to 2000. 
- **kwargs (Any): Keyword arguments - """ - ti = kwargs["ti"] - input_vcfs = [ - f"gs://{GCS_BUCKET}/{listed_file}" - for listed_file in ti.xcom_pull( - task_ids="get_vcf_per_source", key="return_value" - ) - ] - merged_df = ( - pd.concat( - pd.read_csv( - file, - sep="\t", - dtype={ - "#CHROM": str, - "POS": int, - "ID": str, - "REF": str, - "ALT": str, - "QUAL": str, - "FILTER": str, - "INFO": str, - }, - ) - for file in input_vcfs - ) - .drop_duplicates(subset=["#CHROM", "POS", "REF", "ALT"]) - .sort_values(by=["#CHROM", "POS"]) - .reset_index(drop=True) - ) - # Partition the merged file into chunks of 2000 variants to run the VEP jobs in parallel - chunks = 0 - for i in range(0, len(merged_df), chunk_size): - merged_df[i : i + chunk_size].to_csv( - f"{VCF_MERGED_DST_PATH}/chunk_{i + 1}-{i + chunk_size}.vcf", - index=False, - header=True, - sep="\t", - ) - chunks += 1 - expected_chunks_count = len(merged_df) // chunk_size + 1 - assert ( - chunks == expected_chunks_count - ), f"Expected {expected_chunks_count} chunks but got {chunks} chunks" - - -@dataclass -class PathManager: - """It is quite complicated to keep track of all the input/output buckets, the corresponding mounting points prefixes etc...""" - - VCF_INPUT_BUCKET: str - VEP_OUTPUT_BUCKET: str - VEP_CACHE_BUCKET: str - MOUNT_DIR_ROOT: str - - # Derived parameters to find the list of files to process: - input_path: str | None = None - input_bucket: str | None = None - - # Derived parameters to initialise the docker image: - path_dictionary: dict[str, dict[str, str]] | None = None - - # Derived parameters to point to the right mouting points: - cache_dir: str | None = None - input_dir: str | None = None - output_dir: str | None = None - - def __post_init__(self: PathManager) -> None: - """Build paths based on the input parameters.""" - self.path_dictionary = { - "input": { - "remote_path": self.VCF_INPUT_BUCKET.replace("gs://", ""), - "mount_point": f"{self.MOUNT_DIR_ROOT}/input", - }, - "output": { - 
"remote_path": self.VEP_OUTPUT_BUCKET.replace("gs://", ""), - "mount_point": f"{self.MOUNT_DIR_ROOT}/output", - }, - "cache": { - "remote_path": self.VEP_CACHE_BUCKET.replace("gs://", ""), - "mount_point": f"{self.MOUNT_DIR_ROOT}/cache", - }, - } - # Parameters for fetching files: - self.input_path = self.VCF_INPUT_BUCKET.replace("gs://", "") + "/" - self.input_bucket = self.VCF_INPUT_BUCKET.split("/")[2] - - # Parameters for VEP: - self.cache_dir = f"{self.MOUNT_DIR_ROOT}/cache" - self.input_dir = f"{self.MOUNT_DIR_ROOT}/input" - self.output_dir = f"{self.MOUNT_DIR_ROOT}/output" - - def get_mount_config(self) -> list[dict[str, str]]: - """Return the mount configuration. - - Returns: - list[dict[str, str]]: The mount configuration. - """ - assert self.path_dictionary is not None, "Path dictionary not initialized." - return list(self.path_dictionary.values()) - - -@task(task_id="vep_annotation") -def vep_annotation(pm: PathManager, **kwargs: Any) -> None: - """Submit a Batch job to annotate VCFs with a local VEP docker image. - - Args: - pm (PathManager): The path manager with all the required path related information. - **kwargs (Any): Keyword arguments. - """ - # Get the filenames to process: - ti = kwargs["ti"] - filenames = [ - os.path.basename(os.path.splitext(path)[0]) - for path in ti.xcom_pull(task_ids="get_vep_todo_list", key="return_value") - ] - # Stop process if no files was found: - assert filenames, "No files found to process." 
- - # Based on the filenames, build the environment variables for the batch job: - task_env = [ - batch_v1.Environment( - variables={ - "INPUT_FILE": f"{filename}.vcf", - "OUTPUT_FILE": f"{filename}.json", - } - ) - for filename in filenames - ] - # Build the command to run in the container: - command = [ - "-c", - rf"vep --cache --offline --format vcf --force_overwrite \ - --no_stats \ - --dir_cache {pm.cache_dir} \ - --input_file {pm.input_dir}/$INPUT_FILE \ - --output_file {pm.output_dir}/$OUTPUT_FILE --json \ - --dir_plugins {pm.cache_dir}/VEP_plugins \ - --sift b \ - --polyphen b \ - --fasta {pm.cache_dir}/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz \ - --mane_select \ - --appris \ - --hgvsg \ - --pick_order mane_select,canonical \ - --per_gene \ - --uniprot \ - --check_existing \ - --exclude_null_alleles \ - --canonical \ - --plugin TSSDistance \ - --distance 500000 \ - --plugin LoF,loftee_path:{pm.cache_dir}/VEP_plugins,gerp_bigwig:{pm.cache_dir}/gerp_conservation_scores.homo_sapiens.GRCh38.bw,human_ancestor_fa:{pm.cache_dir}/human_ancestor.fa.gz,conservation_file:/opt/vep/loftee.sql \ - --plugin AlphaMissense,file={pm.cache_dir}/AlphaMissense_hg38.tsv.gz,transcript_match=1 \ - --plugin CADD,snv={pm.cache_dir}/CADD_GRCh38_whole_genome_SNVs.tsv.gz", - ] - task = create_task_spec(VEP_DOCKER_IMAGE, command) - batch_task = CloudBatchSubmitJobOperator( - task_id="vep_batch_job", - project_id=PROJECT_ID, - region=REGION, - job_name=f"vep-job-{time.strftime('%Y%m%d-%H%M%S')}", - job=create_batch_job(task, "VEPMACHINE", task_env, pm.get_mount_config()), - deferrable=False, - ) - batch_task.execute(context=kwargs) - - -with DAG( - dag_id=Path(__file__).stem, - description="Open Targets Genetics — create VCF file from datasets that contain variant information", - default_args=shared_dag_args, - **shared_dag_kwargs, -) as dag: - pm = PathManager( - VCF_MERGED_DST_PATH, - VEP_OUTPUT_BUCKET, - VEP_CACHE_BUCKET, - MOUNT_DIR, - ) - ( - create_vcf() - >> 
GCSListObjectsOperator( - task_id="get_vcf_per_source", - bucket=GCS_BUCKET, - prefix=VCF_DST_PATH.replace(f"gs://{GCS_BUCKET}/", ""), - trigger_rule=TriggerRule.ALL_SUCCESS, - match_glob="**.csv", - ) - >> merge_vcfs() - >> GCSListObjectsOperator( - task_id="get_vep_todo_list", - bucket=GCS_BUCKET, - prefix=VCF_MERGED_DST_PATH.replace(f"gs://{GCS_BUCKET}/", ""), - trigger_rule=TriggerRule.ALL_SUCCESS, - match_glob="**.vcf", - ) - >> vep_annotation(pm) - >> create_cluster( - CLUSTER_NAME, - autoscaling_policy=AUTOSCALING, - num_workers=4, - worker_machine_type="n1-highmem-8", - ) - >> install_dependencies(CLUSTER_NAME) - >> submit_step( - cluster_name=CLUSTER_NAME, - step_id="ot_variant_index", - task_id="ot_variant_index", - other_args=[ - f"step.vep_output_json_path={VEP_OUTPUT_BUCKET}", - f"step.variant_index_path={VARIANT_INDEX_BUCKET}", - f"step.gnomad_variant_annotations_path={GNOMAD_ANNOTATION_PATH}", - ], - ) - >> delete_cluster(CLUSTER_NAME) - ) diff --git a/src/airflow/docker-compose.yaml b/src/airflow/docker-compose.yaml deleted file mode 100644 index 8e8523490..000000000 --- a/src/airflow/docker-compose.yaml +++ /dev/null @@ -1,228 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -# Basic Airflow cluster configuration for LocalExecutor with PostgreSQL. -# -# WARNING: This configuration is for local development. Do not use it in a production deployment. -# -# This configuration supports basic configuration using environment variables or an .env file -# The following variables are supported: -# -# AIRFLOW_IMAGE_NAME - Docker image name used to run Airflow. -# Default: apache/airflow:slim-2.7.2-python3.10 -# AIRFLOW_UID - User ID in Airflow containers -# Default: 50000 -# AIRFLOW_PROJ_DIR - Base path to which all the files will be volumed. -# Default: . -# Those configurations are useful mostly in case of standalone testing/running Airflow in test/try-out mode -# -# _AIRFLOW_WWW_USER_USERNAME - Username for the administrator account (if requested). -# Default: airflow -# _AIRFLOW_WWW_USER_PASSWORD - Password for the administrator account (if requested). -# Default: airflow -# _PIP_ADDITIONAL_REQUIREMENTS - Additional PIP requirements to add when starting all containers. -# Use this option ONLY for quick checks. Installing requirements at container -# startup is done EVERY TIME the service is started. -# A better way is to build a custom image or extend the official image -# as described in https://airflow.apache.org/docs/docker-stack/build.html. -# Default: '' -# -# Feel free to modify this file to suit your needs. -version: "3.8" -x-airflow-common: &airflow-common - # In order to add custom dependencies or upgrade provider packages you can use your extended image. - # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml - # and uncomment the "build" line below, Then run `docker-compose build` to build the images. - image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:slim-2.7.2-python3.10} - # build: . 
- environment: &airflow-common-env - AIRFLOW__CORE__EXECUTOR: LocalExecutor - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - # For backward compatibility, with Airflow <2.3 - AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__CORE__FERNET_KEY: "" - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "true" - AIRFLOW__CORE__LOAD_EXAMPLES: "false" - AIRFLOW__API__AUTH_BACKENDS: "airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session" - # yamllint disable rule:line-length - # Use simple http server on scheduler for health checks - # See https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/logging-monitoring/check-health.html#scheduler-health-check-server - # yamllint enable rule:line-length - AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: "true" - # WARNING: Use _PIP_ADDITIONAL_REQUIREMENTS option ONLY for a quick checks - # for other purpose (development, test and especially production usage) build/extend Airflow image. 
- _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-} - # GCLOUD Authentication - GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-} - AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT: ${AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT:-} - GCP_PROJECT_ID: ${GCP_PROJECT_ID:-} - GCP_GCS_BUCKET: ${GCP_GCS_BUCKET:-} - - volumes: - - ${AIRFLOW_PROJ_DIR:-.}/dags:/opt/airflow/dags - - ${AIRFLOW_PROJ_DIR:-.}/logs:/opt/airflow/logs - - ${AIRFLOW_PROJ_DIR:-.}/config:/opt/airflow/config - - ${AIRFLOW_PROJ_DIR:-.}/plugins:/opt/airflow/plugins - # GCLOUD Authentication - - ${GOOGLE_LOCAL_CREDENTIALS_PATH}:/${GOOGLE_DOCKER_CREDENTIALS_PATH}:ro - user: "${AIRFLOW_UID:-50000}:0" - depends_on: &airflow-common-depends-on - postgres: - condition: service_healthy - -services: - postgres: - image: postgres:13 - environment: - POSTGRES_USER: airflow - POSTGRES_PASSWORD: airflow - POSTGRES_DB: airflow - volumes: - - postgres-db-volume:/var/lib/postgresql/data - healthcheck: - test: ["CMD", "pg_isready", "-U", "airflow"] - interval: 10s - retries: 5 - start_period: 5s - restart: always - - airflow-webserver: - <<: *airflow-common - command: webserver - ports: - - "8080:8080" - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:8080/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-scheduler: - <<: *airflow-common - command: scheduler - healthcheck: - test: ["CMD", "curl", "--fail", "http://localhost:8974/health"] - interval: 30s - timeout: 10s - retries: 5 - start_period: 30s - restart: always - depends_on: - <<: *airflow-common-depends-on - airflow-init: - condition: service_completed_successfully - - airflow-init: - <<: *airflow-common - entrypoint: /bin/bash - # yamllint disable rule:line-length - command: - - -c - - | - function ver() { - printf "%04d%04d%04d%04d" $${1//./ } - } - 
airflow_version=$$(AIRFLOW__LOGGING__LOGGING_LEVEL=INFO && gosu airflow airflow version) - airflow_version_comparable=$$(ver $${airflow_version}) - min_airflow_version=2.2.0 - min_airflow_version_comparable=$$(ver $${min_airflow_version}) - if (( airflow_version_comparable < min_airflow_version_comparable )); then - echo - echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m" - echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!" - echo - exit 1 - fi - if [[ -z "${AIRFLOW_UID}" ]]; then - echo - echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" - echo "If you are on Linux, you SHOULD follow the instructions below to set " - echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." - echo "For other operating systems you can get rid of the warning with manually created .env file:" - echo " See: https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#setting-the-right-airflow-user" - echo - fi - one_meg=1048576 - mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) - cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) - disk_available=$$(df / | tail -1 | awk '{print $$4}') - warning_resources="false" - if (( mem_available < 4000 )) ; then - echo - echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" - echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" - echo - warning_resources="true" - fi - if (( cpus_available < 2 )); then - echo - echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" - echo "At least 2 CPUs recommended. You have $${cpus_available}" - echo - warning_resources="true" - fi - if (( disk_available < one_meg * 10 )); then - echo - echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" - echo "At least 10 GBs recommended. 
You have $$(numfmt --to iec $$((disk_available * 1024 )))" - echo - warning_resources="true" - fi - if [[ $${warning_resources} == "true" ]]; then - echo - echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" - echo "Please follow the instructions to increase amount of resources available:" - echo " https://airflow.apache.org/docs/apache-airflow/stable/howto/docker-compose/index.html#before-you-begin" - echo - fi - mkdir -p /sources/logs /sources/dags /sources/plugins - chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} - exec /entrypoint airflow version - # yamllint enable rule:line-length - environment: - <<: *airflow-common-env - _AIRFLOW_DB_MIGRATE: "true" - _AIRFLOW_WWW_USER_CREATE: "true" - _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} - _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} - _PIP_ADDITIONAL_REQUIREMENTS: "" - user: "0:0" - volumes: - - ${AIRFLOW_PROJ_DIR:-.}:/sources - - airflow-cli: - <<: *airflow-common - profiles: - - debug - environment: - <<: *airflow-common-env - CONNECTION_CHECK_MAX_COUNT: "0" - # Workaround for entrypoint issue. 
See: https://github.com/apache/airflow/issues/16252 - command: - - bash - - -c - - airflow - -volumes: - postgres-db-volume: diff --git a/src/airflow/logs/.gitkeep b/src/airflow/logs/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/airflow/plugins/.gitkeep b/src/airflow/plugins/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/airflow/requirements.txt b/src/airflow/requirements.txt deleted file mode 100644 index 540c20453..000000000 --- a/src/airflow/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -apache-airflow-providers-google==10.17.0 -apache-airflow-providers-apache-beam==5.6.1 -psycopg2-binary==2.9.9 diff --git a/tests/airflow/test_dag.py b/tests/airflow/test_dag.py deleted file mode 100644 index e18f91fb1..000000000 --- a/tests/airflow/test_dag.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Check for airflow import errors. Inspiration from https://garystafford.medium.com/devops-for-dataops-building-a-ci-cd-pipeline-for-apache-airflow-dags-975e4a622f83.""" - -from __future__ import annotations - -import pytest - -from airflow.models import DagBag - - -@pytest.fixture(params=["./src/airflow/dags"]) -def dag_bag(request: pytest.FixtureRequest) -> DagBag: - """Return a DAG bag for testing.""" - return DagBag(dag_folder=request.param, include_examples=False) - - -def test_no_import_errors(dag_bag: DagBag) -> None: - """Test for import errors.""" - assert ( - not dag_bag.import_errors - ), f"DAG import failures. 
Errors: {dag_bag.import_errors}" - - -def test_requires_tags(dag_bag: DagBag) -> None: - """Tags should be defined for each DAG.""" - for _, dag in dag_bag.dags.items(): - assert dag.tags - - -def test_owner_len_greater_than_five(dag_bag: DagBag) -> None: - """Owner should be defined for each DAG and be longer than 5 characters.""" - for _, dag in dag_bag.dags.items(): - assert len(dag.owner) > 5 - - -def test_desc_len_greater_than_fifteen(dag_bag: DagBag) -> None: - """Description should be defined for each DAG and be longer than 30 characters.""" - for _, dag in dag_bag.dags.items(): - if isinstance(dag.description, str): - assert len(dag.description) > 30 - - -def test_owner_not_airflow(dag_bag: DagBag) -> None: - """Owner should not be 'airflow'.""" - for _, dag in dag_bag.dags.items(): - assert str.lower(dag.owner) != "airflow" - - -def test_three_or_less_retries(dag_bag: DagBag) -> None: - """Retries should be 3 or less.""" - for _, dag in dag_bag.dags.items(): - assert dag.default_args["retries"] <= 3 From d6364f86ee8680e535ba3a6637ffe1a1f8dc9653 Mon Sep 17 00:00:00 2001 From: Kirill Tsukanov Date: Fri, 13 Sep 2024 12:54:36 +0100 Subject: [PATCH 034/188] fix: multiple fixes after debugging and test runs (#760) --- src/gentropy/common/harmonise.py | 19 +++++---- src/gentropy/common/per_chromosome.py | 4 +- src/gentropy/config.py | 1 + .../finngen_ukb_meta/study_index.py | 42 +++++++++++++------ .../finngen_ukb_meta/summary_stats.py | 6 ++- 5 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/gentropy/common/harmonise.py b/src/gentropy/common/harmonise.py index 9b570eec6..6420245bd 100644 --- a/src/gentropy/common/harmonise.py +++ b/src/gentropy/common/harmonise.py @@ -135,13 +135,6 @@ def harmonise_summary_stats( df .join(va_df, (df["chromosome"] == va_df["vaChromosome"]) & (df["summary_stats_id"] == va_df["summary_stats_id"]), "inner") .drop("vaChromosome", "summary_stats_id") - .withColumn( - "effectAlleleFrequencyFromSource", - f.when( - 
f.col("direction") == "direct", - f.col(colname_a1freq).cast("float") - ).otherwise(1 - f.col(colname_a1freq).cast("float")) - ) .withColumn( "beta", f.when( @@ -150,6 +143,18 @@ def harmonise_summary_stats( ).otherwise(-f.col(colname_beta).cast("double")) ) ) + if colname_a1freq: + df = ( + df + .withColumn( + "effectAlleleFrequencyFromSource", + f.when( + f.col("direction") == "direct", + f.col(colname_a1freq).cast("float") + ) + .otherwise(1 - f.col(colname_a1freq).cast("float")) + ) + ) df = ( # Harmonise, 7: Drop bad quality variants. df diff --git a/src/gentropy/common/per_chromosome.py b/src/gentropy/common/per_chromosome.py index f2cedd98e..a3016202e 100644 --- a/src/gentropy/common/per_chromosome.py +++ b/src/gentropy/common/per_chromosome.py @@ -36,7 +36,7 @@ def prepare_va(session: SparkSession, variant_annotation_path: str, tmp_variant_ f.col("position"), f.col("referenceAllele"), f.col("alternateAllele") - ).alias("ukb_ppp_id"), + ).alias("summary_stats_id"), f.lit("direct").alias("direction") ) ) @@ -51,7 +51,7 @@ def prepare_va(session: SparkSession, variant_annotation_path: str, tmp_variant_ f.col("position"), f.col("alternateAllele"), f.col("referenceAllele") - ).alias("ukb_ppp_id"), + ).alias("summary_stats_id"), f.lit("flip").alias("direction") ) ) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 181e9042d..4d1174d6b 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -598,3 +598,4 @@ def register_config() -> None: name="study_validation", node=StudyValidationStepConfig, ) + cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig) diff --git a/src/gentropy/datasource/finngen_ukb_meta/study_index.py b/src/gentropy/datasource/finngen_ukb_meta/study_index.py index 76e82f0eb..fe6c74beb 100644 --- a/src/gentropy/datasource/finngen_ukb_meta/study_index.py +++ b/src/gentropy/datasource/finngen_ukb_meta/study_index.py @@ -1,10 +1,14 @@ """Study Index for Finngen data source.""" from __future__ 
import annotations +from urllib.request import urlopen + import pyspark.sql.functions as f from pyspark.sql import SparkSession +from gentropy.config import FinngenStudiesConfig from gentropy.dataset.study_index import StudyIndex +from gentropy.datasource.finngen.study_index import FinnGenStudyIndex class FinngenUkbMetaStudyIndex(StudyIndex): @@ -15,12 +19,14 @@ def from_source( cls: type[FinngenUkbMetaStudyIndex], spark: SparkSession, raw_study_index_path_from_tsv: str, + efo_curation_mapping_url: str = FinngenStudiesConfig().efo_curation_mapping_url, ) -> StudyIndex: """This function ingests study level metadata from FinnGen UKB meta-analysis. Args: spark (SparkSession): Spark session object. raw_study_index_path_from_tsv (str): Raw study index path. + efo_curation_mapping_url (str): URL to the EFO curation mapping file. Returns: StudyIndex: Parsed and annotated FinnGen UKB meta-analysis study table. @@ -35,28 +41,40 @@ def from_source( f.col("name").alias("traitFromSource"), f.lit(True).alias("hasSumstats"), f.col("_gentropy_summary_stats_link").alias("summarystatsLocation"), - (f.col("fg_n_cases") + f.col("ukbb_n_cases") + f.col("fg_n_controls") + f.col("ukbb_n_controls")).alias("nSamples") + (f.col("fg_n_cases") + f.col("ukbb_n_cases") + f.col("fg_n_controls") + f.col("ukbb_n_controls")).cast("integer").alias("nSamples"), + f.array( + f.struct( + (f.col("fg_n_cases") + f.col("fg_n_controls")).cast("integer").alias("sampleSize"), + f.lit("Finnish").alias("ancestry"), + ), + f.struct( + (f.col("ukbb_n_cases") + f.col("ukbb_n_controls")).cast("integer").alias("sampleSize"), + f.lit("European").alias("ancestry"), + ), + ).alias("discoverySamples"), ) ) # Add population structure. 
study_index_df = ( study_index_df - .withColumn( - "discoverySamples", - f.array( - f.struct( - f.col("nSamples").cast("integer").alias("sampleSize"), - f.lit("European").alias("ancestry"), - ) - ) - ) .withColumn( "ldPopulationStructure", cls.aggregate_and_map_ancestries(f.col("discoverySamples")), ) ) - - return StudyIndex( + # Create study index. + study_index = StudyIndex( _df=study_index_df, _schema=StudyIndex.get_schema(), ) + # Add EFO mappings. + csv_data = urlopen(efo_curation_mapping_url).readlines() + csv_rows = [row.decode("utf8") for row in csv_data] + rdd = spark.sparkContext.parallelize(csv_rows) + efo_curation_mapping = spark.read.csv(rdd, header=True, sep="\t") + study_index = FinnGenStudyIndex.join_efo_mapping( + study_index, + efo_curation_mapping, + finngen_release_prefix="FINNGEN_R11", + ) + return study_index diff --git a/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py b/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py index 6e45736c3..b77d57966 100644 --- a/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py +++ b/src/gentropy/datasource/finngen_ukb_meta/summary_stats.py @@ -4,6 +4,7 @@ from dataclasses import dataclass +import pyspark.sql.functions as f from pyspark.sql import SparkSession from gentropy.common.harmonise import harmonise_summary_stats @@ -53,7 +54,10 @@ def from_source( ) # Populate the sample size column from the study index. - study_index = spark.read.parquet(study_index_path).select("studyId", "nSamples") + study_index = spark.read.parquet(study_index_path).select( + "studyId", + f.col("nSamples").cast("integer").alias("sampleSize") + ) df = df.join(study_index, on=["studyId"], how="inner") # Create the summary statistics object. 
From 34aaaf5901cff29c6e1971f0b746793c048b20e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 23:38:24 +0200 Subject: [PATCH 035/188] build(deps-dev): bump mkdocstrings-python from 1.10.5 to 1.11.1 (#749) --- poetry.lock | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0df0da543..760522884 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1590,13 +1590,13 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] [[package]] name = "griffe" -version = "0.47.0" +version = "1.2.0" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.47.0-py3-none-any.whl", hash = "sha256:07a2fd6a8c3d21d0bbb0decf701d62042ccc8a576645c7f8799fe1f10de2b2de"}, - {file = "griffe-0.47.0.tar.gz", hash = "sha256:95119a440a3c932b13293538bdbc405bee4c36428547553dc6b327e7e7d35e5a"}, + {file = "griffe-1.2.0-py3-none-any.whl", hash = "sha256:a8b2fcb1ecdc5a412e646b0b4375eb20a5d2eac3a11dd8c10c56967a4097663c"}, + {file = "griffe-1.2.0.tar.gz", hash = "sha256:1c9f6ef7455930f3f9b0c4145a961c90385d1e2cbc496f7796fbff560ec60d31"}, ] [package.dependencies] @@ -2411,13 +2411,13 @@ mkdocs = ">=1.2.3" [[package]] name = "mkdocs-autorefs" -version = "1.0.1" +version = "1.2.0" description = "Automatically link across pages in MkDocs." 
optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_autorefs-1.0.1-py3-none-any.whl", hash = "sha256:aacdfae1ab197780fb7a2dac92ad8a3d8f7ca8049a9cbe56a4218cd52e8da570"}, - {file = "mkdocs_autorefs-1.0.1.tar.gz", hash = "sha256:f684edf847eced40b570b57846b15f0bf57fb93ac2c510450775dcf16accb971"}, + {file = "mkdocs_autorefs-1.2.0-py3-none-any.whl", hash = "sha256:d588754ae89bd0ced0c70c06f58566a4ee43471eeeee5202427da7de9ef85a2f"}, + {file = "mkdocs_autorefs-1.2.0.tar.gz", hash = "sha256:a86b93abff653521bda71cf3fc5596342b7a23982093915cb74273f67522190f"}, ] [package.dependencies] @@ -2559,23 +2559,23 @@ mkdocs = ">=1.2" [[package]] name = "mkdocstrings" -version = "0.25.1" +version = "0.26.1" description = "Automatic documentation from sources, for MkDocs." optional = false python-versions = ">=3.8" files = [ - {file = "mkdocstrings-0.25.1-py3-none-any.whl", hash = "sha256:da01fcc2670ad61888e8fe5b60afe9fee5781017d67431996832d63e887c2e51"}, - {file = "mkdocstrings-0.25.1.tar.gz", hash = "sha256:c3a2515f31577f311a9ee58d089e4c51fc6046dbd9e9b4c3de4c3194667fe9bf"}, + {file = "mkdocstrings-0.26.1-py3-none-any.whl", hash = "sha256:29738bfb72b4608e8e55cc50fb8a54f325dc7ebd2014e4e3881a49892d5983cf"}, + {file = "mkdocstrings-0.26.1.tar.gz", hash = "sha256:bb8b8854d6713d5348ad05b069a09f3b79edbc6a0f33a34c6821141adb03fe33"}, ] [package.dependencies] click = ">=7.0" Jinja2 = ">=2.11.1" -Markdown = ">=3.3" +Markdown = ">=3.6" MarkupSafe = ">=1.1" mkdocs = ">=1.4" -mkdocs-autorefs = ">=0.3.1" -platformdirs = ">=2.2.0" +mkdocs-autorefs = ">=1.2" +platformdirs = ">=2.2" pymdown-extensions = ">=6.3" [package.extras] @@ -2585,18 +2585,19 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] [[package]] name = "mkdocstrings-python" -version = "1.10.5" +version = "1.11.1" description = "A Python handler for mkdocstrings." 
optional = false python-versions = ">=3.8" files = [ - {file = "mkdocstrings_python-1.10.5-py3-none-any.whl", hash = "sha256:92e3c588ef1b41151f55281d075de7558dd8092e422cb07a65b18ee2b0863ebb"}, - {file = "mkdocstrings_python-1.10.5.tar.gz", hash = "sha256:acdc2a98cd9d46c7ece508193a16ca03ccabcb67520352b7449f84b57c162bdf"}, + {file = "mkdocstrings_python-1.11.1-py3-none-any.whl", hash = "sha256:a21a1c05acef129a618517bb5aae3e33114f569b11588b1e7af3e9d4061a71af"}, + {file = "mkdocstrings_python-1.11.1.tar.gz", hash = "sha256:8824b115c5359304ab0b5378a91f6202324a849e1da907a3485b59208b797322"}, ] [package.dependencies] -griffe = ">=0.47" -mkdocstrings = ">=0.25" +griffe = ">=0.49" +mkdocs-autorefs = ">=1.2" +mkdocstrings = ">=0.26" [[package]] name = "msal" From 2e0e326333336fcd40b3f30f4435e7675bf9c7fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 10:04:28 +0100 Subject: [PATCH 036/188] build(deps): bump wandb from 0.17.2 to 0.18.0 (#763) Bumps [wandb](https://github.com/wandb/wandb) from 0.17.2 to 0.18.0. - [Release notes](https://github.com/wandb/wandb/releases) - [Changelog](https://github.com/wandb/wandb/blob/main/CHANGELOG.md) - [Commits](https://github.com/wandb/wandb/compare/v0.17.2...v0.18.0) --- updated-dependencies: - dependency-name: wandb dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 25 +++++++++++++------------ pyproject.toml | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 760522884..933477f7c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4895,18 +4895,19 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "wandb" -version = "0.17.2" +version = "0.18.0" description = "A CLI and library for interacting with the Weights & Biases API." optional = false python-versions = ">=3.7" files = [ - {file = "wandb-0.17.2-py3-none-any.whl", hash = "sha256:4bd351be28cea87730365856cfaa72f72ceb787accc21bad359dde5aa9c4356d"}, - {file = "wandb-0.17.2-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:638353a2d702caedd304a5f1e526ef93a291c984c109fcb444262a57aeaacec9"}, - {file = "wandb-0.17.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:824e33ca77af87f87a9cf1122acba164da5bf713adc9d67332bc686028921ec9"}, - {file = "wandb-0.17.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:032ca5939008643349af178a8b66b8047a1eefcb870c4c4a86e22acafde6470f"}, - {file = "wandb-0.17.2-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9558bab47a0c8ac4f22cfa2d43f91d1bc1f75d4255629286db674fe49fcd30e5"}, - {file = "wandb-0.17.2-py3-none-win32.whl", hash = "sha256:4bc176e3c81be216dc889fcd098341eb17a14b04e080d4343ce3f0b1740abfc1"}, - {file = "wandb-0.17.2-py3-none-win_amd64.whl", hash = "sha256:62cd707f38b5711971729dae80343b8c35f6003901e690166cc6d526187a9785"}, + {file = "wandb-0.18.0-py3-none-any.whl", hash = "sha256:a176af0d51b55a363dac3c54a8b7aa1cfd5a89cad6fc6574237232f37c779965"}, + {file = "wandb-0.18.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:2bc7f18becda9a566a63723666390f941e8b115b9e7746e0e5d73dc9ea9714c6"}, + {file = 
"wandb-0.18.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e14a385c95e61e77b0b5c4cbc6c5a0b47ac0d9e66730ca8c17b84eba374e35d1"}, + {file = "wandb-0.18.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0764ad8911a70cdb7cb339567c4170b860e8f5f523447b2f748d7e0e6224e29"}, + {file = "wandb-0.18.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d2ffea43710e3482168a2d89b2770aa9a14007ba16e717b176428f2a50765f2"}, + {file = "wandb-0.18.0-py3-none-win32.whl", hash = "sha256:b209840a9499bf687e8b5b20117341e7722f86a85f986c422501eb1a709dc721"}, + {file = "wandb-0.18.0-py3-none-win_amd64.whl", hash = "sha256:25aa8ee1808eae0c0e4818b81bc43fd6461e4f3603d7918e5eab2f9afca00715"}, + {file = "wandb-0.18.0.tar.gz", hash = "sha256:872dfd7298c053ca861352196bc422452caff105d3bc66b90e7bc86f17ad8bdd"}, ] [package.dependencies] @@ -4914,7 +4915,7 @@ click = ">=7.1,<8.0.0 || >8.0.0" docker-pycreds = ">=0.4.0" gitpython = ">=1.0.0,<3.1.29 || >3.1.29" platformdirs = "*" -protobuf = {version = ">=3.19.0,<4.21.0 || >4.21.0,<6", markers = "python_version > \"3.9\" or sys_platform != \"linux\""} +protobuf = {version = ">=3.19.0,<4.21.0 || >4.21.0,<5.28.0 || >5.28.0,<6", markers = "python_version > \"3.9\" or sys_platform != \"linux\""} psutil = ">=5.0.0" pyyaml = "*" requests = ">=2.0.0,<3" @@ -4926,9 +4927,9 @@ setuptools = "*" aws = ["boto3"] azure = ["azure-identity", "azure-storage-blob"] gcp = ["google-cloud-storage"] -importers = ["filelock", "mlflow", "polars", "rich", "tenacity"] +importers = ["filelock", "mlflow", "polars (<=1.2.1)", "rich", "tenacity"] kubeflow = ["google-cloud-storage", "kubernetes", "minio", "sh"] -launch = ["awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-aiplatform", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "kubernetes", "kubernetes-asyncio", "nbconvert", "nbformat", "optuna", 
"pydantic", "pyyaml (>=6.0.0)", "tomli", "typing-extensions"] +launch = ["awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-aiplatform", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "jsonschema", "kubernetes", "kubernetes-asyncio", "nbconvert", "nbformat", "optuna", "pydantic", "pyyaml (>=6.0.0)", "tomli", "typing-extensions"] media = ["bokeh", "moviepy", "numpy", "pillow", "plotly (>=5.18.0)", "rdkit-pypi", "soundfile"] models = ["cloudpickle"] perf = ["orjson"] @@ -5218,4 +5219,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "5b7e79eb2ca58918d786e61b6331115376a24705da5478c6feef85d37f24685e" +content-hash = "50a797b217805183c5967246c1ca4b339037ce60e44a6c43b5c3fc6a9fb2832a" diff --git a/pyproject.toml b/pyproject.toml index 094ca1f6e..1343c7b50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ hydra-core = "^1.3.2" pyliftover = "^0.4" numpy = "^1.26.2" hail = "0.2.127" -wandb = ">=0.16.2,<0.18.0" +wandb = ">=0.16.2,<0.19.0" google = "^3.0.0" omegaconf = "^2.3.0" typing-extensions = "^4.9.0" From 6ede7363c683d0955edafc647c9d13b2baf17fc7 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 17 Sep 2024 17:45:44 +0100 Subject: [PATCH 037/188] fix: prevent multiple credible filters to override spark plan (#766) * fix: prevent multiple filters to override spark plan * feat: mhc quality control flag * fix: prevent multiple filters to override spark plan * Revert "fix: prevent multiple filters to override spark plan" This reverts commit a358781017dee62ba7a0466783bf24af108d6d42. 
* revert: wrong commit * fix: missing changes due to git chaos * chore: merge dev toml, update lock * chore: update lock after upgrading poetry to 1.8.3 v 2 --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- poetry.lock | 1 - src/gentropy/dataset/study_locus.py | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 933477f7c..226311a8b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3952,7 +3952,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index b59d57650..38d1e14a2 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -534,14 +534,16 @@ def filter_credible_set( Returns: StudyLocus: Filtered study-locus dataset. 
""" - self.df = self._df.withColumn( - "locus", - f.filter( - f.col("locus"), - lambda tag: (tag[credible_interval.value]), + return StudyLocus( + _df=self._df.withColumn( + "locus", + f.filter( + f.col("locus"), + lambda tag: (tag[credible_interval.value]), + ), ), + _schema=self._schema, ) - return self @staticmethod def filter_ld_set(ld_set: Column, r2_threshold: float) -> Column: From 8c4421aa221d69be76765375d9525222c7c89572 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Wed, 18 Sep 2024 12:35:28 +0100 Subject: [PATCH 038/188] feat: flag MHC credible sets based on lead (#767) * fix: prevent multiple filters to override spark plan * feat: mhc quality control flag * fix: typo * docs: genomic region added to docs * docs: info added to common index (and title) * docs: fix paths * Update src/gentropy/study_locus_validation.py * fix: remove unnecessary comment --------- Closes https://github.com/opentargets/issues/issues/3469 Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- docs/python_api/common/_common.md | 1 + docs/python_api/common/genomic_region.md | 6 + src/gentropy/common/genomic_region.py | 103 ++++++++++++++++++ src/gentropy/common/utils.py | 42 +------ src/gentropy/dataset/study_locus.py | 45 ++++++-- src/gentropy/dataset/summary_statistics.py | 17 +-- src/gentropy/locus_breaker_clumping.py | 4 +- src/gentropy/study_locus_validation.py | 7 +- tests/gentropy/dataset/test_study_locus.py | 5 + .../dataset/test_summary_statistics.py | 8 +- 10 files changed, 173 insertions(+), 65 deletions(-) create mode 100644 docs/python_api/common/genomic_region.md create mode 100644 src/gentropy/common/genomic_region.py diff --git a/docs/python_api/common/_common.md b/docs/python_api/common/_common.md index a8abe0f84..5957ec3bd 100644 --- a/docs/python_api/common/_common.md +++ b/docs/python_api/common/_common.md @@ -4,5 +4,6 @@ title: Common Common utilities used in gentropy package. 
+- [**Genomic Region**](genomic_region.md): class to represent genomic regions - [**Version Engine**](version_engine.md): class to extract version from datasource input paths - [**Types**](types.md): Literal types used in the gentropy diff --git a/docs/python_api/common/genomic_region.md b/docs/python_api/common/genomic_region.md new file mode 100644 index 000000000..c0841d299 --- /dev/null +++ b/docs/python_api/common/genomic_region.md @@ -0,0 +1,6 @@ +--- +title: Genomic Region +--- + +:::gentropy.common.genomic_region.KnownGenomicRegions +:::gentropy.common.genomic_region.GenomicRegion diff --git a/src/gentropy/common/genomic_region.py b/src/gentropy/common/genomic_region.py new file mode 100644 index 000000000..ffa298f86 --- /dev/null +++ b/src/gentropy/common/genomic_region.py @@ -0,0 +1,103 @@ +"""Genomic Region class.""" + +from enum import Enum + + +class KnownGenomicRegions(Enum): + """Known genomic regions in the human genome in string format.""" + + MHC = "chr6:25726063-33400556" + + +class GenomicRegion: + """Genomic regions of interest. + + Attributes: + chromosome (str): Chromosome. + start (int): Start position. + end (int): + """ + + def __init__(self, chromosome: str, start: int, end: int) -> None: + """Class constructor. + + Args: + chromosome (str): Chromosome. + start (int): Start position. + end (int): End position. + """ + self.chromosome = chromosome + self.start = start + self.end = end + + def __str__(self) -> str: + """String representation of the genomic region. + + Returns: + str: Genomic region in chr:start-end format. + """ + return f"{self.chromosome}:{self.start}-{self.end}" + + @classmethod + def from_string(cls: type["GenomicRegion"], region: str) -> "GenomicRegion": + """Parse region string to chr:start-end. + + Args: + region (str): Genomic region expected to follow chr##:#,###-#,### format or ##:####-#####. + + Returns: + GenomicRegion: Genomic region object. 
+ + Raises: + ValueError: If the end and start positions cannot be casted to integer or not all three values value error is raised. + + Examples: + >>> print(GenomicRegion.from_string('chr6:28,510,120-33,480,577')) + 6:28510120-33480577 + >>> print(GenomicRegion.from_string('6:28510120-33480577')) + 6:28510120-33480577 + >>> print(GenomicRegion.from_string('6:28510120')) + Traceback (most recent call last): + ... + ValueError: Genomic region should follow a ##:####-#### format. + >>> print(GenomicRegion.from_string('6:28510120-foo')) + Traceback (most recent call last): + ... + ValueError: Start and the end position of the region has to be integer. + """ + region = region.replace(":", "-").replace(",", "") + try: + chromosome, start_position, end_position = region.split("-") + except ValueError as err: + raise ValueError( + "Genomic region should follow a ##:####-#### format." + ) from err + + try: + return cls( + chromosome=chromosome.replace("chr", ""), + start=int(start_position), + end=int(end_position), + ) + except ValueError as err: + raise ValueError( + "Start and the end position of the region has to be integer." + ) from err + + @classmethod + def from_known_genomic_region( + cls: type["GenomicRegion"], region: KnownGenomicRegions + ) -> "GenomicRegion": + """Get known genomic region. + + Args: + region (KnownGenomicRegions): Known genomic region. + + Returns: + GenomicRegion: Genomic region object. 
+ + Examples: + >>> print(GenomicRegion.from_known_genomic_region(KnownGenomicRegions.MHC)) + 6:25726063-33400556 + """ + return GenomicRegion.from_string(region.value) diff --git a/src/gentropy/common/utils.py b/src/gentropy/common/utils.py index 81a2b4bfd..ca6e8e7f2 100644 --- a/src/gentropy/common/utils.py +++ b/src/gentropy/common/utils.py @@ -4,7 +4,7 @@ import sys from math import floor, log10 -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING import hail as hl import numpy as np @@ -19,46 +19,6 @@ from pyspark.sql import Column -def parse_region(region: str) -> Tuple[str, int, int]: - """Parse region string to chr:start-end. - - Args: - region (str): Genomic region expected to follow chr##:#,###-#,### format or ##:####-#####. - - Returns: - Tuple[str, int, int]: Chromosome, start position, end position - - Raises: - ValueError: If the end and start positions cannot be casted to integer or not all three values value error is raised. - - Examples: - >>> parse_region('chr6:28,510,120-33,480,577') - ('6', 28510120, 33480577) - >>> parse_region('6:28510120-33480577') - ('6', 28510120, 33480577) - >>> parse_region('6:28510120') - Traceback (most recent call last): - ... - ValueError: Genomic region should follow a ##:####-#### format. - >>> parse_region('6:28510120-foo') - Traceback (most recent call last): - ... - ValueError: Start and the end position of the region has to be integer. - """ - region = region.replace(":", "-").replace(",", "") - try: - (chromosome, start_position, end_position) = region.split("-") - except ValueError as err: - raise ValueError("Genomic region should follow a ##:####-#### format.") from err - - try: - return (chromosome.replace("chr", ""), int(start_position), int(end_position)) - except ValueError as err: - raise ValueError( - "Start and the end position of the region has to be integer." 
- ) from err - - def calculate_confidence_interval( pvalue_mantissa: Column, pvalue_exponent: Column, diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 38d1e14a2..48f6ee8be 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -10,12 +10,13 @@ import pyspark.sql.functions as f from pyspark.sql.types import ArrayType, FloatType, StringType +from gentropy.common.genomic_region import GenomicRegion, KnownGenomicRegions from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( calculate_neglog_pvalue, order_array_of_structs_by_field, ) -from gentropy.common.utils import get_logsum, parse_region +from gentropy.common.utils import get_logsum from gentropy.dataset.dataset import Dataset from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.variant_index import VariantIndex @@ -49,6 +50,7 @@ class StudyLocusQualityCheck(Enum): MISSING_STUDY (str): Flagging study loci if the study is not found in the study index as a reference DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique. INVALID_VARIANT_IDENTIFIER (str): Flagging study loci where identifier of any tagging variant was not found in the variant index + IN_MHC (str): Flagging study loci in the MHC region """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -70,6 +72,7 @@ class StudyLocusQualityCheck(Enum): INVALID_VARIANT_IDENTIFIER = ( "Some variant identifiers of this locus were not found in variant index" ) + IN_MHC = "MHC region" class CredibleInterval(Enum): @@ -817,32 +820,31 @@ def clump(self: StudyLocus) -> StudyLocus: return self def exclude_region( - self: StudyLocus, region: str, exclude_overlap: bool = False + self: StudyLocus, region: GenomicRegion, exclude_overlap: bool = False ) -> StudyLocus: """Exclude a region from the StudyLocus dataset. 
Args: - region (str): region given in "chr##:#####-####" format + region (GenomicRegion): genomic region object. exclude_overlap (bool): If True, excludes StudyLocus windows with any overlap with the region. Returns: StudyLocus: filtered StudyLocus object. """ - (chromosome, start_position, end_position) = parse_region(region) if exclude_overlap: filter_condition = ~( - (f.col("chromosome") == chromosome) + (f.col("chromosome") == region.chromosome) & ( - (f.col("locusStart") <= end_position) - & (f.col("locusEnd") >= start_position) + (f.col("locusStart") <= region.end) + & (f.col("locusEnd") >= region.start) ) ) else: filter_condition = ~( - (f.col("chromosome") == chromosome) + (f.col("chromosome") == region.chromosome) & ( - (f.col("position") >= start_position) - & (f.col("position") <= end_position) + (f.col("position") >= region.start) + & (f.col("position") <= region.end) ) ) @@ -851,6 +853,29 @@ def exclude_region( _schema=StudyLocus.get_schema(), ) + def qc_MHC_region(self: StudyLocus) -> StudyLocus: + """Adds qualityControl flag when lead overlaps with MHC region. + + Returns: + StudyLocus: including qualityControl flag if in MHC region. + """ + region = GenomicRegion.from_known_genomic_region(KnownGenomicRegions.MHC) + self.df = self.df.withColumn( + "qualityControls", + self.update_quality_flag( + f.col("qualityControls"), + ~( + (f.col("chromosome") == region.chromosome) + & ( + (f.col("position") <= region.end) + & (f.col("position") >= region.start) + ) + ), + StudyLocusQualityCheck.IN_MHC, + ), + ) + return self + def _qc_no_population(self: StudyLocus) -> StudyLocus: """Flag associations where the study doesn't have population information to resolve LD. 
diff --git a/src/gentropy/dataset/summary_statistics.py b/src/gentropy/dataset/summary_statistics.py index 9aaa9b956..b9a44ff34 100644 --- a/src/gentropy/dataset/summary_statistics.py +++ b/src/gentropy/dataset/summary_statistics.py @@ -7,8 +7,9 @@ import pyspark.sql.functions as f +from gentropy.common.genomic_region import GenomicRegion from gentropy.common.schemas import parse_spark_schema -from gentropy.common.utils import parse_region, split_pvalue +from gentropy.common.utils import split_pvalue from gentropy.config import LocusBreakerClumpingConfig, WindowBasedClumpingStepConfig from gentropy.dataset.dataset import Dataset @@ -112,25 +113,25 @@ def locus_breaker_clumping( flanking_distance, ) - def exclude_region(self: SummaryStatistics, region: str) -> SummaryStatistics: + def exclude_region( + self: SummaryStatistics, region: GenomicRegion + ) -> SummaryStatistics: """Exclude a region from the summary stats dataset. Args: - region (str): region given in "chr##:#####-####" format + region (GenomicRegion): Genomic region to be excluded. Returns: SummaryStatistics: filtered summary statistics. 
""" - (chromosome, start_position, end_position) = parse_region(region) - return SummaryStatistics( _df=( self.df.filter( ~( - (f.col("chromosome") == chromosome) + (f.col("chromosome") == region.chromosome) & ( - (f.col("position") >= start_position) - & (f.col("position") <= end_position) + (f.col("position") >= region.start) + & (f.col("position") <= region.end) ) ) ) diff --git a/src/gentropy/locus_breaker_clumping.py b/src/gentropy/locus_breaker_clumping.py index 8eb83816e..7f3649097 100644 --- a/src/gentropy/locus_breaker_clumping.py +++ b/src/gentropy/locus_breaker_clumping.py @@ -2,6 +2,7 @@ from __future__ import annotations +from gentropy.common.genomic_region import GenomicRegion, KnownGenomicRegions from gentropy.common.session import Session from gentropy.dataset.summary_statistics import SummaryStatistics from gentropy.method.locus_breaker_clumping import LocusBreakerClumping @@ -63,7 +64,8 @@ def __init__( ) if remove_mhc: clumped_result = clumped_result.exclude_region( - "chr6:25726063-33400556", exclude_overlap=True + GenomicRegion.from_known_genomic_region(KnownGenomicRegions.MHC), + exclude_overlap=True, ) if collect_locus: diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 41a572e79..da660ca57 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -41,9 +41,10 @@ def __init__( # Running validation then writing output: study_locus_with_qc = ( StudyLocus.from_parquet(session, list(study_locus_path)) - .validate_lead_pvalue( - pvalue_cutoff=gwas_significance - ) # Flagging study locus with subsignificant p-values + # Flagging study locus with subsignificant p-values + .validate_lead_pvalue(pvalue_cutoff=gwas_significance) + # Add flag for MHC region + .qc_MHC_region() .validate_study(study_index) # Flagging studies not in study index .validate_unique_study_locus_id() # Flagging duplicated study locus ids ).persist() # we will need this for 2 types of 
outputs diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index c7538b28b..1daf9bb89 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -526,6 +526,11 @@ def test__qc_no_population(mock_study_locus: StudyLocus) -> None: assert isinstance(mock_study_locus._qc_no_population(), StudyLocus) +def test_qc_MHC_region(mock_study_locus: StudyLocus) -> None: + """Test qc_MHC_region.""" + assert isinstance(mock_study_locus.qc_MHC_region(), StudyLocus) + + def test_ldannotate( mock_study_locus: StudyLocus, mock_study_index: StudyIndex, mock_ld_index: LDIndex ) -> None: diff --git a/tests/gentropy/dataset/test_summary_statistics.py b/tests/gentropy/dataset/test_summary_statistics.py index cf3cfdae7..b1b06442b 100644 --- a/tests/gentropy/dataset/test_summary_statistics.py +++ b/tests/gentropy/dataset/test_summary_statistics.py @@ -6,6 +6,7 @@ from pyspark.sql import types as t +from gentropy.common.genomic_region import GenomicRegion from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.summary_statistics import SummaryStatistics @@ -44,7 +45,10 @@ def test_summary_statistics__exclude_region__return_type( ) -> None: """Testing if the exclude region method returns the right datatype.""" assert isinstance( - mock_summary_statistics.exclude_region("chr12:124-1245"), SummaryStatistics + mock_summary_statistics.exclude_region( + GenomicRegion.from_string("chr12:124-1245") + ), + SummaryStatistics, ) @@ -85,7 +89,7 @@ def test_summary_statistics__exclude_region__correctness( df = spark.createDataFrame(data, schema=schema) filtered_sumstas = SummaryStatistics( _df=df, _schema=SummaryStatistics.get_schema() - ).exclude_region("c1:9-16") + ).exclude_region(GenomicRegion.from_string("c1:9-16")) # Test for the correct number of rows returned: assert filtered_sumstas.df.count() == 8 From d84d43bdc0e75f4b69018b529bccd6d0821098c8 Mon Sep 17 00:00:00 2001 From: Vivien 
Ho <56025826+vivienho@users.noreply.github.com> Date: Wed, 18 Sep 2024 17:46:19 +0100 Subject: [PATCH 039/188] fix: add condition to eQTL study index and schema (#770) --- src/gentropy/assets/schemas/study_index.json | 6 ++++++ src/gentropy/datasource/eqtl_catalogue/finemapping.py | 1 + 2 files changed, 7 insertions(+) diff --git a/src/gentropy/assets/schemas/study_index.json b/src/gentropy/assets/schemas/study_index.json index b4a2257d7..e18401917 100644 --- a/src/gentropy/assets/schemas/study_index.json +++ b/src/gentropy/assets/schemas/study_index.json @@ -250,6 +250,12 @@ "type": "boolean", "nullable": true, "metadata": {} + }, + { + "name": "condition", + "type": "string", + "nullable": true, + "metadata": {} } ] } diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index a5c02dd3e..11ec5bef1 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -206,6 +206,7 @@ def parse_susie_results( f.lit(True).alias("hasSumstats"), f.col("molecular_trait_id"), f.col("pmid").alias("pubmedId"), + f.col("condition_label").alias("condition"), ) ) From c3651b7e8ecad3631fce0d4e9fa79fea1b59d40a Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Wed, 18 Sep 2024 22:42:51 +0100 Subject: [PATCH 040/188] fix(effect harmonisation): addressing beta harmonisation bug (#762) * fix: turning OR to beta harmonisation on for GWAS Catalog * fix(ingestion): addressing beta harmonisation bug * fix(ingestion): addressing beta harmonisation bug * test: adding test for effect harmonisation * test: adding doctests for beta harmonisation * fix: fixing value checking bug: effect value needs to be cast to double type * fix: typos in gwas ingestion script --- src/gentropy/common/spark_helpers.py | 32 ++ .../datasource/gwas_catalog/associations.py | 430 +++++++++++------- .../test_gwas_catalog_associations.py | 2 +- 3 files changed, 294 insertions(+), 170 
deletions(-) diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 65d3ae17b..791fb913d 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -429,6 +429,7 @@ def order_array_of_structs_by_two_fields( """ ) + def map_column_by_dictionary(col: Column, mapping_dict: Dict[str, str]) -> Column: """Map column values to dictionary values by key. @@ -655,3 +656,34 @@ def create_empty_column_if_not_exists( """ return f.lit(None).cast(col_schema).alias(col_name) + + +def get_standard_error_from_confidence_interval(lower: Column, upper: Column) -> Column: + """Compute the standard error from the confidence interval. + + Args: + lower (Column): The lower bound of the confidence interval. + upper (Column): The upper bound of the confidence interval. + + Returns: + Column: The standard error. + + Examples: + >>> data = [(0.5, 1.5), (None, 2.5), (None, None)] + >>> ( + ... spark.createDataFrame(data, ['lower', 'upper']) + ... .select( + ... get_standard_error_from_confidence_interval(f.col('lower'), f.col('upper')).alias('standard_error') + ... ) + ... .show() + ... 
) + +-------------------+ + | standard_error| + +-------------------+ + |0.25510204081632654| + | null| + | null| + +-------------------+ + + """ + return (upper - lower) / (2 * 1.96) diff --git a/src/gentropy/datasource/gwas_catalog/associations.py b/src/gentropy/datasource/gwas_catalog/associations.py index 7a6a3c394..6781d045a 100644 --- a/src/gentropy/datasource/gwas_catalog/associations.py +++ b/src/gentropy/datasource/gwas_catalog/associations.py @@ -9,15 +9,15 @@ from typing import TYPE_CHECKING import pyspark.sql.functions as f -from pyspark.sql.types import DoubleType, IntegerType, LongType +from pyspark.sql.types import DoubleType, FloatType, IntegerType, LongType from pyspark.sql.window import Window from gentropy.assets import data from gentropy.common.spark_helpers import ( get_record_with_maximum_value, - pvalue_to_zscore, + get_standard_error_from_confidence_interval, ) -from gentropy.common.utils import parse_efos +from gentropy.common.utils import convert_odds_ratio_to_beta, parse_efos from gentropy.config import WindowBasedClumpingStepConfig from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck @@ -140,8 +140,8 @@ def _normalise_pvaluetext(p_value_text: Column) -> Column: ) @staticmethod - def _normalise_risk_allele(risk_allele: Column) -> Column: - """Normalised risk allele column to a standardised format. + def _extract_risk_allele(risk_allele: Column) -> Column: + """Extract risk allele from provided "STRONGEST SNP-RISK ALLELE" input column. If multiple risk alleles are present, the first one is returned. 
@@ -155,7 +155,7 @@ def _normalise_risk_allele(risk_allele: Column) -> Column: >>> import pyspark.sql.types as t >>> d = [("rs1234-A-G"), ("rs1234-A"), ("rs1234-A; rs1235-G")] >>> df = spark.createDataFrame(d, t.StringType()) - >>> df.withColumn('normalised', GWASCatalogCuratedAssociationsParser._normalise_risk_allele(f.col('value'))).show() + >>> df.withColumn('normalised', GWASCatalogCuratedAssociationsParser._extract_risk_allele(f.col('value'))).show() +------------------+----------+ | value|normalised| +------------------+----------+ @@ -164,7 +164,6 @@ def _normalise_risk_allele(risk_allele: Column) -> Column: |rs1234-A; rs1235-G| A| +------------------+----------+ - """ # GWAS Catalog to risk allele mapping return f.split(f.split(risk_allele, "; ").getItem(0), "-").getItem(1) @@ -195,7 +194,7 @@ def _collect_rsids( return f.array_distinct(f.array(snp_id, snp_id_current, risk_allele)) @staticmethod - def _map_variants_to_variant_index( + def _map_variants_to_gnomad_variants( gwas_associations: DataFrame, variant_index: VariantIndex ) -> DataFrame: """Add variant metadata in associations. @@ -220,7 +219,7 @@ def _map_variants_to_variant_index( f.col("SNP_ID_CURRENT"), f.split(f.col("STRONGEST SNP-RISK ALLELE"), "; ").getItem(0), ).alias("rsIdsGwasCatalog"), - GWASCatalogCuratedAssociationsParser._normalise_risk_allele( + GWASCatalogCuratedAssociationsParser._extract_risk_allele( f.col("STRONGEST SNP-RISK ALLELE") ).alias("riskAllele"), ) @@ -570,175 +569,107 @@ def _are_alleles_palindromic( @staticmethod def _harmonise_beta( - risk_allele: Column, - reference_allele: Column, - alternate_allele: Column, effect_size: Column, confidence_interval: Column, + flipping_needed: Column, ) -> Column: - """A function to extract the beta value from the effect size and confidence interval. + """A function to extract the beta value from the effect size and confidence interval and harmonises for the alternate allele. 
If the confidence interval contains the word "increase" or "decrease" it indicates, we are dealing with betas. - If it's "increase" and the effect size needs to be harmonized, then multiply the effect size by -1 + If it's "increase" and the effect size needs to be harmonized, then multiply the effect size by -1. + The sign of the effect size is flipped if the confidence interval contains "decrease". + + eg. if the reported value is 0.5, and the confidence interval tells "decrease"? -> beta is -0.5 Args: - risk_allele (Column): Risk allele column - reference_allele (Column): Reference allele column - alternate_allele (Column): Alternate allele column - effect_size (Column): GWAS Catalog effect size column - confidence_interval (Column): GWAS Catalog confidence interval column + effect_size (Column): GWAS Catalog effect size column. + confidence_interval (Column): GWAS Catalog confidence interval column to know the direction of the effect. + flipping_needed (Column): Boolean flag indicating if effect needs to be flipped based on the alleles. Returns: Column: A column containing the beta value. + + Examples: + >>> d = [ + ... # positive effect -no flipping: + ... (0.5, 'increase', False), + ... # Positive effect - flip: + ... (0.5, 'decrease', False), + ... # Positive effect - flip: + ... (0.5, 'decrease', True), + ... # Negative effect - no flip: + ... (0.5, 'increase', True), + ... # Negative effect - flip: + ... (0.5, 'decrease', False), + ... ] + >>> ( + ... spark.createDataFrame(d, ['effect', 'ci_text', 'flip']) + ... .select("effect", "ci_text", 'flip', GWASCatalogCuratedAssociationsParser._harmonise_beta(f.col("effect"), f.col("ci_text"), f.lit(False)).alias("beta")) + ... .show() + ... 
) + +------+--------+-----+----+ + |effect| ci_text| flip|beta| + +------+--------+-----+----+ + | 0.5|increase|false| 0.5| + | 0.5|decrease|false|-0.5| + | 0.5|decrease| true|-0.5| + | 0.5|increase| true| 0.5| + | 0.5|decrease|false|-0.5| + +------+--------+-----+----+ + """ return ( f.when( - GWASCatalogCuratedAssociationsParser._are_alleles_palindromic( - reference_allele, alternate_allele - ), - None, - ) - .when( - ( - GWASCatalogCuratedAssociationsParser._effect_needs_harmonisation( - risk_allele, reference_allele - ) - & confidence_interval.contains("increase") - ) - | ( - ~GWASCatalogCuratedAssociationsParser._effect_needs_harmonisation( - risk_allele, reference_allele - ) - & confidence_interval.contains("decrease") - ), + (flipping_needed & confidence_interval.contains("increase")) + | (~flipping_needed & confidence_interval.contains("decrease")), -effect_size, ) .otherwise(effect_size) .cast(DoubleType()) ) - @staticmethod - def _harmonise_beta_ci( - risk_allele: Column, - reference_allele: Column, - alternate_allele: Column, - effect_size: Column, - confidence_interval: Column, - p_value: Column, - direction: str, - ) -> Column: - """Calculating confidence intervals for beta values. - - Args: - risk_allele (Column): Risk allele column - reference_allele (Column): Reference allele column - alternate_allele (Column): Alternate allele column - effect_size (Column): GWAS Catalog effect size column - confidence_interval (Column): GWAS Catalog confidence interval column - p_value (Column): GWAS Catalog p-value column - direction (str): This is the direction of the confidence interval. It can be either "upper" or "lower". - - Returns: - Column: The upper and lower bounds of the confidence interval for the beta coefficient. 
- """ - zscore_95 = f.lit(1.96) - beta = GWASCatalogCuratedAssociationsParser._harmonise_beta( - risk_allele, - reference_allele, - alternate_allele, - effect_size, - confidence_interval, - ) - zscore = pvalue_to_zscore(p_value) - return ( - f.when(f.lit(direction) == "upper", beta + f.abs(zscore_95 * beta) / zscore) - .when(f.lit(direction) == "lower", beta - f.abs(zscore_95 * beta) / zscore) - .otherwise(None) - ) - @staticmethod def _harmonise_odds_ratio( - risk_allele: Column, - reference_allele: Column, - alternate_allele: Column, effect_size: Column, - confidence_interval: Column, + flipping_needed: Column, ) -> Column: - """Harmonizing odds ratio. + """Odds ratio is either propagated as is, or flipped if indicated, meaning returning a reciprocal value. Args: - risk_allele (Column): Risk allele column - reference_allele (Column): Reference allele column - alternate_allele (Column): Alternate allele column - effect_size (Column): GWAS Catalog effect size column - confidence_interval (Column): GWAS Catalog confidence interval column + effect_size (Column): containing effect size, + flipping_needed (Column): Boolean flag indicating if effect needs to be flipped Returns: Column: A column with the odds ratio, or 1/odds_ratio if harmonization required. + + Examples: + >>> d = [(0.5, False), (0.5, True), (0.0, False), (0.0, True)] + >>> ( + ... spark.createDataFrame(d, ['effect', 'flip']) + ... .select("effect", "flip", GWASCatalogCuratedAssociationsParser._harmonise_odds_ratio(f.col("effect"), f.col("flip")).alias("odds_ratio")) + ... .show() + ... 
) + +------+-----+----------+ + |effect| flip|odds_ratio| + +------+-----+----------+ + | 0.5|false| 0.5| + | 0.5| true| 2.0| + | 0.0|false| 0.0| + | 0.0| true| null| + +------+-----+----------+ + """ return ( - f.when( - GWASCatalogCuratedAssociationsParser._are_alleles_palindromic( - reference_allele, alternate_allele - ), - None, - ) + # We are not flipping zero effect size: + f.when((effect_size.cast(DoubleType()) == 0) & flipping_needed, f.lit(None)) .when( - ( - GWASCatalogCuratedAssociationsParser._effect_needs_harmonisation( - risk_allele, reference_allele - ) - & ~confidence_interval.rlike("|".join(["decrease", "increase"])) - ), + flipping_needed, 1 / effect_size, ) .otherwise(effect_size) .cast(DoubleType()) ) - @staticmethod - def _harmonise_odds_ratio_ci( - risk_allele: Column, - reference_allele: Column, - alternate_allele: Column, - effect_size: Column, - confidence_interval: Column, - p_value: Column, - direction: str, - ) -> Column: - """Calculating confidence intervals for beta values. - - Args: - risk_allele (Column): Risk allele column - reference_allele (Column): Reference allele column - alternate_allele (Column): Alternate allele column - effect_size (Column): GWAS Catalog effect size column - confidence_interval (Column): GWAS Catalog confidence interval column - p_value (Column): GWAS Catalog p-value column - direction (str): This is the direction of the confidence interval. It can be either "upper" or "lower". - - Returns: - Column: The upper and lower bounds of the 95% confidence interval for the odds ratio. 
- """ - zscore_95 = f.lit(1.96) - odds_ratio = GWASCatalogCuratedAssociationsParser._harmonise_odds_ratio( - risk_allele, - reference_allele, - alternate_allele, - effect_size, - confidence_interval, - ) - odds_ratio_estimate = f.log(odds_ratio) - zscore = pvalue_to_zscore(p_value) - odds_ratio_se = odds_ratio_estimate / zscore - return f.when( - f.lit(direction) == "upper", - f.exp(odds_ratio_estimate + f.abs(zscore_95 * odds_ratio_se)), - ).when( - f.lit(direction) == "lower", - f.exp(odds_ratio_estimate - f.abs(zscore_95 * odds_ratio_se)), - ) - @staticmethod def _concatenate_substudy_description( association_trait: Column, pvalue_text: Column, mapped_trait_uri: Column @@ -988,11 +919,177 @@ def _qc_palindromic_alleles( StudyLocusQualityCheck.PALINDROMIC_ALLELE_FLAG, ) + @staticmethod + def _get_effect_type(ci_text: Column) -> Column: + """Extracts the effect type from the 95% CI text. + + The GWAS Catalog confidence interval column contains text that can be used to infer the effect type. + If the text contains "increase" or "decrease", the effect type is beta, otherwise it is odds ratio. + Null columns return null as the effect type. + + Args: + ci_text (Column): Column containing the 95% CI text. + + Returns: + Column: A column containing the effect type. 
+ + Examples: + >>> data = [{"ci_text": "95% CI: [0.1-0.2]"}, {"ci_text": "95% CI: [0.1-0.2] increase"}, {"ci_text": "95% CI: [0.1-0.2] decrease"}, {"ci_text": None}] + >>> spark.createDataFrame(data).select('ci_text', GWASCatalogCuratedAssociationsParser._get_effect_type(f.col('ci_text')).alias('effect_type')).show(truncate=False) + +--------------------------+-----------+ + |ci_text |effect_type| + +--------------------------+-----------+ + |95% CI: [0.1-0.2] |odds_ratio | + |95% CI: [0.1-0.2] increase|beta | + |95% CI: [0.1-0.2] decrease|beta | + |null |null | + +--------------------------+-----------+ + + + """ + return f.when( + f.lower(ci_text).contains("increase") + | f.lower(ci_text).contains("decrease"), + f.lit("beta"), + ).when(ci_text.isNotNull(), f.lit("odds_ratio")) + + @staticmethod + def harmonise_association_effect_to_beta( + df: DataFrame, + ) -> DataFrame: + """Harmonise effect to beta value. + + The harmonisation process has a number of steps: + - Extracting the reported effect allele. + - Flagging palindromic alleles. + - Flagging associations where the effect direction needs to be flipped. + - Flagging the effect type. + - Getting the standard error from the confidence interval text. + - Harmonising both beta and odds ratio. + - Converting the odds ratio to beta. + + Args: + df (DataFrame): DataFrame with the following columns: + + Returns: + DataFrame: DataFrame with the following columns: + + Raises: + ValueError: If any of the required columns are missing. + + Examples: + >>> data = [ + ... # Flagged as palindromic: + ... ('rs123-T', 'A', 'T', '0.1', '[0.08-0.12] unit increase'), + ... # Not palindromic, beta needs to be flipped: + ... ('rs123-C', 'G', 'T', '0.1', '[0.08-0.12] unit increase'), + ... # Beta is not flipped: + ... ('rs123-T', 'C', 'T', '0.1', '[0.08-0.12] unit increase'), + ... # odds ratio: + ... ('rs123-T', 'C', 'T', '0.1', '[0.08-0.12]'), + ... # odds ratio flipped: + ... ('rs123-C', 'G', 'T', '0.1', '[0.08-0.12]'), + ... 
] + >>> schema = ["STRONGEST SNP-RISK ALLELE", "referenceAllele", "alternateAllele", "OR or BETA", "95% CI (TEXT)"] + >>> df = spark.createDataFrame(data, schema) + >>> GWASCatalogCuratedAssociationsParser.harmonise_association_effect_to_beta(df).show() + +-------------------------+---------------+---------------+----------+--------------------+-------------------+--------------------+ + |STRONGEST SNP-RISK ALLELE|referenceAllele|alternateAllele|OR or BETA| 95% CI (TEXT)| beta| standardError| + +-------------------------+---------------+---------------+----------+--------------------+-------------------+--------------------+ + | rs123-T| A| T| 0.1|[0.08-0.12] unit ...| null| null| + | rs123-C| G| T| 0.1|[0.08-0.12] unit ...| -0.1|0.010204081404574064| + | rs123-T| C| T| 0.1|[0.08-0.12] unit ...| 0.1|0.010204081404574064| + | rs123-T| C| T| 0.1| [0.08-0.12]|-2.3025850929940455| null| + | rs123-C| G| T| 0.1| [0.08-0.12]| 2.302585092994046| null| + +-------------------------+---------------+---------------+----------+--------------------+-------------------+--------------------+ + + """ + # Testing if all columns are in the dataframe: + required_columns = [ + "STRONGEST SNP-RISK ALLELE", + "referenceAllele", + "alternateAllele", + "OR or BETA", + "95% CI (TEXT)", + ] + + for column in required_columns: + if column not in df.columns: + raise ValueError( + f"Column {column} is required for harmonising effect to beta value." 
+ ) + + return ( + df.withColumn( + "reportedRiskAllele", + GWASCatalogCuratedAssociationsParser._extract_risk_allele( + f.col("STRONGEST SNP-RISK ALLELE") + ), + ) + .withColumns( + { + # Flag palindromic alleles: + "isAllelePalindromic": GWASCatalogCuratedAssociationsParser._are_alleles_palindromic( + f.col("referenceAllele"), f.col("alternateAllele") + ), + # Flag associations, where the effect direction needs to be flipped: + "needsFlipping": GWASCatalogCuratedAssociationsParser._effect_needs_harmonisation( + f.col("reportedRiskAllele"), f.col("referenceAllele") + ), + # Flag effect type: + "effectType": GWASCatalogCuratedAssociationsParser._get_effect_type( + f.col("95% CI (TEXT)") + ), + # Get standard error from confidence interval text: + "standardError": get_standard_error_from_confidence_interval( + f.regexp_extract( + "95% CI (TEXT)", r"\[(\d+\.*\d*)-\d+\.*\d*\]", 1 + ).cast(FloatType()), + f.regexp_extract( + "95% CI (TEXT)", r"\[\d+\.*\d*-(\d+\.*\d*)\]", 1 + ).cast(FloatType()), + ), + } + ) + # Harmonise both beta and odds ratio: + .withColumns( + { # Normalise beta value of the association: + "effect_beta": f.when( + (f.col("effectType") == "beta") + & (~f.col("isAllelePalindromic")), + GWASCatalogCuratedAssociationsParser._harmonise_beta( + f.col("OR or BETA"), + f.col("95% CI (TEXT)"), + f.col("needsFlipping"), + ), + ), + # Normalise odds ratio of the association: + "effect_odds_ratio": f.when( + (f.col("effectType") == "odds_ratio") + & (~f.col("isAllelePalindromic")), + GWASCatalogCuratedAssociationsParser._harmonise_odds_ratio( + f.col("OR or BETA"), + f.col("needsFlipping"), + ), + ), + }, + ) + .select( + *df.columns, + # Harmonise OR effect to beta: + *convert_odds_ratio_to_beta( + f.col("effect_beta"), + f.col("effect_odds_ratio"), + f.col("standardError"), + ), + ) + ) + @classmethod def from_source( cls: type[GWASCatalogCuratedAssociationsParser], gwas_associations: DataFrame, - variant_index: VariantIndex, + gnomad_variants: 
VariantIndex, pvalue_threshold: float = WindowBasedClumpingStepConfig.gwas_significance, ) -> StudyLocusGWASCatalog: """Read GWASCatalog associations. @@ -1002,7 +1099,7 @@ def from_source( Args: gwas_associations (DataFrame): GWAS Catalog raw associations dataset. - variant_index (VariantIndex): Variant index dataset with available allele frequencies. + gnomad_variants (VariantIndex): Variant dataset from GnomAD, with allele frequencies. pvalue_threshold (float): P-value threshold for flagging associations. Returns: @@ -1017,26 +1114,29 @@ def from_source( .transform( # Map/harmonise variants to variant annotation dataset: # This function adds columns: variantId, referenceAllele, alternateAllele, chromosome, position - lambda df: GWASCatalogCuratedAssociationsParser._map_variants_to_variant_index( - df, variant_index + lambda df: GWASCatalogCuratedAssociationsParser._map_variants_to_gnomad_variants( + df, gnomad_variants ) ) - .withColumn( + .withColumns( # Perform all quality control checks: - "qualityControls", - GWASCatalogCuratedAssociationsParser._qc_all( - f.array().alias("qualityControls"), - f.col("CHR_ID"), - f.col("CHR_POS").cast(IntegerType()), - f.col("referenceAllele"), - f.col("alternateAllele"), - f.col("STRONGEST SNP-RISK ALLELE"), - *GWASCatalogCuratedAssociationsParser._parse_pvalue( - f.col("P-VALUE") - ), - pvalue_threshold, - ), + { + "qualityControls": GWASCatalogCuratedAssociationsParser._qc_all( + f.array().alias("qualityControls"), + f.col("CHR_ID"), + f.col("CHR_POS").cast(IntegerType()), + f.col("referenceAllele"), + f.col("alternateAllele"), + f.col("STRONGEST SNP-RISK ALLELE"), + *GWASCatalogCuratedAssociationsParser._parse_pvalue( + f.col("P-VALUE") + ), + pvalue_threshold, + ) + } ) + # Harmonising effect to beta value and flip effect if needed: + .transform(cls.harmonise_association_effect_to_beta) .select( # INSIDE STUDY-LOCUS SCHEMA: "studyLocusId", @@ -1045,16 +1145,6 @@ def from_source( "chromosome", "position", f.col("STUDY 
ACCESSION").alias("studyId"), - # beta value of the association - GWASCatalogCuratedAssociationsParser._harmonise_beta( - GWASCatalogCuratedAssociationsParser._normalise_risk_allele( - f.col("STRONGEST SNP-RISK ALLELE") - ), - f.col("referenceAllele"), - f.col("alternateAllele"), - f.col("OR or BETA"), - f.col("95% CI (TEXT)"), - ).alias("beta"), # p-value of the association, string: split into exponent and mantissa. *GWASCatalogCuratedAssociationsParser._parse_pvalue(f.col("P-VALUE")), # Capturing phenotype granularity at the association level @@ -1065,6 +1155,8 @@ def from_source( ).alias("subStudyDescription"), # Quality controls (array of strings) "qualityControls", + "beta", + "standardError", ), _schema=StudyLocusGWASCatalog.get_schema(), ) diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py index 2179150cd..fec181543 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py @@ -69,7 +69,7 @@ def test_map_variants_to_variant_index( ) -> None: """Test mapping to variant annotation variants.""" assert isinstance( - GWASCatalogCuratedAssociationsParser._map_variants_to_variant_index( + GWASCatalogCuratedAssociationsParser._map_variants_to_gnomad_variants( sample_gwas_catalog_associations.withColumn( "studyLocusId", f.monotonically_increasing_id().cast(LongType()) ), From c292e849a2ffd88c62155c511d3c311a6579a54b Mon Sep 17 00:00:00 2001 From: Kirill Tsukanov Date: Thu, 19 Sep 2024 13:24:41 +0100 Subject: [PATCH 041/188] perf: quickly build a Docker image for every branch (#773) * perf: cache Docker image layers * chore: temporary add branch to test * perf: separate AMD and ARM builds for gentropy * perf: temporarily test only VEP image separately * perf: test ARM/AMD gentropy/VEP separately * perf: introduce quick build for all commits * perf: quick 
build for all tags * fix: use correct syntax for tags * fix: revert accidental changes for the VEP step --- .github/workflows/artifact.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/artifact.yml b/.github/workflows/artifact.yml index 665dab0d7..61dbab28e 100644 --- a/.github/workflows/artifact.yml +++ b/.github/workflows/artifact.yml @@ -2,8 +2,8 @@ name: Build and Push to Artifact Registry "on": push: - branches: ["dev"] - tags: ["v*"] + branches: ["*"] + tags: ["*"] env: PROJECT_ID: open-targets-genetics-dev @@ -39,7 +39,18 @@ jobs: run: |- gcloud auth configure-docker ${{ env.REGION }}-docker.pkg.dev --quiet + - name: Quick Docker build (gentropy only, AMD64 only, with layer cache) + uses: docker/build-push-action@v6 + with: + platforms: linux/amd64 + push: true + tags: "${{ env.GAR_LOCATION }}/${{ env.REPOSITORY }}/gentropy:${{ github.ref_name }}" + context: . + cache-from: type=gha + cache-to: type=gha,mode=max + - name: Build and push gentropy image + if: github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v') uses: docker/build-push-action@v6 with: platforms: linux/amd64,linux/arm64 @@ -48,6 +59,7 @@ jobs: context: . 
- name: Build and push VEP image + if: github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v') uses: docker/build-push-action@v6 with: platforms: linux/amd64 From 785dad067c58edb1646af3d89a1580687685b400 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 20 Sep 2024 12:23:20 +0100 Subject: [PATCH 042/188] feat: flag all top-hits from GWAS catalog curation (#775) --- src/gentropy/dataset/study_locus.py | 4 +++- .../datasource/gwas_catalog/associations.py | 18 ++++++++++++++++++ .../datasource/gwas_catalog/study_splitter.py | 4 +++- .../test_gwas_catalog_associations.py | 9 +++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 48f6ee8be..283280527 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -48,8 +48,9 @@ class StudyLocusQualityCheck(Enum): NOT_QUALIFYING_LD_BLOCK (str): LD block does not contain variants at the required R^2 threshold FAILED_STUDY (str): Flagging study loci if the study has failed QC MISSING_STUDY (str): Flagging study loci if the study is not found in the study index as a reference - DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique. 
+ DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique INVALID_VARIANT_IDENTIFIER (str): Flagging study loci where identifier of any tagging variant was not found in the variant index + TOP_HIT (str): Study locus from curated top hit IN_MHC (str): Flagging study loci in the MHC region """ @@ -73,6 +74,7 @@ class StudyLocusQualityCheck(Enum): "Some variant identifiers of this locus were not found in variant index" ) IN_MHC = "MHC region" + TOP_HIT = "Study locus from curated top hit" class CredibleInterval(Enum): diff --git a/src/gentropy/datasource/gwas_catalog/associations.py b/src/gentropy/datasource/gwas_catalog/associations.py index 6781d045a..dd9aa3fe2 100644 --- a/src/gentropy/datasource/gwas_catalog/associations.py +++ b/src/gentropy/datasource/gwas_catalog/associations.py @@ -1212,6 +1212,24 @@ def qc_ambiguous_study(self: StudyLocusGWASCatalog) -> StudyLocusGWASCatalog: ) return self + def qc_flag_all_tophits(self: StudyLocusGWASCatalog) -> StudyLocusGWASCatalog: + """Flag all associations as top hits. + + Returns: + StudyLocusGWASCatalog: Updated study locus. 
+ """ + return StudyLocusGWASCatalog( + _df=self._df.withColumn( + "qualityControls", + StudyLocus.update_quality_flag( + f.col("qualityControls"), + f.lit(True), + StudyLocusQualityCheck.TOP_HIT, + ), + ), + _schema=StudyLocusGWASCatalog.get_schema(), + ) + def apply_inclusion_list( self: StudyLocusGWASCatalog, inclusion_list: DataFrame ) -> StudyLocusGWASCatalog: diff --git a/src/gentropy/datasource/gwas_catalog/study_splitter.py b/src/gentropy/datasource/gwas_catalog/study_splitter.py index 882a9893a..0b11e797a 100644 --- a/src/gentropy/datasource/gwas_catalog/study_splitter.py +++ b/src/gentropy/datasource/gwas_catalog/study_splitter.py @@ -132,5 +132,7 @@ def split( st_ass.select( "updatedStudyId", "studyId", "subStudyDescription" ).distinct() - ).qc_ambiguous_study(), + ) + .qc_ambiguous_study() + .qc_flag_all_tophits(), ) diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py index fec181543..130097f25 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py @@ -77,3 +77,12 @@ def test_map_variants_to_variant_index( ), DataFrame, ) + + +def test_qc_flag_all_tophits( + mock_study_locus_gwas_catalog: StudyLocusGWASCatalog, +) -> None: + """Test qc flag all top hits.""" + assert isinstance( + mock_study_locus_gwas_catalog.qc_flag_all_tophits(), StudyLocusGWASCatalog + ) From 018defa61e8faff3808c02635a5b468726e6c0db Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 20 Sep 2024 14:06:26 +0100 Subject: [PATCH 043/188] docs: fix broken refs (#768) Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/dataset/summary_statistics.py | 2 +- src/gentropy/method/sumstat_imputation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/summary_statistics.py 
b/src/gentropy/dataset/summary_statistics.py index b9a44ff34..d0875fe85 100644 --- a/src/gentropy/dataset/summary_statistics.py +++ b/src/gentropy/dataset/summary_statistics.py @@ -91,7 +91,7 @@ def locus_breaker_clumping( ) -> StudyLocus: """Generate study-locus from summary statistics using locus-breaker clumping method with locus boundaries. - For more info, see [`locus_breaker`][gentropy.method.locus_breaker_clumping.locus_breaker] + For more info, see [`locus_breaker`][gentropy.method.locus_breaker_clumping.LocusBreakerClumping] Args: baseline_pvalue_cutoff (float, optional): Baseline significance we consider for the locus. diff --git a/src/gentropy/method/sumstat_imputation.py b/src/gentropy/method/sumstat_imputation.py index 8375fa84f..b53295560 100644 --- a/src/gentropy/method/sumstat_imputation.py +++ b/src/gentropy/method/sumstat_imputation.py @@ -24,7 +24,7 @@ def raiss_model( Args: z_scores_known (np.ndarray): the vector of known Z scores ld_matrix_known (np.ndarray) : the matrix of known LD correlations - ld_matrix_known_missing (np.ndarray): LD matrix of known SNPs with other unknown SNPs in large matrix (similar to ld[unknowns, :][:,known]) + ld_matrix_known_missing (np.ndarray): LD matrix of known SNPs with other unknown SNPs in large matrix (similar to `ld[unknowns, :][:,known]`) lamb (float): size of the small value added to the diagonal of the covariance matrix before inversion. Defaults to 0.01. rtol (float): threshold to filter eigenvectos by its eigenvalue. It makes an inversion biased but much more numerically robust. Default to 0.01. 
From ad3f503ae988f7b09fb6723ca2c6a8c7ba0b8d90 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 20 Sep 2024 15:29:30 +0100 Subject: [PATCH 044/188] feat: flag PICS top hits in studies with credset sumstats (#777) Co-authored-by: Daniel Suveges --- src/gentropy/dataset/study_locus.py | 42 +++++++++++++ src/gentropy/study_locus_validation.py | 1 + tests/gentropy/dataset/test_study_locus.py | 71 ++++++++++++++++++++++ 3 files changed, 114 insertions(+) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 283280527..e2f9dfece 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -52,6 +52,7 @@ class StudyLocusQualityCheck(Enum): INVALID_VARIANT_IDENTIFIER (str): Flagging study loci where identifier of any tagging variant was not found in the variant index TOP_HIT (str): Study locus from curated top hit IN_MHC (str): Flagging study loci in the MHC region + REDUNDANT_PICS_TOP_HIT (str): Flagging study loci in studies with PICS results from summary statistics """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -74,6 +75,9 @@ class StudyLocusQualityCheck(Enum): "Some variant identifiers of this locus were not found in variant index" ) IN_MHC = "MHC region" + REDUNDANT_PICS_TOP_HIT = ( + "PICS results from summary statistics available for this same study" + ) TOP_HIT = "Study locus from curated top hit" @@ -878,6 +882,44 @@ def qc_MHC_region(self: StudyLocus) -> StudyLocus: ) return self + def qc_redundant_top_hits_from_PICS(self: StudyLocus) -> StudyLocus: + """Flag associations from top hits when the study contains other PICS associations from summary statistics. + + This flag can be useful to identify top hits that should be explained by other associations in the study derived from the summary statistics. + + Returns: + StudyLocus: Updated study locus with redundant top hits flagged. 
+ """ + studies_with_pics_sumstats = ( + self.df.filter(f.col("finemappingMethod") == "pics") + # Returns True if the study contains any PICS associations from summary statistics + .withColumn( + "hasPicsSumstats", + ~f.array_contains( + "qualityControls", StudyLocusQualityCheck.TOP_HIT.value + ), + ) + .groupBy("studyId") + .agg(f.max(f.col("hasPicsSumstats")).alias("studiesWithPicsSumstats")) + ) + + return StudyLocus( + _df=self.df.join(studies_with_pics_sumstats, on="studyId", how="left") + .withColumn( + "qualityControls", + self.update_quality_flag( + f.col("qualityControls"), + f.array_contains( + "qualityControls", StudyLocusQualityCheck.TOP_HIT.value + ) + & f.col("studiesWithPicsSumstats"), + StudyLocusQualityCheck.REDUNDANT_PICS_TOP_HIT, + ), + ) + .drop("studiesWithPicsSumstats"), + _schema=StudyLocus.get_schema(), + ) + def _qc_no_population(self: StudyLocus) -> StudyLocus: """Flag associations where the study doesn't have population information to resolve LD. diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index da660ca57..e3d10f3db 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -46,6 +46,7 @@ def __init__( # Add flag for MHC region .qc_MHC_region() .validate_study(study_index) # Flagging studies not in study index + .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics .validate_unique_study_locus_id() # Flagging duplicated study locus ids ).persist() # we will need this for 2 types of outputs diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 1daf9bb89..94390d20b 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -778,3 +778,74 @@ def test_study_validation_correctness(self: TestStudyLocusValidation) -> None: ) .count() ) == 1 + + +class TestStudyLocusRedundancyFlagging: + """Collection of tests 
related to flagging redundant credible sets.""" + + STUDY_LOCUS_DATA = [ + (1, "v1", "s1", "pics", []), + (2, "v2", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + (3, "v3", "s1", "pics", []), + (3, "v3", "s1", "pics", []), + (1, "v1", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + (1, "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + (1, "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + (1, "v1", "s3", "SuSie", []), + (1, "v1", "s3", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + (1, "v1", "s4", "pics", []), + (1, "v1", "s4", "SuSie", []), + (1, "v1", "s4", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ] + + STUDY_LOCUS_SCHEMA = t.StructType( + [ + t.StructField("studyLocusId", t.LongType(), False), + t.StructField("variantId", t.StringType(), False), + t.StructField("studyId", t.StringType(), False), + t.StructField("finemappingMethod", t.StringType(), False), + t.StructField("qualityControls", t.ArrayType(t.StringType()), False), + ] + ) + + @pytest.fixture(autouse=True) + def _setup(self: TestStudyLocusRedundancyFlagging, spark: SparkSession) -> None: + """Setup study locus for testing.""" + self.study_locus = StudyLocus( + _df=spark.createDataFrame( + self.STUDY_LOCUS_DATA, schema=self.STUDY_LOCUS_SCHEMA + ), + _schema=StudyLocus.get_schema(), + ) + + def test_qc_redundant_top_hits_from_PICS_returntype( + self: TestStudyLocusRedundancyFlagging, + ) -> None: + """Test qc_redundant_top_hits_from_PICS.""" + assert isinstance( + self.study_locus.qc_redundant_top_hits_from_PICS(), StudyLocus + ) + + def test_qc_redundant_top_hits_from_PICS_no_data_loss( + self: TestStudyLocusRedundancyFlagging, + ) -> None: + """Testing if the redundancy flagging returns the same number of rows.""" + assert ( + self.study_locus.qc_redundant_top_hits_from_PICS().df.count() + == self.study_locus.df.count() + ) + + def test_qc_redundant_top_hits_from_PICS_correctness( + self: TestStudyLocusRedundancyFlagging, + ) -> None: + 
"""Testing if the study validation flags the right number of studies.""" + assert ( + self.study_locus.qc_redundant_top_hits_from_PICS() + .df.filter( + f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.REDUNDANT_PICS_TOP_HIT.value, + ) + ) + .count() + ) == 3 From 58fb726a867d9b25f55f4ca6873b9a08adb3a8f5 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Mon, 23 Sep 2024 13:16:35 +0100 Subject: [PATCH 045/188] fix(ld clumping): a revised logic allows a more accurate clumping (#772) * fix(ld clumping): a revised logic allows a more accurate identification of linked loci * test: adding some test * chore: pre-commit auto fixes [...] * test: adding more test cases * fix: fixing test data --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/gentropy/dataset/study_locus.py | 8 +- src/gentropy/method/clump.py | 45 ++--- tests/gentropy/method/test_clump.py | 250 ++++++++++++---------------- 3 files changed, 131 insertions(+), 172 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index e2f9dfece..9ab19dab6 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -798,11 +798,12 @@ def clump(self: StudyLocus) -> StudyLocus: Returns: StudyLocus: with empty credible sets for linked variants and QC flag. 
""" - self.df = ( + clumped_df = ( self.df.withColumn( "is_lead_linked", LDclumping._is_lead_linked( self.df.studyId, + self.df.chromosome, self.df.variantId, self.df.pValueExponent, self.df.pValueMantissa, @@ -823,7 +824,10 @@ def clump(self: StudyLocus) -> StudyLocus: ) .drop("is_lead_linked") ) - return self + return StudyLocus( + _df=clumped_df, + _schema=self.get_schema(), + ) def exclude_region( self: StudyLocus, region: GenomicRegion, exclude_overlap: bool = False diff --git a/src/gentropy/method/clump.py b/src/gentropy/method/clump.py index d239c062c..442d9d611 100644 --- a/src/gentropy/method/clump.py +++ b/src/gentropy/method/clump.py @@ -1,6 +1,5 @@ """Clumps GWAS significant variants to generate a studyLocus dataset of independent variants.""" - from __future__ import annotations from typing import TYPE_CHECKING @@ -20,6 +19,7 @@ class LDclumping: @staticmethod def _is_lead_linked( study_id: Column, + chromosome: Column, variant_id: Column, p_value_exponent: Column, p_value_mantissa: Column, @@ -29,6 +29,7 @@ def _is_lead_linked( Args: study_id (Column): studyId + chromosome (Column): chromosome variant_id (Column): Lead variant id p_value_exponent (Column): p-value exponent p_value_mantissa (Column): p-value mantissa @@ -37,31 +38,31 @@ def _is_lead_linked( Returns: Column: Boolean in which True indicates that the lead is linked to another tag in the same dataset. """ - leads_in_study = f.collect_set(variant_id).over(Window.partitionBy(study_id)) - tags_in_studylocus = f.array_union( - # Get all tag variants from the credible set per studyLocusId - f.transform(ld_set, lambda x: x.tagVariantId), - # And append the lead variant so that the intersection is the same for all studyLocusIds in a study - f.array(variant_id), - ) - intersect_lead_tags = f.array_sort( - f.array_intersect(leads_in_study, tags_in_studylocus) + # Partitoning data by study and chromosome - this is the scope for looking for linked loci. 
+ # Within the partition, we order the data by increasing p-value, and we collect the more significant lead variants in the window. + windowspec = ( + Window.partitionBy(study_id, chromosome) + .orderBy(p_value_exponent.asc(), p_value_mantissa.asc()) + .rowsBetween(Window.unboundedPreceding, Window.currentRow) ) - return ( - # If the lead is in the credible set, we rank the peaks by p-value - f.when( - f.size(intersect_lead_tags) > 0, - f.row_number().over( - Window.partitionBy(study_id, intersect_lead_tags).orderBy( - p_value_exponent, p_value_mantissa - ) - ) - > 1, + more_significant_leads = f.collect_set(variant_id).over(windowspec) + + # Collect all variants from the ld_set + adding the lead variant to the list to make sure that the lead is always in the list. + tags_in_studylocus = f.array_distinct( + f.array_union( + f.array(variant_id), + f.transform(ld_set, lambda x: x.getField("tagVariantId")), ) - # If the intersection is empty (lead is not in the credible set or cred set is empty), the association is not linked - .otherwise(f.lit(False)) ) + # If more than one tags of the ld_set can be found in the list of the more significant leads, the lead is linked. + # Study loci without variantId is considered as not linked. + # Also leads that were not found in the LD index is also considered as not linked. + return f.when( + variant_id.isNotNull(), + f.size(f.array_intersect(more_significant_leads, tags_in_studylocus)) > 1, + ).otherwise(f.lit(False)) + @classmethod def clump(cls: type[LDclumping], associations: StudyLocus) -> StudyLocus: """Perform clumping on studyLocus dataset. 
diff --git a/tests/gentropy/method/test_clump.py b/tests/gentropy/method/test_clump.py index 4616c5c6f..1e754df3a 100644 --- a/tests/gentropy/method/test_clump.py +++ b/tests/gentropy/method/test_clump.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import pyspark.sql.functions as f import pyspark.sql.types as t @@ -20,142 +20,77 @@ def test_clump(mock_study_locus: StudyLocus) -> None: assert isinstance(LDclumping.clump(mock_study_locus), StudyLocus) -@pytest.mark.parametrize( - ("observed_data", "expected_data"), - [ +class TestIsLeadLinked: + """Testing the is_lead_linked method.""" + + DATA = [ + # Linked to V2: ( - [ - ( - # Dependent locus - lead is correlated with a more significant variant - 1, - "L1", - "GCST005650_1", - 1.0, - -17, - [{"tagVariantId": "T1"}, {"tagVariantId": "L2"}], - None, - ), - ( - # Dependent locus - lead shows a stronger association than the row above - 2, - "L2", - "GCST005650_1", - 4.0, - -18, - [ - {"tagVariantId": "T2"}, - {"tagVariantId": "T3"}, - {"tagVariantId": "L1"}, - ], - None, - ), - ( - # Independent locus - 3, - "L2", - "GCST005650_1", - 4.0, - -18, - [ - {"tagVariantId": "L3"}, - {"tagVariantId": "T4"}, - {"tagVariantId": "L5"}, - ], - None, - ), - ( - # Empty credible set - 4, - "L3", - "GCST005650_1", - 4.0, - -18, - [], - None, - ), - ( - # Null credible set - 5, - "L4", - "GCST005650_1", - 4.0, - -18, - None, - None, - ), - ], - [ - ( - # Signal is linked to the next row - 1, - "L1", - "GCST005650_1", - 1.0, - -17, - [{"tagVariantId": "T1"}, {"tagVariantId": "L2"}], - True, - ), - ( - # Signal is the most significant - 2, - "L2", - "GCST005650_1", - 4.0, - -18, - [ - {"tagVariantId": "T2"}, - {"tagVariantId": "T3"}, - {"tagVariantId": "L1"}, - ], - False, - ), - ( - # Signal is not linked - 3, - "L2", - "GCST005650_1", - 4.0, - -18, - [ - {"tagVariantId": "L3"}, - {"tagVariantId": "T4"}, - {"tagVariantId": "L5"}, - ], - False, - ), - ( - 
# Empty credible set - signal is not linked - 4, - "L3", - "GCST005650_1", - 4.0, - -18, - [], - False, - ), - ( - # Null credible set - signal is not linked - 5, - "L4", - "GCST005650_1", - 4.0, - -18, - None, - False, - ), - ], - ) - ], -) -def test_is_lead_linked( - spark: SparkSession, observed_data: list[Any], expected_data: list[Any] -) -> None: - """Test function that annotates whether a studyLocusId is linked to a more statistically significant studyLocusId.""" - schema = t.StructType( + "s1", + 1, + "c1", + "v3", + 1.0, + -8, + [{"tagVariantId": "v3"}, {"tagVariantId": "v2"}, {"tagVariantId": "v4"}], + True, + ), + # True lead: + ( + "s1", + 2, + "c1", + "v1", + 1.0, + -10, + [{"tagVariantId": "v1"}, {"tagVariantId": "v2"}, {"tagVariantId": "v3"}], + False, + ), + # Linked to V1: + ( + "s1", + 3, + "c1", + "v2", + 1.0, + -9, + [{"tagVariantId": "v2"}, {"tagVariantId": "v1"}], + True, + ), + # Independent - No LD set: + ("s1", 4, "c1", "v10", 1.0, -10, [], False), + # Independent - No variantId: + ("s1", 5, "c1", None, 1.0, -10, [], False), + # An other independent variant on the same chromosome, but lead is not in ldSet: + ( + "s1", + 6, + "c1", + "v6", + 1.0, + -8, + [{"tagVariantId": "v7"}, {"tagVariantId": "v8"}, {"tagVariantId": "v9"}], + False, + ), + # An other independent variant on a different chromosome, but lead is not in ldSet: + ( + "s1", + 7, + "c2", + "v10", + 1.0, + -8, + [{"tagVariantId": "v2"}, {"tagVariantId": "v10"}], + False, + ), + ] + + SCHEMA = t.StructType( [ + t.StructField("studyId", t.StringType(), True), t.StructField("studyLocusId", t.LongType(), True), + t.StructField("chromosome", t.StringType(), True), t.StructField("variantId", t.StringType(), True), - t.StructField("studyId", t.StringType(), True), t.StructField("pValueMantissa", t.FloatType(), True), t.StructField("pValueExponent", t.IntegerType(), True), t.StructField( @@ -169,28 +104,47 @@ def test_is_lead_linked( ), True, ), - t.StructField("is_lead_linked", 
t.BooleanType(), True), + t.StructField("expected_flag", t.BooleanType(), True), ] ) - study_locus_df = spark.createDataFrame( - observed_data, - schema, - ) - observed_df = ( - study_locus_df.withColumn( + + @pytest.fixture(autouse=True) + def _setup(self: TestIsLeadLinked, spark: SparkSession) -> None: + """Setup study the mock index for testing.""" + # Store input data: + self.df = spark.createDataFrame(self.DATA, self.SCHEMA) + + def test_is_lead_correctness(self: TestIsLeadLinked) -> None: + """Test the correctness of the is_lead_linked method.""" + observed = self.df.withColumn( "is_lead_linked", LDclumping._is_lead_linked( f.col("studyId"), + f.col("chromosome"), f.col("variantId"), f.col("pValueExponent"), f.col("pValueMantissa"), f.col("ldSet"), ), - ) - .orderBy("studyLocusId") - .collect() - ) - expected_df = ( - spark.createDataFrame(expected_data, schema).orderBy("studyLocusId").collect() - ) - assert observed_df == expected_df + ).collect() + + for row in observed: + assert row["is_lead_linked"] == row["expected_flag"] + + def test_flagging(self: TestIsLeadLinked) -> None: + """Test flagging of lead variants.""" + # Create the study locus and clump: + sl_flagged = StudyLocus( + _df=self.df.drop("expected_flag").withColumn("qualityControls", f.array()), + _schema=StudyLocus.get_schema(), + ).clump() + + # Assert that the clumped locus is a StudyLocus: + assert isinstance(sl_flagged, StudyLocus) + + # Assert that the clumped locus has the correct columns: + for row in sl_flagged.df.join(self.df, on="studylocusId").collect(): + if len(row["qualityControls"]) == 0: + assert not row["expected_flag"] + else: + assert row["expected_flag"] From b93842af125b255b83c6e206c30ee1213d2860bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 23 Sep 2024 13:30:33 +0100 Subject: [PATCH 046/188] refactor(L2GFeatureMatrix)!: streamline feature matrix management (#745) * 
refactor(L2GFeatureMatrix): remove schema validation * refactor(FeatureFactory): reshape feature generation WIP * chore: pre-commit auto fixes [...] * chore: set l2gfeature properties with decorator * chore(l2gfeature): make credible_set and input_dependency instance attributes * chore(l2gfeature): make credible_set and input_dependency instance attributes * chore(featurefactory): distanceTssMeanFeature working * refactor(l2g): improve step dependency management * feat: implement * chore: fix mypy issues * feat: l2gfeaturematrix.from_features_list working * chore: comment out obsolete refs * chore(L2GFeatureMatrix): change `mode` attribute to `with_gold_standard` * refactor(l2g): move feature matrix writing to training module * feat(L2GFeatureMatrix): accept L2GGoldStandard or StudyLocus as inputs * feat: implement methods to build a feature matrix based on a studylocus/L2GGoldStandard instance * feat: coloc logic prototype * feat(l2g): filter non gwas credible sets at the start of the step * feat: rewrite colocalisation feature factory * test: add `test_colocalisation_feature_type` * test(colocalisation): add test_extract_maximum_coloc_probability_per_region_and_gene * feat(L2GFeatureInputLoader): support multiple deps by passing loader as kwarg * test: add integration tests `test_build_feature_matrix` * chore: drop config yamls * refactor: move feature classes to datasets module * docs: update feature docs * refactor(colocalisation): cleaner joins in `append_right_study_metadata` * chore: better logging abstract methods * test: add `L2GFeatureMatrix.test_from_features_list` unit tests * fix: add goldStandardSet when a gs instance is passed to `from_features_list` * fix: lowercase colocalisation type and add semantic test * test: add semantic test for `append_right_study_metadata` * feat(colocalisation): make `append_right_study_metadata` extensible to left metadata * fix(colocalisation): append_study_metadata cant take a gold standard * fix(colocalisation): 
extract_maximum_coloc_probability_per_region_and_gene cant take a gold standard * feat: add `StudyLocus` as a dependency of colocalisation features * fix: add studylocus to input loader in test * fix: add studylocus to input loader in test * fix: add studylocus to input loader in test --- docs/python_api/datasets/l2g_feature.md | 22 +- .../python_api/methods/l2g/feature_factory.md | 4 +- .../assets/schemas/l2g_feature_matrix.json | 155 ------ src/gentropy/config.py | 55 +- src/gentropy/dataset/colocalisation.py | 134 +++++ src/gentropy/dataset/dataset.py | 5 +- src/gentropy/dataset/l2g_feature.py | 487 +++++++++++++++++- src/gentropy/dataset/l2g_feature_matrix.py | 150 +++--- src/gentropy/dataset/l2g_gold_standard.py | 25 + src/gentropy/dataset/l2g_prediction.py | 57 +- src/gentropy/dataset/study_locus.py | 24 + .../datasource/eqtl_catalogue/study_index.py | 20 +- src/gentropy/l2g.py | 302 +++++------ src/gentropy/method/colocalisation.py | 2 +- src/gentropy/method/l2g/feature_factory.py | 422 +++++---------- src/gentropy/method/l2g/model.py | 2 +- src/gentropy/method/l2g/trainer.py | 4 +- tests/gentropy/conftest.py | 38 +- tests/gentropy/dataset/test_colocalisation.py | 122 +++++ tests/gentropy/dataset/test_l2g.py | 55 +- tests/gentropy/dataset/test_l2g_feature.py | 59 +++ .../dataset/test_l2g_feature_matrix.py | 150 ++++++ tests/gentropy/dataset/test_study_locus.py | 21 + .../open_targets/test_l2g_gold_standard.py | 25 + tests/gentropy/method/test_locus_to_gene.py | 156 ------ 25 files changed, 1491 insertions(+), 1005 deletions(-) delete mode 100644 src/gentropy/assets/schemas/l2g_feature_matrix.json create mode 100644 tests/gentropy/dataset/test_l2g_feature.py create mode 100644 tests/gentropy/dataset/test_l2g_feature_matrix.py delete mode 100644 tests/gentropy/method/test_locus_to_gene.py diff --git a/docs/python_api/datasets/l2g_feature.md b/docs/python_api/datasets/l2g_feature.md index cf8c3dcf1..bdab67e7c 100644 --- a/docs/python_api/datasets/l2g_feature.md 
+++ b/docs/python_api/datasets/l2g_feature.md @@ -2,7 +2,27 @@ title: L2G Feature --- -::: gentropy.method.l2g.feature_factory.L2GFeature +## Abstract Class + +::: gentropy.dataset.l2g_feature.L2GFeature + +## Feature Classes + +### Derived from colocalisation + +::: gentropy.dataset.l2g_feature.EQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_feature.PQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_feature.SQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_feature.TuQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_feature.EQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_feature.PQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_feature.SQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_feature.TuQtlColocH4MaximumFeature + +### Derived from distance + +::: gentropy.dataset.l2g_feature.DistanceTssMinimumFeature +::: gentropy.dataset.l2g_feature.DistanceTssMeanFeature ## Schema diff --git a/docs/python_api/methods/l2g/feature_factory.md b/docs/python_api/methods/l2g/feature_factory.md index 35b4ed710..ec812d2da 100644 --- a/docs/python_api/methods/l2g/feature_factory.md +++ b/docs/python_api/methods/l2g/feature_factory.md @@ -2,6 +2,6 @@ title: L2G Feature Factory --- -::: gentropy.method.l2g.feature_factory.ColocalisationFactory +::: gentropy.method.l2g.feature_factory.FeatureFactory -::: gentropy.method.l2g.feature_factory.StudyLocusFactory +::: gentropy.method.l2g.feature_factory.L2GFeatureInputLoader diff --git a/src/gentropy/assets/schemas/l2g_feature_matrix.json b/src/gentropy/assets/schemas/l2g_feature_matrix.json deleted file mode 100644 index 114936168..000000000 --- a/src/gentropy/assets/schemas/l2g_feature_matrix.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "fields": [ - { - "metadata": {}, - "name": "studyLocusId", - "nullable": false, - "type": "long" - }, - { - "metadata": {}, - "name": "geneId", - "nullable": false, - "type": "string" - }, - { - "metadata": {}, - "name": "goldStandardSet", - "nullable": true, - "type": "string" - }, 
- { - "metadata": {}, - "name": "distanceTssMean", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "distanceTssMinimum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "vepMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "vepMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "vepMeanNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "vepMean", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "eqtlColocClppMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "eqtlColocClppMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "eqtlColocLlrMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "eqtlColocLlrMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "pqtlColocClppMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "pqtlColocClppMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "pqtlColocLlrMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "pqtlColocLlrMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "sqtlColocClppMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "sqtlColocClppMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "sqtlColocLlrMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "sqtlColocLlrMaximumNeighborhood", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "tuqtlColocClppMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "tuqtlColocClppMaximumNeighborhood", - 
"nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "tuqtlColocLlrMaximum", - "nullable": true, - "type": "float" - }, - { - "metadata": {}, - "name": "tuqtlColocLlrMaximumNeighborhood", - "nullable": true, - "type": "float" - } - ], - "type": "struct" -} diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 4d1174d6b..d5e02924b 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -227,50 +227,16 @@ class LocusToGeneConfig(StepConfig): gene_interactions_path: str | None = None features_list: list[str] = field( default_factory=lambda: [ - # average distance of all tagging variants to gene TSS - "distanceTssMean", - # minimum distance of all tagging variants to gene TSS - "distanceTssMinimum", - # maximum vep consequence score of the locus 95% credible set among all genes in the vicinity - "vepMaximumNeighborhood", - # maximum vep consequence score of the locus 95% credible set split by gene - "vepMaximum", - # mean vep consequence score of the locus 95% credible set among all genes in the vicinity - "vepMeanNeighborhood", - # mean vep consequence score of the locus 95% credible set split by gene - "vepMean", - # max clpp for each (study, locus, gene) aggregating over all eQTLs - "eqtlColocClppMaximum", - # max clpp for each (study, locus) aggregating over all eQTLs - "eqtlColocClppMaximumNeighborhood", - # max clpp for each (study, locus, gene) aggregating over all pQTLs - "pqtlColocClppMaximum", - # max clpp for each (study, locus) aggregating over all pQTLs - "pqtlColocClppMaximumNeighborhood", - # max clpp for each (study, locus, gene) aggregating over all sQTLs - "sqtlColocClppMaximum", - # max clpp for each (study, locus) aggregating over all sQTLs - "sqtlColocClppMaximumNeighborhood", - # max clpp for each (study, locus) aggregating over all tuQTLs - "tuqtlColocClppMaximum", - # max clpp for each (study, locus, gene) aggregating over all tuQTLs - "tuqtlColocClppMaximumNeighborhood", - # max log-likelihood ratio 
value for each (study, locus, gene) aggregating over all eQTLs - "eqtlColocLlrMaximum", - # max log-likelihood ratio value for each (study, locus) aggregating over all eQTLs - "eqtlColocLlrMaximumNeighborhood", - # max log-likelihood ratio value for each (study, locus, gene) aggregating over all pQTLs - "pqtlColocLlrMaximum", - # max log-likelihood ratio value for each (study, locus) aggregating over all pQTLs - "pqtlColocLlrMaximumNeighborhood", - # max log-likelihood ratio value for each (study, locus, gene) aggregating over all sQTLs - "sqtlColocLlrMaximum", - # max log-likelihood ratio value for each (study, locus) aggregating over all sQTLs - "sqtlColocLlrMaximumNeighborhood", - # max log-likelihood ratio value for each (study, locus, gene) aggregating over all tuQTLs - "tuqtlColocLlrMaximum", - # max log-likelihood ratio value for each (study, locus) aggregating over all tuQTLs - "tuqtlColocLlrMaximumNeighborhood", + # max CLPP for each (study, locus, gene) aggregating over a specific qtl type + "eQtlColocClppMaximum", + "pQtlColocClppMaximum", + "sQtlColocClppMaximum", + "tuQtlColocClppMaximum", + # max H4 for each (study, locus, gene) aggregating over a specific qtl type + "eQtlColocH4Maximum", + "pQtlColocH4Maximum", + "sQtlColocH4Maximum", + "tuQtlColocH4Maximum", ] ) hyperparameters: dict[str, Any] = field( @@ -283,6 +249,7 @@ class LocusToGeneConfig(StepConfig): wandb_run_name: str | None = None hf_hub_repo_id: str | None = "opentargets/locus_to_gene" download_from_hub: bool = True + write_feature_matrix: bool = True _target_: str = "gentropy.l2g.LocusToGeneStep" diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index e72543cb2..c0d074ae3 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -1,15 +1,26 @@ """Colocalisation dataset.""" + from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING +import 
pyspark.sql.functions as f + from gentropy.common.schemas import parse_spark_schema +from gentropy.common.spark_helpers import get_record_with_maximum_value from gentropy.dataset.dataset import Dataset +from gentropy.dataset.study_locus import StudyLocus +from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex if TYPE_CHECKING: + from pyspark.sql import DataFrame from pyspark.sql.types import StructType + from gentropy.dataset.study_index import StudyIndex + +from functools import reduce + @dataclass class Colocalisation(Dataset): @@ -23,3 +34,126 @@ def get_schema(cls: type[Colocalisation]) -> StructType: StructType: Schema for the Colocalisation dataset """ return parse_spark_schema("colocalisation.json") + + def extract_maximum_coloc_probability_per_region_and_gene( + self: Colocalisation, + study_locus: StudyLocus, + study_index: StudyIndex, + *, + filter_by_colocalisation_method: str, + filter_by_qtl: str | None = None, + ) -> DataFrame: + """Get maximum colocalisation probability for a (studyLocus, gene) window. 
+ + Args: + study_locus (StudyLocus): Dataset containing study loci to filter the colocalisation dataset on and the geneId linked to the region + study_index (StudyIndex): Study index to use to get study metadata + filter_by_colocalisation_method (str): optional filter to apply on the colocalisation dataset + filter_by_qtl (str | None): optional filter to apply on the colocalisation dataset + + Returns: + DataFrame: table with the maximum colocalisation scores for the provided study loci + + Raises: + ValueError: if filter_by_qtl is not in the list of valid QTL types + ValueError: if filter_by_colocalisation_method is not in the list of valid colocalisation methods + """ + from gentropy.colocalisation import ColocalisationStep + + valid_qtls = list(EqtlCatalogueStudyIndex.method_to_study_type_mapping.values()) + if filter_by_qtl and filter_by_qtl not in valid_qtls: + raise ValueError(f"There are no studies with QTL type {filter_by_qtl}") + + if filter_by_colocalisation_method not in [ + "ECaviar", + "Coloc", + ]: # TODO: Write helper class to retrieve coloc method names + raise ValueError( + f"Colocalisation method {filter_by_colocalisation_method} is not supported." 
+ ) + + method_colocalisation_metric = ColocalisationStep._get_colocalisation_class( + filter_by_colocalisation_method + ).METHOD_METRIC # type: ignore + + coloc_filtering_expr = [ + f.col("rightGeneId").isNotNull(), + f.lower("colocalisationMethod") == filter_by_colocalisation_method.lower(), + ] + if filter_by_qtl: + coloc_filtering_expr.append( + f.lower("rightStudyType") == filter_by_qtl.lower() + ) + + filtered_colocalisation = ( + # Bring rightStudyType and rightGeneId and filter by rows where the gene is null, + # which is equivalent to filtering studyloci from gwas on the right side + self.append_study_metadata( + study_locus, + study_index, + metadata_cols=["studyType", "geneId"], + colocalisation_side="right", + ) + # it also filters based on method and qtl type + .filter(reduce(lambda a, b: a & b, coloc_filtering_expr)) + # and filters colocalisation results to only include the subset of studylocus that contains gwas studylocusid + .join( + study_locus.df.selectExpr("studyLocusId as leftStudyLocusId"), + "leftStudyLocusId", + ) + ) + + return get_record_with_maximum_value( + filtered_colocalisation.withColumnRenamed( + "leftStudyLocusId", "studyLocusId" + ).withColumnRenamed("rightGeneId", "geneId"), + ["studyLocusId", "geneId"], + method_colocalisation_metric, + ) + + def append_study_metadata( + self: Colocalisation, + study_locus: StudyLocus, + study_index: StudyIndex, + *, + metadata_cols: list[str], + colocalisation_side: str = "right", + ) -> DataFrame: + """Appends metadata from the study to the requested side of the colocalisation dataset. + + Args: + study_locus (StudyLocus): Dataset containing study loci that links the colocalisation dataset and the study index via the studyId + study_index (StudyIndex): Dataset containing study index that contains the metadata + metadata_cols (list[str]): List of study columns to append + colocalisation_side (str): Which side of the colocalisation dataset to append metadata to. 
Must be either 'right' or 'left' + + Returns: + DataFrame: Colocalisation dataset with appended metadata of the study from the requested side + + Raises: + ValueError: if colocalisation_side is not 'right' or 'left' + """ + metadata_cols = ["studyId", *metadata_cols] + if colocalisation_side not in ["right", "left"]: + raise ValueError( + f"colocalisation_side must be either 'right' or 'left', got {colocalisation_side}" + ) + + study_loci_w_metadata = ( + study_locus.df.select("studyLocusId", "studyId") + .join( + f.broadcast(study_index.df.select("studyId", *metadata_cols)), + "studyId", + ) + .distinct() + ) + return ( + # Append that to the respective side of the colocalisation dataset + study_loci_w_metadata.selectExpr( + f"studyLocusId as {colocalisation_side}StudyLocusId", + *[ + f"{col} as {colocalisation_side}{col[0].upper() + col[1:]}" + for col in metadata_cols + ], + ).join(self.df, f"{colocalisation_side}StudyLocusId", "right") + ) diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index e019ea379..cbeae7073 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -73,8 +73,11 @@ def get_schema(cls: type[Self]) -> StructType: Returns: StructType: Schema for the Dataset + + Raises: + NotImplementedError: Must be implemented in the child classes """ - pass + raise NotImplementedError("Must be implemented in the child classes") @classmethod def get_QC_column_name(cls: type[Self]) -> str | None: diff --git a/src/gentropy/dataset/l2g_feature.py b/src/gentropy/dataset/l2g_feature.py index 2e9f19d61..319570cfd 100644 --- a/src/gentropy/dataset/l2g_feature.py +++ b/src/gentropy/dataset/l2g_feature.py @@ -1,20 +1,46 @@ -"""L2G Feature Dataset.""" +"""L2G Feature Dataset with a collection of methods that extract features from the gentropy datasets to be fed in L2G.""" + from __future__ import annotations +from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import TYPE_CHECKING 
+from typing import TYPE_CHECKING, Any + +import pyspark.sql.functions as f from gentropy.common.schemas import parse_spark_schema +from gentropy.common.spark_helpers import convert_from_wide_to_long +from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.dataset import Dataset +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.v2g import V2G if TYPE_CHECKING: + from pyspark.sql import DataFrame from pyspark.sql.types import StructType @dataclass -class L2GFeature(Dataset): +class L2GFeature(Dataset, ABC): """Locus-to-gene feature dataset.""" + def __post_init__( + self: L2GFeature, + feature_dependency_type: Any = None, + credible_set: StudyLocus | None = None, + ) -> None: + """Initializes a L2GFeature dataset. Any child class of L2GFeature must implement the `compute` method. + + Args: + feature_dependency_type (Any): The dependency that the L2GFeature dataset depends on. Defaults to None. + credible_set (StudyLocus | None): The credible set that the L2GFeature dataset is based on. Defaults to None. + """ + super().__post_init__() + self.feature_dependency_type = feature_dependency_type + self.credible_set = credible_set + @classmethod def get_schema(cls: type[L2GFeature]) -> StructType: """Provides the schema for the L2GFeature dataset. @@ -23,3 +49,458 @@ def get_schema(cls: type[L2GFeature]) -> StructType: StructType: Schema for the L2GFeature dataset """ return parse_spark_schema("l2g_feature.json") + + @classmethod + @abstractmethod + def compute( + cls: type[L2GFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: Any, + ) -> L2GFeature: + """Computes the L2GFeature dataset. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (Any): The dependency that the L2GFeature class needs to compute the feature + Returns: + L2GFeature: a L2GFeature dataset + + Raises: + NotImplementedError: This method must be implemented in the child classes + """ + raise NotImplementedError("Must be implemented in the child classes") + + +def _common_colocalisation_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + colocalisation_method: str, + colocalisation_metric: str, + feature_name: str, + qtl_type: str, + *, + colocalisation: Colocalisation, + study_index: StudyIndex, + study_locus: StudyLocus, +) -> DataFrame: + """Wrapper to call the logic that creates a type of colocalisation features. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + colocalisation_method (str): The colocalisation method to filter the data by + colocalisation_metric (str): The colocalisation metric to use + feature_name (str): The name of the feature to create + qtl_type (str): The type of QTL to filter the data by + colocalisation (Colocalisation): Dataset with the colocalisation results + study_index (StudyIndex): Study index to fetch study type and gene + study_locus (StudyLocus): Study locus to traverse between colocalisation and study index + + Returns: + DataFrame: Feature annotation in long format with the columns: studyLocusId, geneId, featureName, featureValue + """ + joining_cols = ( + ["studyLocusId", "geneId"] + if isinstance(study_loci_to_annotate, L2GGoldStandard) + else ["studyLocusId"] + ) + return convert_from_wide_to_long( + study_loci_to_annotate.df.join( + colocalisation.extract_maximum_coloc_probability_per_region_and_gene( + study_locus, + study_index, + filter_by_colocalisation_method=colocalisation_method, + filter_by_qtl=qtl_type, + ), + 
on=joining_cols, + ) + .selectExpr( + "studyLocusId", + "geneId", + f"{colocalisation_metric} as {feature_name}", + ) + .distinct(), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ) + + +class EQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all eQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "eQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[EQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> EQtlColocClppMaximumFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dictionary with the dependencies required. They are passed as keyword arguments. + + Returns: + EQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "eqtl" + + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class PQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "pQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[PQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> PQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + PQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "pqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class SQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "sQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[SQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> SQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + SQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "sqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class TuQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all tuQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "tuQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[TuQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> TuQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + TuQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "tuqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class EQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all eQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "eQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[EQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> EQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + EQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "eqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class PQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all pQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "pQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[PQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> PQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + PQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "pqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class SQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all sQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "sQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[SQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> SQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + SQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "sqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class TuQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all tuQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "tuQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[TuQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> TuQtlColocH4MaximumFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + TuQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "tuqtl" + return cls( + _df=_common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + _schema=cls.get_schema(), + ) + + +class DistanceTssMinimumFeature(L2GFeature): + """Minimum distance of all tagging variants to gene TSS.""" + + @classmethod + def compute( + cls: type[DistanceTssMinimumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: V2G, + ) -> L2GFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (V2G): Dataset that contains the distance information + + Returns: + L2GFeature: Feature dataset + + Raises: + NotImplementedError: Not implemented + """ + raise NotImplementedError + + +class DistanceTssMeanFeature(L2GFeature): + """Average distance of all tagging variants to gene TSS. + + NOTE: to be rewritten taking variant index as input + """ + + fill_na_value = 500_000 + feature_dependency_type = V2G + + @classmethod + def compute( + cls: type[DistanceTssMeanFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: V2G, + ) -> DistanceTssMeanFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (V2G): Dataset that contains the distance information + + Returns: + DistanceTssMeanFeature: Feature dataset + """ + agg_expr = f.mean("weightedScore").alias("distanceTssMean") + # Everything but expression is common logic + v2g = feature_dependency.df.filter(f.col("datasourceId") == "canonical_tss") + wide_df = ( + study_loci_to_annotate.df.withColumn( + "variantInLocus", f.explode_outer("locus") + ) + .select( + "studyLocusId", + f.col("variantInLocus.variantId").alias("variantInLocusId"), + f.col("variantInLocus.posteriorProbability").alias( + "variantInLocusPosteriorProbability" + ), + ) + .join( + v2g.selectExpr("variantId as variantInLocusId", "geneId", "score"), + on="variantInLocusId", + how="inner", + ) + .withColumn( + "weightedScore", + f.col("score") * f.col("variantInLocusPosteriorProbability"), + ) + .groupBy("studyLocusId", "geneId") + .agg(agg_expr) + ) + return cls( + _df=convert_from_wide_to_long( + wide_df, + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) 
diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index 4c611e3da..b4893a785 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -2,112 +2,103 @@ from __future__ import annotations -from dataclasses import dataclass, field from functools import reduce from typing import TYPE_CHECKING, Type -from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import convert_from_long_to_wide -from gentropy.dataset.dataset import Dataset -from gentropy.method.l2g.feature_factory import ColocalisationFactory, StudyLocusFactory +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.method.l2g.feature_factory import FeatureFactory, L2GFeatureInputLoader if TYPE_CHECKING: - from pyspark.sql.types import StructType + from pyspark.sql import DataFrame - from gentropy.dataset.colocalisation import Colocalisation - from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus - from gentropy.dataset.v2g import V2G -@dataclass -class L2GFeatureMatrix(Dataset): - """Dataset with features for Locus to Gene prediction. +class L2GFeatureMatrix: + """Dataset with features for Locus to Gene prediction.""" - Attributes: - features_list (list[str] | None): List of features to use. If None, all possible features are used. - fixed_cols (list[str]): Columns that should be kept fixed in the feature matrix, although not considered as features. - mode (str): Mode of the feature matrix. Defaults to "train". Can be either "train" or "predict". 
- """ - - features_list: list[str] | None = None - fixed_cols: list[str] = field(default_factory=lambda: ["studyLocusId", "geneId"]) - mode: str = "train" - - def __post_init__(self: L2GFeatureMatrix) -> None: + def __init__( + self, + _df: DataFrame, + features_list: list[str] | None = None, + with_gold_standard: bool = False, + ) -> None: """Post-initialisation to set the features list. If not provided, all columns except the fixed ones are used. - Raises: - ValueError: If the mode is neither 'train' nor 'predict'. + Args: + _df (DataFrame): Feature matrix dataset + features_list (list[str] | None): List of features to use. If None, all possible features are used. + with_gold_standard (bool): Whether to include the gold standard set in the feature matrix. """ - if self.mode not in ["train", "predict"]: - raise ValueError("Mode should be either 'train' or 'predict'") - if self.mode == "train": - self.fixed_cols = self.fixed_cols + ["goldStandardSet"] - self.features_list = self.features_list or [ - col for col in self._df.columns if col not in self.fixed_cols + self.fixed_cols = ["studyLocusId", "geneId"] + if with_gold_standard: + self.fixed_cols.append("goldStandardSet") + + self.features_list = features_list or [ + col for col in _df.columns if col not in self.fixed_cols ] - self.validate_schema() + self._df = _df.selectExpr( + self.fixed_cols + + [ + f"CAST({feature} AS FLOAT) AS {feature}" + for feature in self.features_list + ] + ) @classmethod - def generate_features( + def from_features_list( cls: Type[L2GFeatureMatrix], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, features_list: list[str], - credible_set: StudyLocus, - study_index: StudyIndex, - variant_gene: V2G, - colocalisation: Colocalisation, + features_input_loader: L2GFeatureInputLoader, ) -> L2GFeatureMatrix: - """Generate features from the gentropy datasets. 
+ """Generate features from the gentropy datasets by calling the feature factory that will instantiate the corresponding features. Args: - features_list (list[str]): List of features to generate - credible_set (StudyLocus): Credible set dataset - study_index (StudyIndex): Study index dataset - variant_gene (V2G): Variant to gene dataset - colocalisation (Colocalisation): Colocalisation dataset + study_loci_to_annotate (StudyLocus | L2GGoldStandard): Study locus pairs to annotate + features_list (list[str]): List of feature names to be computed. + features_input_loader (L2GFeatureInputLoader): Object that contains features input. Returns: L2GFeatureMatrix: L2G feature matrix dataset - - Raises: - ValueError: If the feature matrix is empty """ - if features_dfs := [ - # Extract features - ColocalisationFactory._get_max_coloc_per_credible_set( - colocalisation, - credible_set, - study_index, - ).df, - StudyLocusFactory._get_tss_distance_features(credible_set, variant_gene).df, - StudyLocusFactory._get_vep_features(credible_set, variant_gene).df, - ]: - fm = reduce( - lambda x, y: x.unionByName(y), - features_dfs, + features_long_df = reduce( + lambda x, y: x.unionByName(y, allowMissingColumns=True), + [ + # Compute all features and merge them into a single dataframe + feature.df + for feature in FeatureFactory( + study_loci_to_annotate, features_list + ).generate_features(features_input_loader) + ], + ) + if isinstance(study_loci_to_annotate, L2GGoldStandard): + return cls( + _df=convert_from_long_to_wide( + # Add gold standard set to the feature matrix + features_long_df.join( + study_loci_to_annotate.df.select( + "studyLocusId", "geneId", "goldStandardSet" + ), + ["studyLocusId", "geneId"], + ), + ["studyLocusId", "geneId", "goldStandardSet"], + "featureName", + "featureValue", + ), + with_gold_standard=True, ) - else: - raise ValueError("No features found") - - # raise error if the feature matrix is empty return cls( _df=convert_from_long_to_wide( - fm, 
["studyLocusId", "geneId"], "featureName", "featureValue" + features_long_df, + ["studyLocusId", "geneId"], + "featureName", + "featureValue", ), - _schema=cls.get_schema(), - features_list=features_list, + with_gold_standard=False, ) - @classmethod - def get_schema(cls: type[L2GFeatureMatrix]) -> StructType: - """Provides the schema for the L2gFeatureMatrix dataset. - - Returns: - StructType: Schema for the L2gFeatureMatrix dataset - """ - return parse_spark_schema("l2g_feature_matrix.json") - def calculate_feature_missingness_rate( self: L2GFeatureMatrix, ) -> dict[str, float]: @@ -145,7 +136,7 @@ def fill_na( Returns: L2GFeatureMatrix: L2G feature matrix dataset """ - self.df = self._df.fillna(value, subset=subset) + self._df = self._df.fillna(value, subset=subset) return self def select_features( @@ -164,6 +155,13 @@ def select_features( ValueError: If no features have been selected. """ if features_list := features_list or self.features_list: - self.df = self._df.select(self.fixed_cols + features_list) + # cast to float every feature in the features_list + self._df = self._df.selectExpr( + self.fixed_cols + + [ + f"CAST({feature} AS FLOAT) AS {feature}" + for feature in features_list + ] + ) return self raise ValueError("features_list cannot be None") diff --git a/src/gentropy/dataset/l2g_gold_standard.py b/src/gentropy/dataset/l2g_gold_standard.py index 5bc48413c..89f4c5f5d 100644 --- a/src/gentropy/dataset/l2g_gold_standard.py +++ b/src/gentropy/dataset/l2g_gold_standard.py @@ -1,4 +1,5 @@ """L2G gold standard dataset.""" + from __future__ import annotations from dataclasses import dataclass @@ -15,6 +16,7 @@ from pyspark.sql import DataFrame from pyspark.sql.types import StructType + from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.v2g import V2G @@ -100,6 +102,29 @@ def process_gene_interactions( "scoring as score", ) + def build_feature_matrix( + self: 
L2GGoldStandard, + full_feature_matrix: L2GFeatureMatrix, + ) -> L2GFeatureMatrix: + """Return a feature matrix for study loci in the gold standard. + + Args: + full_feature_matrix (L2GFeatureMatrix): Feature matrix for all study loci to join on + + Returns: + L2GFeatureMatrix: Feature matrix for study loci in the gold standard + """ + from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix + + return L2GFeatureMatrix( + _df=full_feature_matrix._df.join( + f.broadcast(self.df.drop("variantId", "studyId", "sources")), + on=["studyLocusId", "geneId"], + how="inner", + ), + with_gold_standard=True, + ) + def filter_unique_associations( self: L2GGoldStandard, study_locus_overlap: StudyLocusOverlap, diff --git a/src/gentropy/dataset/l2g_prediction.py b/src/gentropy/dataset/l2g_prediction.py index 9895f55b7..97e58f526 100644 --- a/src/gentropy/dataset/l2g_prediction.py +++ b/src/gentropy/dataset/l2g_prediction.py @@ -7,12 +7,10 @@ from gentropy.common.schemas import parse_spark_schema from gentropy.common.session import Session -from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.dataset import Dataset from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix -from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.v2g import V2G +from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader from gentropy.method.l2g.model import LocusToGeneModel if TYPE_CHECKING: @@ -40,31 +38,27 @@ def get_schema(cls: type[L2GPrediction]) -> StructType: @classmethod def from_credible_set( cls: Type[L2GPrediction], - features_list: list[str], - credible_set: StudyLocus, - study_index: StudyIndex, - v2g: V2G, - coloc: Colocalisation, session: Session, + credible_set: StudyLocus, + features_list: list[str], + features_input_loader: L2GFeatureInputLoader, model_path: str | None, hf_token: str | None = None, download_from_hub: bool = True, - ) -> tuple[L2GPrediction, 
L2GFeatureMatrix]: + ) -> L2GPrediction: """Extract L2G predictions for a set of credible sets derived from GWAS. Args: - features_list (list[str]): List of features to use for the model - credible_set (StudyLocus): Credible set dataset - study_index (StudyIndex): Study index dataset - v2g (V2G): Variant to gene dataset - coloc (Colocalisation): Colocalisation dataset session (Session): Session object that contains the Spark session + credible_set (StudyLocus): Dataset containing credible sets from GWAS only + features_list (list[str]): List of features to use for the model + features_input_loader (L2GFeatureInputLoader): Loader with all feature dependencies model_path (str | None): Path to the model file. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). hf_token (str | None): Hugging Face token to download the model from the Hub. Only required if the model is private. download_from_hub (bool): Whether to download the model from the Hugging Face Hub. Defaults to True. Returns: - tuple[L2GPrediction, L2GFeatureMatrix]: L2G dataset and feature matrix limited to GWAS study only. + L2GPrediction: L2G scores for a set of credible sets. 
""" # Load the model if download_from_hub: @@ -75,31 +69,14 @@ def from_credible_set( l2g_model = LocusToGeneModel.load_from_disk(model_path) # Prepare data - fm = L2GFeatureMatrix.generate_features( - features_list=features_list, - credible_set=credible_set, - study_index=study_index, - variant_gene=v2g, - colocalisation=coloc, - ).fill_na() - - gwas_fm = ( - L2GFeatureMatrix( - _df=( - fm.df.join( - credible_set.filter_by_study_type( - "gwas", study_index - ).df.select("studyLocusId"), - on="studyLocusId", - ) - ), - _schema=L2GFeatureMatrix.get_schema(), - mode="predict", + fm = ( + L2GFeatureMatrix.from_features_list( + study_loci_to_annotate=credible_set, + features_list=features_list, + features_input_loader=features_input_loader, ) + .fill_na() .select_features(features_list) - .persist() - ) - return ( - l2g_model.predict(gwas_fm, session), - gwas_fm, ) + + return l2g_model.predict(fm, session) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 9ab19dab6..e8363aa4e 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -26,9 +26,11 @@ from pyspark.sql import Column, DataFrame from pyspark.sql.types import StructType + from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.ld_index import LDIndex from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.summary_statistics import SummaryStatistics + from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader class StudyLocusQualityCheck(Enum): @@ -647,6 +649,28 @@ def neglog_pvalue(self: StudyLocus) -> Column: self.df.pValueExponent, ) + def build_feature_matrix( + self: StudyLocus, + features_list: list[str], + features_input_loader: L2GFeatureInputLoader, + ) -> L2GFeatureMatrix: + """Returns the feature matrix for a StudyLocus. + + Args: + features_list (list[str]): List of features to include in the feature matrix. 
+ features_input_loader (L2GFeatureInputLoader): Feature input loader to use. + + Returns: + L2GFeatureMatrix: Feature matrix for this study-locus. + """ + from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix + + return L2GFeatureMatrix.from_features_list( + self, + features_list, + features_input_loader, + ) + def annotate_credible_sets(self: StudyLocus) -> StudyLocus: """Annotate study-locus dataset with credible set flags. diff --git a/src/gentropy/datasource/eqtl_catalogue/study_index.py b/src/gentropy/datasource/eqtl_catalogue/study_index.py index 6add70ffb..d284eb781 100644 --- a/src/gentropy/datasource/eqtl_catalogue/study_index.py +++ b/src/gentropy/datasource/eqtl_catalogue/study_index.py @@ -45,6 +45,15 @@ class EqtlCatalogueStudyIndex: ] ) raw_studies_metadata_path = "https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/092e01a9601feb404f1c88f86311b43b907a88f6/data_tables/dataset_metadata_upcoming.tsv" + method_to_study_type_mapping = { + "ge": "eqtl", + "exon": "eqtl", + "tx": "eqtl", + "microarray": "eqtl", + "leafcutter": "sqtl", + "aptamer": "pqtl", + "txrev": "tuqtl", + } @classmethod def _identify_study_type( @@ -76,17 +85,8 @@ def _identify_study_type( +------------+---------+----------+ """ - method_to_study_type_mapping = { - "ge": "eqtl", - "exon": "eqtl", - "tx": "eqtl", - "microarray": "eqtl", - "leafcutter": "sqtl", - "aptamer": "pqtl", - "txrev": "tuqtl", - } qtl_type_mapping = f.create_map( - *[f.lit(x) for x in chain(*method_to_study_type_mapping.items())] + *[f.lit(x) for x in chain(*cls.method_to_study_type_mapping.items())] )[quantification_method_col] return f.when( biosample_col.startswith("CL"), f.concat(f.lit("sc"), qtl_type_mapping) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index cb13d3640..832023cd8 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -18,6 +18,7 @@ from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus from 
gentropy.dataset.v2g import V2G +from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader from gentropy.method.l2g.model import LocusToGeneModel from gentropy.method.l2g.trainer import LocusToGeneTrainer @@ -28,41 +29,44 @@ class LocusToGeneStep: def __init__( self, session: Session, + hyperparameters: dict[str, Any], + *, run_mode: str, - predictions_path: str, - credible_set_path: str, - variant_gene_path: str, - colocalisation_path: str, - study_index_path: str, - gold_standard_curation_path: str, - gene_interactions_path: str, features_list: list[str], - hyperparameters: dict[str, Any], download_from_hub: bool, - model_path: str | None, + wandb_run_name: str, + model_path: str | None = None, + credible_set_path: str, + gold_standard_curation_path: str | None = None, + variant_gene_path: str | None = None, + colocalisation_path: str | None = None, + study_index_path: str | None = None, + gene_interactions_path: str | None = None, + predictions_path: str | None = None, feature_matrix_path: str | None = None, - wandb_run_name: str | None = None, + write_feature_matrix: bool, hf_hub_repo_id: str | None = LocusToGeneConfig().hf_hub_repo_id, ) -> None: """Initialise the step and run the logic based on mode. 
Args: session (Session): Session object that contains the Spark session + hyperparameters (dict[str, Any]): Hyperparameters for the model run_mode (str): Run mode, either 'train' or 'predict' - predictions_path (str): Path to save the predictions - credible_set_path (str): Path to the credible set dataset - variant_gene_path (str): Path to the variant to gene dataset - colocalisation_path (str): Path to the colocalisation dataset - study_index_path (str): Path to the study index dataset - gold_standard_curation_path (str): Path to the gold standard curation dataset - gene_interactions_path (str): Path to the gene interactions dataset features_list (list[str]): List of features to use for the model - hyperparameters (dict[str, Any]): Hyperparameters for the model - download_from_hub (bool): Whether to download the model from the Hugging Face Hub - model_path (str | None): Path to the fitted model - feature_matrix_path (str | None): Path to save the feature matrix. Defaults to None. - wandb_run_name (str | None): Name of the wandb run. Defaults to None. - hf_hub_repo_id (str | None): Hugging Face Hub repo id. Defaults to the one set in the step configuration. + download_from_hub (bool): Whether to download the model from Hugging Face Hub + wandb_run_name (str): Name of the run to track model training in Weights and Biases + model_path (str | None): Path to the model. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). 
+ credible_set_path (str): Path to the credible set dataset necessary to build the feature matrix + gold_standard_curation_path (str | None): Path to the gold standard curation file + variant_gene_path (str | None): Path to the variant-gene dataset + colocalisation_path (str | None): Path to the colocalisation dataset + study_index_path (str | None): Path to the study index dataset + gene_interactions_path (str | None): Path to the gene interactions dataset + predictions_path (str | None): Path to the L2G predictions output dataset + feature_matrix_path (str | None): Path to the L2G feature matrix output dataset + write_feature_matrix (bool): Whether to write the full feature matrix to the filesystem + hf_hub_repo_id (str | None): Hugging Face Hub repository ID. If provided, the model will be uploaded to Hugging Face. Raises: ValueError: If run_mode is not 'train' or 'predict' @@ -76,12 +80,6 @@ def __init__( self.run_mode = run_mode self.model_path = model_path self.predictions_path = predictions_path - self.credible_set_path = credible_set_path - self.variant_gene_path = variant_gene_path - self.colocalisation_path = colocalisation_path - self.study_index_path = study_index_path - self.gold_standard_curation_path = gold_standard_curation_path - self.gene_interactions_path = gene_interactions_path self.features_list = list(features_list) self.hyperparameters = dict(hyperparameters) self.feature_matrix_path = feature_matrix_path @@ -92,151 +90,163 @@ def __init__( # Load common inputs self.credible_set = StudyLocus.from_parquet( session, credible_set_path, recursiveFileLookup=True + ).filter(f.col("studyType") == "gwas") + self.studies = ( + StudyIndex.from_parquet(session, study_index_path, recursiveFileLookup=True) + if study_index_path + else None ) - self.studies = StudyIndex.from_parquet( - session, study_index_path, recursiveFileLookup=True + self.v2g = ( + V2G.from_parquet(session, variant_gene_path) if variant_gene_path else None ) - self.v2g = 
V2G.from_parquet(session, variant_gene_path) - self.coloc = Colocalisation.from_parquet( - session, colocalisation_path, recursiveFileLookup=True + self.coloc = ( + Colocalisation.from_parquet( + session, colocalisation_path, recursiveFileLookup=True + ) + if colocalisation_path + else None + ) + self.features_input_loader = L2GFeatureInputLoader( + v2g=self.v2g, + coloc=self.coloc, + studies=self.studies, + study_locus=self.credible_set, ) if run_mode == "predict": self.run_predict() elif run_mode == "train": + self.gs_curation = ( + self.session.spark.read.json(gold_standard_curation_path) + if gold_standard_curation_path + else None + ) + self.interactions = ( + self.session.spark.read.parquet(gene_interactions_path) + if gene_interactions_path + else None + ) self.run_train() def run_predict(self) -> None: """Run the prediction step. Raises: - ValueError: If predictions_path is not set. + ValueError: If not all dependencies in prediction mode are set """ - if not self.predictions_path: - raise ValueError("predictions_path must be set for predict mode.") - predictions, feature_matrix = L2GPrediction.from_credible_set( - self.features_list, - self.credible_set, - self.studies, - self.v2g, - self.coloc, - self.session, - model_path=self.model_path, - hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), - download_from_hub=self.download_from_hub, - ) - if self.feature_matrix_path: - feature_matrix.df.write.mode(self.session.write_mode).parquet( - self.feature_matrix_path + if self.studies and self.v2g and self.coloc: + predictions = L2GPrediction.from_credible_set( + self.session, + self.credible_set, + self.features_list, + self.features_input_loader, + model_path=self.model_path, + hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), + download_from_hub=self.download_from_hub, ) - predictions.df.write.mode(self.session.write_mode).parquet( - self.predictions_path - ) - self.session.logger.info(self.predictions_path) + if 
self.predictions_path: + predictions.df.write.mode(self.session.write_mode).parquet( + self.predictions_path + ) + self.session.logger.info(self.predictions_path) + else: + raise ValueError("Dependencies for predict mode not set.") def run_train(self) -> None: - """Run the training step. - - Raises: - ValueError: If gold_standard_curation_path, gene_interactions_path, or wandb_run_name are not set. - """ - if not ( - self.gold_standard_curation_path - and self.gene_interactions_path + """Run the training step.""" + if ( + self.gs_curation + and self.interactions + and self.v2g and self.wandb_run_name and self.model_path ): - raise ValueError( - "gold_standard_curation_path, gene_interactions_path, and wandb_run_name, and a path to save the model must be set for train mode." + wandb_key = access_gcp_secret("wandb-key", "open-targets-genetics-dev") + # Process gold standard and L2G features + data = self._generate_feature_matrix(write_feature_matrix=True) + + # Instantiate classifier and train model + l2g_model = LocusToGeneModel( + model=GradientBoostingClassifier(random_state=42), + hyperparameters=self.hyperparameters, ) + wandb_login(key=wandb_key) + trained_model = LocusToGeneTrainer( + model=l2g_model, feature_matrix=data + ).train(self.wandb_run_name) + if trained_model.training_data and trained_model.model and self.model_path: + trained_model.save(self.model_path) + if self.hf_hub_repo_id: + hf_hub_token = access_gcp_secret( + "hfhub-key", "open-targets-genetics-dev" + ) + trained_model.export_to_hugging_face_hub( + # we upload the model in the filesystem + self.model_path.split("/")[-1], + hf_hub_token, + data=trained_model.training_data._df.drop( + "goldStandardSet", "geneId" + ).toPandas(), + repo_id=self.hf_hub_repo_id, + commit_message="chore: update model", + ) - wandb_key = access_gcp_secret("wandb-key", "open-targets-genetics-dev") - # Process gold standard and L2G features - data = self._generate_feature_matrix().persist() + def 
_generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatrix: + """Generate the feature matrix of annotated gold standards. - # Instantiate classifier and train model - l2g_model = LocusToGeneModel( - model=GradientBoostingClassifier(random_state=42), - hyperparameters=self.hyperparameters, - ) - wandb_login(key=wandb_key) - trained_model = LocusToGeneTrainer(model=l2g_model, feature_matrix=data).train( - self.wandb_run_name - ) - if trained_model.training_data and trained_model.model: - trained_model.save(self.model_path) - if self.hf_hub_repo_id: - hf_hub_token = access_gcp_secret( - "hfhub-key", "open-targets-genetics-dev" - ) - trained_model.export_to_hugging_face_hub( - # we upload the model in the filesystem - self.model_path.split("/")[-1], - hf_hub_token, - data=trained_model.training_data.df.drop( - "goldStandardSet", "geneId" - ).toPandas(), - repo_id=self.hf_hub_repo_id, - commit_message="chore: update model", - ) - - def _generate_feature_matrix(self) -> L2GFeatureMatrix: - """Generate the feature matrix for training. + Args: + write_feature_matrix (bool): Whether to write the feature matrix for all credible sets to disk Returns: L2GFeatureMatrix: Feature matrix with gold standards annotated with features. 
- """ - gs_curation = self.session.spark.read.json(self.gold_standard_curation_path) - interactions = self.session.spark.read.parquet(self.gene_interactions_path) - study_locus_overlap = StudyLocus( - _df=self.credible_set.df.join( - f.broadcast( - gs_curation.select( - StudyLocus.assign_study_locus_id( - f.col("association_info.otg_id"), # studyId - f.concat_ws( # variantId - "_", - f.col("sentinel_variant.locus_GRCh38.chromosome"), - f.col("sentinel_variant.locus_GRCh38.position"), - f.col("sentinel_variant.alleles.reference"), - f.col("sentinel_variant.alleles.alternative"), - ), - f.col("finemappingMethod"), - ).alias("studyLocusId"), - ) - ), - "studyLocusId", - "inner", - ), - _schema=StudyLocus.get_schema(), - ).find_overlaps(self.studies) - - gold_standards = L2GGoldStandard.from_otg_curation( - gold_standard_curation=gs_curation, - v2g=self.v2g, - study_locus_overlap=study_locus_overlap, - interactions=interactions, - ) - fm = L2GFeatureMatrix.generate_features( - features_list=self.features_list, - credible_set=self.credible_set, - study_index=self.studies, - variant_gene=self.v2g, - colocalisation=self.coloc, - ) - - return ( - L2GFeatureMatrix( - _df=fm.df.join( + Raises: + ValueError: If write_feature_matrix is set to True but a path is not provided. + ValueError: If dependencies to build features are not set. 
+ """ + if self.gs_curation and self.interactions and self.v2g and self.studies: + study_locus_overlap = StudyLocus( + _df=self.credible_set.df.join( f.broadcast( - gold_standards.df.drop("variantId", "studyId", "sources") + self.gs_curation.select( + StudyLocus.assign_study_locus_id( + f.col("association_info.otg_id"), # studyId + f.concat_ws( # variantId + "_", + f.col("sentinel_variant.locus_GRCh38.chromosome"), + f.col("sentinel_variant.locus_GRCh38.position"), + f.col("sentinel_variant.alleles.reference"), + f.col("sentinel_variant.alleles.alternative"), + ), + ).alias("studyLocusId"), + ) ), - on=["studyLocusId", "geneId"], - how="inner", + "studyLocusId", + "inner", ), - _schema=L2GFeatureMatrix.get_schema(), + _schema=StudyLocus.get_schema(), + ).find_overlaps(self.studies) + + gold_standards = L2GGoldStandard.from_otg_curation( + gold_standard_curation=self.gs_curation, + v2g=self.v2g, + study_locus_overlap=study_locus_overlap, + interactions=self.interactions, ) - .fill_na() - .select_features(self.features_list) - ) + + fm = self.credible_set.build_feature_matrix( + self.features_list, self.features_input_loader + ) + if write_feature_matrix: + if not self.feature_matrix_path: + raise ValueError("feature_matrix_path must be set.") + fm._df.write.mode(self.session.write_mode).parquet( + self.feature_matrix_path + ) + + return ( + gold_standards.build_feature_matrix(fm) + .fill_na() + .select_features(self.features_list) + ) + raise ValueError("Dependencies for train mode not set.") diff --git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py index 18d97fdf8..c3320f931 100644 --- a/src/gentropy/method/colocalisation.py +++ b/src/gentropy/method/colocalisation.py @@ -112,7 +112,7 @@ class Coloc: """ METHOD_NAME: str = "COLOC" - METHOD_METRIC: str = "llr" + METHOD_METRIC: str = "h4" PSEUDOCOUNT: float = 1e-10 @staticmethod diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index 
1158c6067..c0f0ef9b4 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -1,341 +1,151 @@ -"""Collection of methods that extract features from the gentropy datasets to be fed in L2G.""" +"""Factory that computes features based on an input list.""" from __future__ import annotations -from functools import reduce -from itertools import chain -from typing import TYPE_CHECKING - -import pyspark.sql.functions as f - -from gentropy.common.spark_helpers import ( - convert_from_wide_to_long, - get_record_with_maximum_value, +from typing import Any, Iterator, Mapping + +from gentropy.dataset.l2g_feature import ( + EQtlColocClppMaximumFeature, + EQtlColocH4MaximumFeature, + L2GFeature, + PQtlColocClppMaximumFeature, + PQtlColocH4MaximumFeature, + SQtlColocClppMaximumFeature, + SQtlColocH4MaximumFeature, + TuQtlColocClppMaximumFeature, + TuQtlColocH4MaximumFeature, ) -from gentropy.dataset.l2g_feature import L2GFeature -from gentropy.dataset.study_locus import CredibleInterval, StudyLocus -from gentropy.method.colocalisation import Coloc, ECaviar - -if TYPE_CHECKING: - from pyspark.sql import Column, DataFrame +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_locus import StudyLocus - from gentropy.dataset.colocalisation import Colocalisation - from gentropy.dataset.study_index import StudyIndex - from gentropy.dataset.v2g import V2G +class L2GFeatureInputLoader: + """Loads all input datasets required for the L2GFeature dataset.""" -class ColocalisationFactory: - """Feature extraction in colocalisation.""" + def __init__( + self, + **kwargs: Any, + ) -> None: + """Initializes L2GFeatureInputLoader with the provided inputs and stores the loaded dependencies as a dictionary. 
- @classmethod - def _add_colocalisation_metric(cls: type[ColocalisationFactory]) -> Column: - """Expression that adds a `colocalisationMetric` column to the colocalisation dataframe in preparation for feature extraction. - - Returns: - Column: The expression that adds a `colocalisationMetric` column with the derived metric + Args: + **kwargs (Any): keyword arguments with the name of the dependency and the dependency itself. """ - method_metric_map = { - ECaviar.METHOD_NAME: ECaviar.METHOD_METRIC, - Coloc.METHOD_NAME: Coloc.METHOD_METRIC, - } - map_expr = f.create_map(*[f.lit(x) for x in chain(*method_metric_map.items())]) - return map_expr[f.col("colocalisationMethod")].alias("colocalisationMetric") + self.input_dependencies = {k: v for k, v in kwargs.items() if v is not None} - @staticmethod - def _get_max_coloc_per_credible_set( - colocalisation: Colocalisation, - credible_set: StudyLocus, - studies: StudyIndex, - ) -> L2GFeature: - """Get the maximum colocalisation posterior probability for each pair of overlapping study-locus per type of colocalisation method and QTL type. + def get_dependency_by_type( + self, dependency_type: list[Any] | Any + ) -> dict[str, Any]: + """Returns the dependencies that match the provided type(s). Args: - colocalisation (Colocalisation): Colocalisation dataset - credible_set (StudyLocus): Study locus dataset - studies (StudyIndex): Study index dataset + dependency_type (list[Any] | Any): type(s) of the dependency to return. Returns: - L2GFeature: Stores the features with the max coloc probabilities for each pair of study-locus + dict[str, Any]: dictionary of dependencies that match the provided type(s). 
""" - colocalisation_df = colocalisation.df.select( - f.col("leftStudyLocusId").alias("studyLocusId"), - "rightStudyLocusId", - f.coalesce("h4", "clpp").alias("score"), - ColocalisationFactory._add_colocalisation_metric(), - ) + if not isinstance(dependency_type, list): + dependency_type = [dependency_type] + return { + k: v + for k, v in self.input_dependencies.items() + if isinstance(v, tuple(dependency_type)) + } - colocalising_credible_sets = ( - credible_set.df.select("studyLocusId", "studyId") - # annotate studyLoci with overlapping IDs on the left - to just keep GWAS associations - .join( - colocalisation_df, - on="studyLocusId", - how="inner", - ) - # bring study metadata to just keep QTL studies on the right - .join( - credible_set.df.join( - studies.df.select("studyId", "studyType", "geneId"), "studyId" - ).selectExpr( - "studyLocusId as rightStudyLocusId", - "studyType as right_studyType", - "geneId", - ), - on="rightStudyLocusId", - how="inner", - ) - .filter(f.col("right_studyType") != "gwas") - .select( - "studyLocusId", - "right_studyType", - "geneId", - "score", - "colocalisationMetric", - ) - ) + def __iter__(self) -> Iterator[tuple[str, Any]]: + """Make the class iterable, returning an iterator over key-value pairs. - # Max PP calculation per credible set AND type of QTL AND colocalisation method - local_max = ( - get_record_with_maximum_value( - colocalising_credible_sets, - ["studyLocusId", "right_studyType", "geneId", "colocalisationMetric"], - "score", - ) - .select( - "*", - f.col("score").alias("max_score"), - f.lit("Local").alias("score_type"), - ) - .drop("score") - ) + Returns: + Iterator[tuple[str, Any]]: iterator over the dictionary's key-value pairs. 
+ """ + return iter(self.input_dependencies.items()) - neighbourhood_max = ( - local_max.selectExpr( - "studyLocusId", "max_score as local_max_score", "geneId" - ) - .join( - # Add maximum in the neighborhood - get_record_with_maximum_value( - colocalising_credible_sets.withColumnRenamed( - "score", "tmp_nbh_max_score" - ), - ["studyLocusId", "right_studyType", "colocalisationMetric"], - "tmp_nbh_max_score", - ).drop("geneId"), - on="studyLocusId", - ) - .withColumn("score_type", f.lit("Neighborhood")) - .withColumn( - "max_score", - f.log10( - f.abs( - f.col("local_max_score") - - f.col("tmp_nbh_max_score") - + f.lit(0.0001) # intercept - ) - ), - ) - ).drop("tmp_nbh_max_score", "local_max_score") + def __repr__(self) -> str: + """Return a string representation of the input dependencies. - return L2GFeature( - _df=( - # Combine local and neighborhood metrics - local_max.unionByName( - neighbourhood_max, allowMissingColumns=True - ).select( - "studyLocusId", - "geneId", - # Feature name is a concatenation of the QTL type, colocalisation metric and if it's local or in the vicinity - f.concat_ws( - "", - f.col("right_studyType"), - f.lit("Coloc"), - f.initcap(f.col("colocalisationMetric")), - f.lit("Maximum"), - f.regexp_replace(f.col("score_type"), "Local", ""), - ).alias("featureName"), - f.col("max_score").cast("float").alias("featureValue"), - ) - ), - _schema=L2GFeature.get_schema(), - ) + Useful for understanding the loader content without having to print the object attribute. + Returns: + str: string representation of the input dependencies. 
+ """ + return repr(self.input_dependencies) + + +class FeatureFactory: + """Factory class for creating features.""" + + feature_mapper: Mapping[str, type[L2GFeature]] = { + # "distanceTssMinimum": DistanceTssMinimumFeature, + # "distanceTssMean": DistanceTssMeanFeature, + "eQtlColocClppMaximum": EQtlColocClppMaximumFeature, + "pQtlColocClppMaximum": PQtlColocClppMaximumFeature, + "sQtlColocClppMaximum": SQtlColocClppMaximumFeature, + "tuQtlColocClppMaximum": TuQtlColocClppMaximumFeature, + "eQtlColocH4Maximum": EQtlColocH4MaximumFeature, + "pQtlColocH4Maximum": PQtlColocH4MaximumFeature, + "sQtlColocH4Maximum": SQtlColocH4MaximumFeature, + "tuQtlColocH4Maximum": TuQtlColocH4MaximumFeature, + } + + def __init__( + self: FeatureFactory, + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + features_list: list[str], + ) -> None: + """Initializes the factory. -class StudyLocusFactory(StudyLocus): - """Feature extraction in study locus.""" + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + features_list (list[str]): list of features to compute. + """ + self.study_loci_to_annotate = study_loci_to_annotate + self.features_list = features_list - @staticmethod - def _get_tss_distance_features(credible_set: StudyLocus, v2g: V2G) -> L2GFeature: - """Joins StudyLocus with the V2G to extract a score that is based on the distance to a gene TSS of any variant weighted by its posterior probability in a credible set. + def generate_features( + self: FeatureFactory, + features_input_loader: L2GFeatureInputLoader, + ) -> list[L2GFeature]: + """Generates a feature matrix by reading an object with instructions on how to create the features. Args: - credible_set (StudyLocus): Credible set dataset - v2g (V2G): Dataframe containing the distances of all variants to all genes TSS within a region + features_input_loader (L2GFeatureInputLoader): object with required features dependencies. 
Returns: - L2GFeature: Stores the features with the score of weighting the distance to the TSS by the posterior probability of the variant + list[L2GFeature]: list of computed features. + Raises: + ValueError: If feature not found. """ - wide_df = ( - credible_set.filter_credible_set(CredibleInterval.IS95) - .df.withColumn("variantInLocus", f.explode_outer("locus")) - .select( - "studyLocusId", - "variantId", - f.col("variantInLocus.variantId").alias("variantInLocusId"), - f.col("variantInLocus.posteriorProbability").alias( - "variantInLocusPosteriorProbability" - ), - ) - .join( - v2g.df.filter(f.col("datasourceId") == "canonical_tss").selectExpr( - "variantId as variantInLocusId", "geneId", "score" - ), - on="variantInLocusId", - how="inner", - ) - .withColumn( - "weightedScore", - f.col("score") * f.col("variantInLocusPosteriorProbability"), - ) - .groupBy("studyLocusId", "geneId") - .agg( - f.min("weightedScore").alias("distanceTssMinimum"), - f.mean("weightedScore").alias("distanceTssMean"), - ) - ) - - return L2GFeature( - _df=convert_from_wide_to_long( - wide_df, - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ), - _schema=L2GFeature.get_schema(), - ) - - @staticmethod - def _get_vep_features( - credible_set: StudyLocus, - v2g: V2G, + computed_features = [] + for feature in self.features_list: + if feature in self.feature_mapper: + computed_features.append( + self.compute_feature(feature, features_input_loader) + ) + else: + raise ValueError(f"Feature {feature} not found.") + return computed_features + + def compute_feature( + self: FeatureFactory, + feature_name: str, + features_input_loader: L2GFeatureInputLoader, ) -> L2GFeature: - """Get the maximum VEP score for all variants in a locus's 95% credible set. - - This informs about functional impact of the variants in the locus. 
For more information on variant consequences, see: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html - Two metrics: max VEP score per study locus and gene, and max VEP score per study locus. - + """Instantiates feature class. Args: - credible_set (StudyLocus): Study locus dataset with the associations to be annotated - v2g (V2G): V2G dataset with the variant/gene relationships and their consequences + feature_name (str): name of the feature + features_input_loader (L2GFeatureInputLoader): Object that contains features input. Returns: - L2GFeature: Stores the features with the max VEP score. + L2GFeature: instantiated feature object """ - - def _aggregate_vep_feature( - df: DataFrame, - aggregation_expr: Column, - aggregation_cols: list[str], - feature_name: str, - ) -> DataFrame: - """Extracts the maximum or average VEP score after grouping by the given columns. Different aggregations return different predictive annotations. - - If the group_cols include "geneId", the maximum/mean VEP score per gene is returned. - Otherwise, the maximum/mean VEP score for all genes in the neighborhood of the locus is returned. 
- - Args: - df (DataFrame): DataFrame with the VEP scores for each variant in a studyLocus - aggregation_expr (Column): Aggregation expression to apply - aggregation_cols (list[str]): Columns to group by - feature_name (str): Name of the feature to be returned - - Returns: - DataFrame: DataFrame with the maximum VEP score per locus or per locus/gene - """ - if "geneId" in aggregation_cols: - return df.groupBy(aggregation_cols).agg( - aggregation_expr.alias(feature_name) - ) - return ( - df.groupBy(aggregation_cols) - .agg( - aggregation_expr.alias(feature_name), - f.collect_set("geneId").alias("geneId"), - ) - .withColumn("geneId", f.explode("geneId")) - ) - - credible_set_w_variant_consequences = ( - credible_set.filter_credible_set(CredibleInterval.IS95) - .df.withColumn("variantInLocus", f.explode_outer("locus")) - .select( - f.col("studyLocusId"), - f.col("variantId"), - f.col("studyId"), - f.col("variantInLocus.variantId").alias("variantInLocusId"), - f.col("variantInLocus.posteriorProbability").alias( - "variantInLocusPosteriorProbability" - ), - ) - .join( - # Join with V2G to get variant consequences - v2g.df.filter(f.col("datasourceId") == "variantConsequence").selectExpr( - "variantId as variantInLocusId", "geneId", "score" - ), - on="variantInLocusId", - ) - .select( - "studyLocusId", - "variantId", - "studyId", - "geneId", - (f.col("score") * f.col("variantInLocusPosteriorProbability")).alias( - "weightedScore" - ), - ) - .distinct() - ) - - return L2GFeature( - _df=convert_from_wide_to_long( - reduce( - lambda x, y: x.unionByName(y, allowMissingColumns=True), - [ - # Calculate overall max VEP score for all genes in the vicinity - credible_set_w_variant_consequences.transform( - _aggregate_vep_feature, - f.max("weightedScore"), - ["studyLocusId"], - "vepMaximumNeighborhood", - ), - # Calculate overall max VEP score per gene - credible_set_w_variant_consequences.transform( - _aggregate_vep_feature, - f.max("weightedScore"), - ["studyLocusId", "geneId"], 
- "vepMaximum", - ), - # Calculate mean VEP score for all genes in the vicinity - credible_set_w_variant_consequences.transform( - _aggregate_vep_feature, - f.mean("weightedScore"), - ["studyLocusId"], - "vepMeanNeighborhood", - ), - # Calculate mean VEP score per gene - credible_set_w_variant_consequences.transform( - _aggregate_vep_feature, - f.mean("weightedScore"), - ["studyLocusId", "geneId"], - "vepMean", - ), - ], - ), - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ).filter(f.col("featureValue").isNotNull()), - _schema=L2GFeature.get_schema(), + # Extract feature class and dependency type + feature_cls = self.feature_mapper[feature_name] + feature_dependency_type = feature_cls.feature_dependency_type + return feature_cls.compute( + study_loci_to_annotate=self.study_loci_to_annotate, + feature_dependency=features_input_loader.get_dependency_by_type( + feature_dependency_type + ), ) diff --git a/src/gentropy/method/l2g/model.py b/src/gentropy/method/l2g/model.py index e0d9e42fb..6e0b0fda1 100644 --- a/src/gentropy/method/l2g/model.py +++ b/src/gentropy/method/l2g/model.py @@ -114,7 +114,7 @@ def predict( pd_dataframe.iteritems = pd_dataframe.items - feature_matrix_pdf = feature_matrix.df.toPandas() + feature_matrix_pdf = feature_matrix._df.toPandas() # L2G score is the probability the classifier assigns to the positive class (the second element in the probability array) feature_matrix_pdf["score"] = self.model.predict_proba( # We drop the fixed columns to only pass the feature values to the classifier diff --git a/src/gentropy/method/l2g/trainer.py b/src/gentropy/method/l2g/trainer.py index 85fedc45b..69dfb24ff 100644 --- a/src/gentropy/method/l2g/trainer.py +++ b/src/gentropy/method/l2g/trainer.py @@ -134,7 +134,7 @@ def log_to_wandb( run.log({"f1": f1_score(self.y_test, y_predicted, average="weighted")}) # Track gold standards and their features run.log( - {"featureMatrix": 
Table(dataframe=self.feature_matrix.df.toPandas())} + {"featureMatrix": Table(dataframe=self.feature_matrix._df.toPandas())} ) # Log feature missingness run.log( @@ -155,7 +155,7 @@ def train( Returns: LocusToGeneModel: Fitted model """ - data_df = self.feature_matrix.df.drop("geneId").toPandas() + data_df = self.feature_matrix._df.drop("geneId").toPandas() # Encode labels in `goldStandardSet` to a numeric value data_df["goldStandardSet"] = data_df["goldStandardSet"].map( diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 629f3a505..93ee38471 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -584,37 +584,17 @@ def sample_otp_interactions(spark: SparkSession) -> DataFrame: @pytest.fixture() def mock_l2g_feature_matrix(spark: SparkSession) -> L2GFeatureMatrix: """Mock l2g feature matrix dataset.""" - schema = L2GFeatureMatrix.get_schema() - - data_spec = ( - dg.DataGenerator( - spark, - rows=50, - partitions=4, - randomSeedMethod="hash_fieldname", - ) - .withSchema(schema) - .withColumnSpec("distanceTssMean", percentNulls=0.1) - .withColumnSpec("distanceTssMinimum", percentNulls=0.1) - .withColumnSpec("eqtlColocClppMaximum", percentNulls=0.1) - .withColumnSpec("eqtlColocClppMaximumNeighborhood", percentNulls=0.1) - .withColumnSpec("eqtlColocLlrMaximum", percentNulls=0.1) - .withColumnSpec("eqtlColocLlrMaximumNeighborhood", percentNulls=0.1) - .withColumnSpec("pqtlColocClppMaximum", percentNulls=0.1) - .withColumnSpec("pqtlColocClppMaximumNeighborhood", percentNulls=0.1) - .withColumnSpec("pqtlColocLlrMaximum", percentNulls=0.1) - .withColumnSpec("pqtlColocLlrMaximumNeighborhood", percentNulls=0.1) - .withColumnSpec("sqtlColocClppMaximum", percentNulls=0.1) - .withColumnSpec("sqtlColocClppMaximumNeighborhood", percentNulls=0.1) - .withColumnSpec("sqtlColocLlrMaximum", percentNulls=0.1) - .withColumnSpec("sqtlColocLlrMaximumNeighborhood", percentNulls=0.1) - .withColumnSpec( - "goldStandardSet", percentNulls=0.0, 
values=["positive", "negative"] - ) + return L2GFeatureMatrix( + _df=spark.createDataFrame( + [ + (1, "gene1", 100.0, None), + (2, "gene2", 1000.0, 0.0), + ], + "studyLocusId LONG, geneId STRING, distanceTssMean FLOAT, distanceTssMinimum FLOAT", + ), + with_gold_standard=False, ) - return L2GFeatureMatrix(_df=data_spec.build(), _schema=schema) - @pytest.fixture() def mock_l2g_gold_standard(spark: SparkSession) -> L2GGoldStandard: diff --git a/tests/gentropy/dataset/test_colocalisation.py b/tests/gentropy/dataset/test_colocalisation.py index 1651aa2d4..5371cf42c 100644 --- a/tests/gentropy/dataset/test_colocalisation.py +++ b/tests/gentropy/dataset/test_colocalisation.py @@ -2,9 +2,131 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +import pytest + from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import StudyLocus + +if TYPE_CHECKING: + from pyspark.sql import SparkSession def test_colocalisation_creation(mock_colocalisation: Colocalisation) -> None: """Test colocalisation creation with mock data.""" assert isinstance(mock_colocalisation, Colocalisation) + + +def test_append_study_metadata_study_locus( + mock_colocalisation: Colocalisation, + mock_study_locus: StudyLocus, + mock_study_index: StudyIndex, + metadata_cols: list[str] | None = None, +) -> None: + """Test appending right study metadata.""" + if metadata_cols is None: + metadata_cols = ["studyType"] + expected_extra_col = ["rightStudyType", "rightStudyId"] + res_df = mock_colocalisation.append_study_metadata( + mock_study_locus, + mock_study_index, + metadata_cols=metadata_cols, + colocalisation_side="right", + ) + for col in expected_extra_col: + assert col in res_df.columns, f"Column {col} not found in result DataFrame." 
+ + +class TestAppendStudyMetadata: + """Test Colocalisation.append_study_metadata method.""" + + @pytest.mark.parametrize( + ("colocalisation_side", "expected_geneId"), [("right", "g1"), ("left", None)] + ) + def test_append_study_metadata_right( + self: TestAppendStudyMetadata, + colocalisation_side: str, + expected_geneId: str | None, + metadata_cols: list[str] | None = None, + ) -> None: + """Test appending right study metadata.""" + if metadata_cols is None: + metadata_cols = ["geneId"] + observed_df = self.sample_colocalisation.append_study_metadata( + self.sample_study_locus, + self.sample_study_index, + metadata_cols=metadata_cols, + colocalisation_side=colocalisation_side, + ) + assert ( + observed_df.select(f"{colocalisation_side}GeneId").collect()[0][0] + == expected_geneId + ), f"Expected {colocalisation_side}GeneId {expected_geneId}, but got {observed_df.select(f'{colocalisation_side}GeneId').collect()[0][0]}" + + @pytest.fixture(autouse=True) + def _setup(self: TestAppendStudyMetadata, spark: SparkSession) -> None: + """Setup fixture.""" + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + ( + 1, + "var1", + "gwas1", + ), + ( + 2, + "var2", + "eqtl1", + ), + ], + ["studyLocusId", "variantId", "studyId"], + ), + _schema=StudyLocus.get_schema(), + ) + self.sample_study_index = StudyIndex( + _df=spark.createDataFrame( + [("gwas1", "gwas", None, "p1"), ("eqtl1", "eqtl", "g1", "p2")], + [ + "studyId", + "studyType", + "geneId", + "projectId", + ], + ), + _schema=StudyIndex.get_schema(), + ) + self.sample_colocalisation = Colocalisation( + _df=spark.createDataFrame( + [(1, 2, "X", "COLOC", 1, 0.9)], + [ + "leftStudyLocusId", + "rightStudyLocusId", + "chromosome", + "colocalisationMethod", + "numberColocalisingVariants", + "h4", + ], + ), + _schema=Colocalisation.get_schema(), + ) + + +def test_extract_maximum_coloc_probability_per_region_and_gene( + mock_colocalisation: Colocalisation, + mock_study_locus: StudyLocus, + 
mock_study_index: StudyIndex, + filter_by_colocalisation_method: str | None = None, +) -> None: + """Test extracting maximum coloc probability per region and gene returns a dataframe with the correct columns: studyLocusId, geneId, h4.""" + filter_by_colocalisation_method = filter_by_colocalisation_method or "Coloc" + res_df = mock_colocalisation.extract_maximum_coloc_probability_per_region_and_gene( + mock_study_locus, + mock_study_index, + filter_by_colocalisation_method=filter_by_colocalisation_method, + ) + expected_cols = ["studyLocusId", "geneId", "h4"] + for col in expected_cols: + assert col in res_df.columns, f"Column {col} not found in result DataFrame." diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py index d0f1c3672..d37ce5a4a 100644 --- a/tests/gentropy/dataset/test_l2g.py +++ b/tests/gentropy/dataset/test_l2g.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING -import pytest +from pyspark.sql.types import FloatType from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard @@ -34,7 +34,7 @@ def test_process_gene_interactions(sample_otp_interactions: DataFrame) -> None: ), "Gene interactions has a different schema." -def test_predictions(mock_l2g_predictions: L2GFeatureMatrix) -> None: +def test_predictions(mock_l2g_predictions: L2GPrediction) -> None: """Test L2G predictions creation with mock data.""" assert isinstance(mock_l2g_predictions, L2GPrediction) @@ -154,45 +154,36 @@ def test_remove_false_negatives(spark: SparkSession) -> None: assert observed_df.collect() == expected_df.collect() -def test_l2g_feature_constructor_with_schema_mismatch(spark: SparkSession) -> None: - """Test if provided shema mismatch results in error in L2GFeatureMatrix constructor. - - distanceTssMean is expected to be FLOAT by schema in src.gentropy.assets.schemas and is actualy DOUBLE. 
- """ - with pytest.raises(ValueError) as e: - L2GFeatureMatrix( - _df=spark.createDataFrame( - [ - (1, "gene1", 100.0), - (2, "gene2", 1000.0), - ], - "studyLocusId LONG, geneId STRING, distanceTssMean DOUBLE", - ), - _schema=L2GFeatureMatrix.get_schema(), - ) - assert e.value.args[0] == ( - "The following fields present differences in their datatypes: ['distanceTssMean']." - ) - - -def test_calculate_feature_missingness_rate(spark: SparkSession) -> None: - """Test L2GFeatureMatrix.calculate_feature_missingness_rate.""" +def test_l2g_feature_constructor_with_schema_mismatch( + spark: SparkSession, +) -> None: + """Test if provided schema mismatch is converted to right type in the L2GFeatureMatrix constructor.""" fm = L2GFeatureMatrix( _df=spark.createDataFrame( [ - (1, "gene1", 100.0, None), - (2, "gene2", 1000.0, 0.0), + (1, "gene1", 100.0), + (2, "gene2", 1000.0), ], - "studyLocusId LONG, geneId STRING, distanceTssMean FLOAT, distanceTssMinimum FLOAT", + "studyLocusId LONG, geneId STRING, distanceTssMean DOUBLE", ), - _schema=L2GFeatureMatrix.get_schema(), + with_gold_standard=False, ) + assert ( + fm._df.schema["distanceTssMean"].dataType == FloatType() + ), "Feature `distanceTssMean` is not being casted to FloatType. Check L2GFeatureMatrix constructor." 
+ +def test_calculate_feature_missingness_rate( + spark: SparkSession, mock_l2g_feature_matrix: L2GFeatureMatrix +) -> None: + """Test L2GFeatureMatrix.calculate_feature_missingness_rate.""" expected_missingness = {"distanceTssMean": 0.0, "distanceTssMinimum": 1.0} - observed_missingness = fm.calculate_feature_missingness_rate() + observed_missingness = mock_l2g_feature_matrix.calculate_feature_missingness_rate() assert isinstance(observed_missingness, dict) - assert fm.features_list is not None and len(observed_missingness) == len( - fm.features_list + assert mock_l2g_feature_matrix.features_list is not None and len( + observed_missingness + ) == len( + mock_l2g_feature_matrix.features_list ), "Missing features in the missingness rate dictionary." assert ( observed_missingness == expected_missingness diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py new file mode 100644 index 000000000..82df2dd4f --- /dev/null +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -0,0 +1,59 @@ +"""Test L2G feature generation.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pytest + +from gentropy.dataset.l2g_feature import ( + EQtlColocClppMaximumFeature, + EQtlColocH4MaximumFeature, + L2GFeature, + PQtlColocClppMaximumFeature, + PQtlColocH4MaximumFeature, + SQtlColocClppMaximumFeature, + SQtlColocH4MaximumFeature, + TuQtlColocClppMaximumFeature, + TuQtlColocH4MaximumFeature, +) +from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader + +if TYPE_CHECKING: + from gentropy.dataset.colocalisation import Colocalisation + from gentropy.dataset.study_index import StudyIndex + from gentropy.dataset.study_locus import StudyLocus + + +@pytest.mark.parametrize( + "feature_class", + [ + EQtlColocH4MaximumFeature, + PQtlColocH4MaximumFeature, + SQtlColocH4MaximumFeature, + TuQtlColocH4MaximumFeature, + EQtlColocClppMaximumFeature, + PQtlColocClppMaximumFeature, + 
SQtlColocClppMaximumFeature, + TuQtlColocClppMaximumFeature, + ], +) +def test_feature_factory_return_type( + feature_class: Any, + mock_study_locus: StudyLocus, + mock_colocalisation: Colocalisation, + mock_study_index: StudyIndex, +) -> None: + """Test that every feature factory returns a L2GFeature dataset.""" + loader = L2GFeatureInputLoader( + colocalisation=mock_colocalisation, + study_index=mock_study_index, + study_locus=mock_study_locus, + ) + feature_dataset = feature_class.compute( + study_loci_to_annotate=mock_study_locus, + feature_dependency=loader.get_dependency_by_type( + feature_class.feature_dependency_type + ), + ) + assert isinstance(feature_dataset, L2GFeature) diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py new file mode 100644 index 000000000..46384239c --- /dev/null +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -0,0 +1,150 @@ +"""Test L2G feature matrix methods.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest +from pyspark.sql.types import ( + ArrayType, + DoubleType, + LongType, + StringType, + StructField, + StructType, +) + +from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import StudyLocus +from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + + +class TestFromFeaturesList: + """Test L2GFeatureMatrix.from_features_list method. + + If the columns from the features list are there, it means that the business logic is working (the dataframe is not empty when converting from long to wide). 
+ """ + + def test_study_locus( + self: TestFromFeaturesList, + ) -> None: + """Test building feature matrix for a SL with the eQtlColocH4Maximum feature.""" + features_list = ["eQtlColocH4Maximum"] + loader = L2GFeatureInputLoader( + colocalisation=self.sample_colocalisation, + study_index=self.sample_study_index, + study_locus=self.sample_study_locus, + ) + fm = L2GFeatureMatrix.from_features_list( + self.sample_study_locus, features_list, loader + ) + for feature in features_list: + assert ( + feature in fm._df.columns + ), f"Feature {feature} not found in feature matrix." + + def test_gold_standard( + self: TestFromFeaturesList, + ) -> None: + """Test building feature matrix for a gold standard with the eQtlColocH4Maximum feature.""" + features_list = ["eQtlColocH4Maximum"] + loader = L2GFeatureInputLoader( + colocalisation=self.sample_colocalisation, + study_index=self.sample_study_index, + study_locus=self.sample_study_locus, + ) + fm = L2GFeatureMatrix.from_features_list( + self.sample_gold_standard, features_list, loader + ) + for feature in features_list: + assert ( + feature in fm._df.columns + ), f"Feature {feature} not found in feature matrix." 
+ + @pytest.fixture(autouse=True) + def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: + """Setup fixture.""" + self.sample_gold_standard = L2GGoldStandard( + _df=spark.createDataFrame( + [(1, "var1", "gwas1", "g1", "positive", ["a_source"])], + L2GGoldStandard.get_schema(), + ), + _schema=L2GGoldStandard.get_schema(), + ) + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + ( + 1, + "var1", + "gwas1", + [ + {"variantId": "var1", "posteriorProbability": 0.8}, + {"variantId": "var12", "posteriorProbability": 0.2}, + ], + ), + ( + 2, + "var2", + "eqtl1", + [ + {"variantId": "var2", "posteriorProbability": 1.0}, + ], + ), + ], + schema=StructType( + [ + StructField("studyLocusId", LongType(), True), + StructField("variantId", StringType(), True), + StructField("studyId", StringType(), True), + StructField( + "locus", + ArrayType( + StructType( + [ + StructField("variantId", StringType(), True), + StructField( + "posteriorProbability", DoubleType(), True + ), + ] + ) + ), + True, + ), + ] + ), + ), + _schema=StudyLocus.get_schema(), + ) + self.sample_study_index = StudyIndex( + _df=spark.createDataFrame( + [("gwas1", "gwas", None, "p1"), ("eqtl1", "eqtl", "g1", "p2")], + [ + "studyId", + "studyType", + "geneId", + "projectId", + ], + ), + _schema=StudyIndex.get_schema(), + ) + self.sample_colocalisation = Colocalisation( + _df=spark.createDataFrame( + [(1, 2, "X", "COLOC", 1, 0.9)], + [ + "leftStudyLocusId", + "rightStudyLocusId", + "chromosome", + "colocalisationMethod", + "numberColocalisingVariants", + "h4", + ], + ), + _schema=Colocalisation.get_schema(), + ) diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 94390d20b..c89521b3c 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -18,6 +18,8 @@ StructType, ) +from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.l2g_feature_matrix import 
L2GFeatureMatrix from gentropy.dataset.ld_index import LDIndex from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import ( @@ -28,6 +30,7 @@ from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.summary_statistics import SummaryStatistics from gentropy.dataset.variant_index import VariantIndex +from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader @pytest.mark.parametrize( @@ -780,6 +783,24 @@ def test_study_validation_correctness(self: TestStudyLocusValidation) -> None: ) == 1 +def test_build_feature_matrix( + mock_study_locus: StudyLocus, + mock_colocalisation: Colocalisation, + mock_study_index: StudyIndex, +) -> None: + """Test building feature matrix with the eQtlColocH4Maximum feature.""" + features_list = ["eQtlColocH4Maximum"] + loader = L2GFeatureInputLoader( + colocalisation=mock_colocalisation, + study_index=mock_study_index, + study_locus=mock_study_locus, + ) + fm = mock_study_locus.build_feature_matrix(features_list, loader) + assert isinstance( + fm, L2GFeatureMatrix + ), "Feature matrix should be of type L2GFeatureMatrix" + + class TestStudyLocusRedundancyFlagging: """Collection of tests related to flagging redundant credible sets.""" diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py index 6f91d32a9..78f97d48f 100644 --- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py +++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py @@ -7,15 +7,21 @@ import pytest from pyspark.sql import DataFrame +from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.v2g import V2G from gentropy.datasource.open_targets.l2g_gold_standard import ( OpenTargetsL2GGoldStandard, ) +from gentropy.method.l2g.feature_factory 
import L2GFeatureInputLoader if TYPE_CHECKING: from pyspark.sql.session import SparkSession + from gentropy.dataset.colocalisation import Colocalisation + from gentropy.dataset.study_locus import StudyLocus + def test_open_targets_as_l2g_gold_standard( sample_l2g_gold_standard: DataFrame, @@ -104,3 +110,22 @@ def _setup(self: TestExpandGoldStandardWithNegatives, spark: SparkSession) -> No V2G(_df=sample_v2g_df, _schema=V2G.get_schema()), ) ) + + +def test_build_feature_matrix( + mock_l2g_gold_standard: L2GGoldStandard, + mock_study_locus: StudyLocus, + mock_colocalisation: Colocalisation, + mock_study_index: StudyIndex, +) -> None: + """Test building feature matrix with the eQtlColocH4Maximum feature.""" + features_list = ["eQtlColocH4Maximum"] + loader = L2GFeatureInputLoader( + colocalisation=mock_colocalisation, + study_index=mock_study_index, + study_locus=mock_study_locus, + ) + fm = mock_study_locus.build_feature_matrix(features_list, loader) + assert isinstance( + mock_l2g_gold_standard.build_feature_matrix(fm), L2GFeatureMatrix + ), "Feature matrix should be of type L2GFeatureMatrix" diff --git a/tests/gentropy/method/test_locus_to_gene.py b/tests/gentropy/method/test_locus_to_gene.py deleted file mode 100644 index 460d65062..000000000 --- a/tests/gentropy/method/test_locus_to_gene.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Test locus-to-gene model training.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -import pytest -from sklearn.ensemble import RandomForestClassifier - -from gentropy.dataset.colocalisation import Colocalisation -from gentropy.dataset.l2g_feature import L2GFeature -from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus -from gentropy.method.l2g.feature_factory import ColocalisationFactory, StudyLocusFactory -from gentropy.method.l2g.model import LocusToGeneModel - -if TYPE_CHECKING: - from pyspark.sql import SparkSession - - from gentropy.dataset.v2g import V2G 
- - -@pytest.fixture(scope="module") -def model() -> LocusToGeneModel: - """Creates an instance of the LocusToGene class.""" - return LocusToGeneModel(model=RandomForestClassifier()) - - -class TestColocalisationFactory: - """Test the ColocalisationFactory methods.""" - - def test_get_max_coloc_per_credible_set( - self: TestColocalisationFactory, - mock_study_locus: StudyLocus, - mock_study_index: StudyIndex, - mock_colocalisation: Colocalisation, - ) -> None: - """Test the function that extracts the maximum log likelihood ratio for each pair of overlapping study-locus returns the right data type.""" - coloc_features = ColocalisationFactory._get_max_coloc_per_credible_set( - mock_colocalisation, - mock_study_locus, - mock_study_index, - ) - assert isinstance( - coloc_features, L2GFeature - ), "Unexpected type returned from _get_max_coloc_per_credible_set" - - def test_get_max_coloc_per_credible_set_semantic( - self: TestColocalisationFactory, - spark: SparkSession, - ) -> None: - """Test logic of the function that extracts the maximum log likelihood ratio for each pair of overlapping study-locus.""" - # Prepare mock datasets based on 2 associations - credset = StudyLocus( - _df=spark.createDataFrame( - # 2 associations with a common variant in the locus - [ - { - "studyLocusId": 1, - "variantId": "lead1", - "studyId": "study1", # this is a GWAS - "locus": [ - {"variantId": "commonTag", "posteriorProbability": 0.9}, - ], - "chromosome": "1", - }, - { - "studyLocusId": 2, - "variantId": "lead2", - "studyId": "study2", # this is a eQTL study - "locus": [ - {"variantId": "commonTag", "posteriorProbability": 0.9}, - ], - "chromosome": "1", - }, - ], - StudyLocus.get_schema(), - ), - _schema=StudyLocus.get_schema(), - ) - - studies = StudyIndex( - _df=spark.createDataFrame( - [ - { - "studyId": "study1", - "studyType": "gwas", - "traitFromSource": "trait1", - "projectId": "project1", - }, - { - "studyId": "study2", - "studyType": "eqtl", - "geneId": "gene1", - 
"traitFromSource": "trait2", - "projectId": "project2", - }, - ] - ), - _schema=StudyIndex.get_schema(), - ) - coloc = Colocalisation( - _df=spark.createDataFrame( - [ - { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, - "chromosome": "1", - "colocalisationMethod": "eCAVIAR", - "numberColocalisingVariants": 1, - "clpp": 0.81, # 0.9*0.9 - "log2h4h3": None, - } - ], - schema=Colocalisation.get_schema(), - ), - _schema=Colocalisation.get_schema(), - ) - expected_coloc_features_df = spark.createDataFrame( - [ - (1, "gene1", "eqtlColocClppMaximum", 0.81), - (1, "gene1", "eqtlColocClppMaximumNeighborhood", -4.0), - ], - L2GFeature.get_schema(), - ) - # Test - coloc_features = ColocalisationFactory._get_max_coloc_per_credible_set( - coloc, - credset, - studies, - ) - assert coloc_features.df.collect() == expected_coloc_features_df.collect() - - -class TestStudyLocusFactory: - """Test the StudyLocusFactory methods.""" - - def test_get_tss_distance_features( - self: TestStudyLocusFactory, mock_study_locus: StudyLocus, mock_v2g: V2G - ) -> None: - """Test the function that extracts the distance to the TSS.""" - tss_distance = StudyLocusFactory._get_tss_distance_features( - mock_study_locus, mock_v2g - ) - assert isinstance( - tss_distance, L2GFeature - ), "Unexpected model type returned from _get_tss_distance_features" - - def test_get_vep_features( - self: TestStudyLocusFactory, mock_study_locus: StudyLocus, mock_v2g: V2G - ) -> None: - """Test the function that extracts the VEP features.""" - vep_features = StudyLocusFactory._get_vep_features(mock_study_locus, mock_v2g) - assert isinstance( - vep_features, L2GFeature - ), "Unexpected model type returned from _get_vep_features" From a29222ed8d9bb8d023981626917eca6e659d2335 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Mon, 23 Sep 2024 19:42:51 +0200 Subject: [PATCH 047/188] feat(dataproc): ability to version gentropy for dataproc cluster (#774) --- 
.github/workflows/artifact.yml | 16 ++++++++++++ Makefile | 32 +++++++++++------------- docs/development/troubleshooting.md | 16 ++++++++++++ pyproject.toml | 2 +- utils/install_dependencies_on_cluster.sh | 4 --- 5 files changed, 48 insertions(+), 22 deletions(-) diff --git a/.github/workflows/artifact.yml b/.github/workflows/artifact.yml index 61dbab28e..2aa634a7e 100644 --- a/.github/workflows/artifact.yml +++ b/.github/workflows/artifact.yml @@ -10,6 +10,7 @@ env: REGION: europe-west1 GAR_LOCATION: europe-west1-docker.pkg.dev/open-targets-genetics-dev REPOSITORY: gentropy-app + PYTHON_VERSION_DEFAULT: "3.10.8" jobs: build-push-artifact: @@ -67,3 +68,18 @@ jobs: tags: "${{ env.GAR_LOCATION }}/${{ env.REPOSITORY }}/custom_ensembl_vep:${{ github.ref_name }}" context: . file: "src/vep/Dockerfile" + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION_DEFAULT }} + - name: Install and configure Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Build and push spark cluster dependencies + run: | + make build diff --git a/Makefile b/Makefile index b83075558..1d79d35fd 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ PROJECT_ID ?= open-targets-genetics-dev REGION ?= europe-west1 -APP_NAME ?= $$(cat pyproject.toml| grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g') -VERSION_NO ?= $$(poetry version --short) -CLEAN_VERSION_NO := $(shell echo "$(VERSION_NO)" | tr -cd '[:alnum:]') -BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/ -BUCKET_COMPOSER_DAGS=gs://europe-west1-ot-workflows-fe147745-bucket/dags/ +APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g') +REF ?= $$(git rev-parse --abbrev-ref HEAD) +PACKAGE_VERSION ?= $$(poetry version --short) +CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]') 
+BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF} .PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST)) @@ -38,35 +38,33 @@ build-documentation: ## Create local server with documentation create-dev-cluster: build ## Spin up a simple dataproc cluster with all dependencies for development purposes @echo "Creating Dataproc Dev Cluster" @gcloud config set project ${PROJECT_ID} - @gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_VERSION_NO}-$(USER)" \ + @gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \ --image-version 2.1 \ --region ${REGION} \ --master-machine-type n1-standard-16 \ - --initialization-actions=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/install_dependencies_on_cluster.sh \ - --metadata="PACKAGE=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/gentropy-${VERSION_NO}-py3-none-any.whl,CONFIGTAR=gs://genetics_etl_python_playground/initialisation/${VERSION_NO}/config.tar.gz" \ + --initialization-actions=$(BUCKET_NAME)/install_dependencies_on_cluster.sh \ + --metadata="PACKAGE=$(BUCKET_NAME)/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl" \ --secondary-worker-type spot \ --worker-machine-type n1-standard-4 \ --worker-boot-disk-size 500 \ --autoscaling-policy="projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/otg-etl" \ --optional-components=JUPYTER \ --enable-component-gateway \ - --max-idle=30m + --max-idle=60m make update-dev-cluster: build ## Reinstalls the package on the dev-cluster @echo "Updating Dataproc Dev Cluster" @gcloud config set project ${PROJECT_ID} - gcloud dataproc jobs submit pig --cluster="ot-genetics-dev-${CLEAN_VERSION_NO}" \ + gcloud dataproc jobs submit pig --cluster="ot-genetics-dev-${CLEAN_PACKAGE_VERSION}" \ --region ${REGION} \ --jars=${BUCKET_NAME}/install_dependencies_on_cluster.sh \ -e='sh chmod 750 $${PWD}/install_dependencies_on_cluster.sh; sh 
$${PWD}/install_dependencies_on_cluster.sh' build: clean ## Build Python package with dependencies @gcloud config set project ${PROJECT_ID} - @echo "Packaging Code and Dependencies for ${APP_NAME}-${VERSION_NO}" + @echo "Packaging Code and Dependencies for ${APP_NAME}-${PACKAGE_VERSION}" @poetry build - @tar -czf dist/config.tar.gz config/ - @echo "Uploading to Dataproc" - @gsutil cp src/gentropy/cli.py ${BUCKET_NAME} - @gsutil cp ./dist/${APP_NAME}-${VERSION_NO}-py3-none-any.whl ${BUCKET_NAME} - @gsutil cp ./dist/config.tar.gz ${BUCKET_NAME} - @gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME} + @echo "Uploading to ${BUCKET_NAME}" + @gsutil cp src/${APP_NAME}/cli.py ${BUCKET_NAME}/ + @gsutil cp ./dist/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl ${BUCKET_NAME}/ + @gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}/ diff --git a/docs/development/troubleshooting.md b/docs/development/troubleshooting.md index a30f72be0..498ee3b86 100644 --- a/docs/development/troubleshooting.md +++ b/docs/development/troubleshooting.md @@ -49,3 +49,19 @@ Some functions on MacOS may throw a java error: This can be resolved by adding the follow line to your `~/.zshrc`: `export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES` + +## Creating development dataproc cluster (OT users only) + +To start dataproc cluster in the development mode run + +``` +make create-dev-cluster +``` + +The command above will prepare 3 different resources: + +- gentropy package +- cli script +- cluster setup script + +and based on the branch ref (for example `dev`) will create a namespaced folder under GCS (`gs://genetics_etl_python_playground/initialisation/gentropy/dev`) with the three files described above. These files will be then used to create the cluster environment. 
diff --git a/pyproject.toml b/pyproject.toml index 1343c7b50..8e5469c6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ google = "^3.0.0" omegaconf = "^2.3.0" typing-extensions = "^4.9.0" scikit-learn = "^1.3.2" -pandas = {extras = ["gcp", "parquet"], version = "^2.2.2"} +pandas = { extras = ["gcp", "parquet"], version = "^2.2.2" } skops = ">=0.9,<0.11" google-cloud-secret-manager = "^2.20.0" diff --git a/utils/install_dependencies_on_cluster.sh b/utils/install_dependencies_on_cluster.sh index 9f26b9f17..6b76a7d60 100644 --- a/utils/install_dependencies_on_cluster.sh +++ b/utils/install_dependencies_on_cluster.sh @@ -3,7 +3,6 @@ set -exo pipefail readonly PACKAGE=$(/usr/share/google/get_metadata_value attributes/PACKAGE || true) -readonly CONFIGTAR=$(/usr/share/google/get_metadata_value attributes/CONFIGTAR || true) function err() { echo "[$(date +'%Y-%m-%dT%H:%M:%S%z')]: $*" >&2 @@ -63,9 +62,6 @@ function main() { echo "Install package..." run_with_retry pip install --upgrade ${PACKAGENAME} - echo "Downloading and uncompressing config..." - gsutil cp ${CONFIGTAR} . 
|| err "Failed to download CONFIGTAR" - tar -xvf $(basename ${CONFIGTAR}) || err "Failed to extract CONFIGTAR" } main From dcacaf7d2694818c4d60545944ba6d40f27e45a8 Mon Sep 17 00:00:00 2001 From: Vivien Ho <56025826+vivienho@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:37:11 +0100 Subject: [PATCH 048/188] feat: add `studyType` to `StudyLocus` and `Colocalisation` (and `StudyLocusOverlap`) (#782) * feat: add studyType to StudyLocus schema * feat: add annotate_study_type function to add studyType to StudyLocus * fix: remove lines for retrieving studyType as StudyLocus now contains studyType * fix: add studyType to test input data as StudyLocus now contains studyType * feat: add leftStudyType and rightStudyType to Colocalisation and StudyLocusOverlap schemas * feat: update _convert_to_square_matrix and its test with leftStudyType and rightStudyType * feat: update test_find_overlaps_semantic inputs with leftStudyType and rightStudyType * feat: update tests in test_colocalisation_method.py with leftStudyType and rightStudyType * feat: add leftStudyType and rightStudyType when creating StudyLocusOverlap * feat: add leftStudyType and rightStudyType to Colocalisation results * fix: remove redundant study_index parameter from filter_by_study_type function def and calls * fix: remove redundant study_index parameter from find_overlaps function def and calls * fix: remove leftStudyType from Colocalisation (not needed as always gwas) * fix: remove leftStudyType from StudyLocusOverlap (not needed as always gwas) * fix: missing comma * feat: update tests in test_locus_to_gene.py with studyType and rightStudyType * feat: update tests (colocalisation, l2g, l2g feature matrix) with rightStudyType * fix: remove studyType from metadata_cols in append_study_metadata function call --- .../assets/schemas/colocalisation.json | 6 ++ src/gentropy/assets/schemas/study_locus.json | 6 ++ .../assets/schemas/study_locus_overlap.json | 6 ++ src/gentropy/colocalisation.py | 8 +-- 
src/gentropy/dataset/colocalisation.py | 2 +- src/gentropy/dataset/study_locus.py | 31 ++++++++-- src/gentropy/dataset/study_locus_overlap.py | 7 +-- src/gentropy/l2g.py | 4 +- src/gentropy/method/colocalisation.py | 4 +- src/gentropy/study_locus_validation.py | 1 + tests/gentropy/dataset/test_colocalisation.py | 3 +- tests/gentropy/dataset/test_l2g.py | 4 +- .../dataset/test_l2g_feature_matrix.py | 3 +- tests/gentropy/dataset/test_study_locus.py | 58 ++++--------------- .../dataset/test_study_locus_overlap.py | 10 ++-- .../dataset/test_study_locus_overlaps.py | 8 +-- .../method/test_colocalisation_method.py | 5 ++ 17 files changed, 83 insertions(+), 83 deletions(-) diff --git a/src/gentropy/assets/schemas/colocalisation.json b/src/gentropy/assets/schemas/colocalisation.json index 7ff7453b9..6e1163cfe 100644 --- a/src/gentropy/assets/schemas/colocalisation.json +++ b/src/gentropy/assets/schemas/colocalisation.json @@ -13,6 +13,12 @@ "type": "long", "metadata": {} }, + { + "name": "rightStudyType", + "nullable": false, + "type": "string", + "metadata": {} + }, { "name": "chromosome", "nullable": false, diff --git a/src/gentropy/assets/schemas/study_locus.json b/src/gentropy/assets/schemas/study_locus.json index 11908f687..a8d15aba6 100644 --- a/src/gentropy/assets/schemas/study_locus.json +++ b/src/gentropy/assets/schemas/study_locus.json @@ -6,6 +6,12 @@ "nullable": false, "type": "long" }, + { + "metadata": {}, + "name": "studyType", + "nullable": true, + "type": "string" + }, { "metadata": {}, "name": "variantId", diff --git a/src/gentropy/assets/schemas/study_locus_overlap.json b/src/gentropy/assets/schemas/study_locus_overlap.json index 9a8e123cd..22ba7705e 100644 --- a/src/gentropy/assets/schemas/study_locus_overlap.json +++ b/src/gentropy/assets/schemas/study_locus_overlap.json @@ -12,6 +12,12 @@ "nullable": false, "type": "long" }, + { + "metadata": {}, + "name": "rightStudyType", + "nullable": false, + "type": "string" + }, { "metadata": {}, "name": 
"chromosome", diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 6b370d426..4f8431b98 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -8,7 +8,6 @@ from pyspark.sql.functions import col from gentropy.common.session import Session -from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import CredibleInterval, StudyLocus from gentropy.method.colocalisation import Coloc @@ -23,7 +22,6 @@ def __init__( self, session: Session, credible_set_path: str, - study_index_path: str, coloc_path: str, colocalisation_method: str, ) -> None: @@ -32,7 +30,6 @@ def __init__( Args: session (Session): Session object. credible_set_path (str): Input credible sets path. - study_index_path (str): Input study index path. coloc_path (str): Output Colocalisation path. colocalisation_method (str): Colocalisation method. """ @@ -47,14 +44,11 @@ def __init__( session, credible_set_path, recursiveFileLookup=True ) ) - si = StudyIndex.from_parquet( - session, study_index_path, recursiveFileLookup=True - ) # Transform overlaps = credible_set.filter_credible_set( CredibleInterval.IS95 - ).find_overlaps(si) + ).find_overlaps() colocalisation_results = colocalisation_class.colocalise(overlaps) # type: ignore # Load diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index c0d074ae3..94a4f09dc 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -91,7 +91,7 @@ def extract_maximum_coloc_probability_per_region_and_gene( self.append_study_metadata( study_locus, study_index, - metadata_cols=["studyType", "geneId"], + metadata_cols=["geneId"], colocalisation_side="right", ) # it also filters based on method and qtl type diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index e8363aa4e..57482fda8 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ 
-157,6 +157,24 @@ def validate_study(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: _schema=self.get_schema(), ) + def annotate_study_type(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: + """Gets study type from study index and adds it to study locus. + + Args: + study_index (StudyIndex): Study index to get study type. + + Returns: + StudyLocus: Updated study locus with study type. + """ + return StudyLocus( + _df=( + self.df + .drop("studyType") + .join(study_index.study_type_lut(), on="studyId", how="left") + ), + _schema=self.get_schema(), + ) + def validate_variant_identifiers( self: StudyLocus, variant_index: VariantIndex ) -> StudyLocus: @@ -394,6 +412,7 @@ def _align_overlapping_tags( f.col("chromosome"), f.col("tagVariantId"), f.col("studyLocusId").alias("rightStudyLocusId"), + f.col("studyType").alias("rightStudyType"), *[f.col(col).alias(f"right_{col}") for col in stats_cols], ).join(peak_overlaps, on=["chromosome", "rightStudyLocusId"], how="inner") @@ -410,6 +429,7 @@ def _align_overlapping_tags( ).select( "leftStudyLocusId", "rightStudyLocusId", + "rightStudyType", "chromosome", "tagVariantId", f.struct( @@ -505,13 +525,12 @@ def get_QC_mappings(cls: type[StudyLocus]) -> dict[str, str]: return {member.name: member.value for member in StudyLocusQualityCheck} def filter_by_study_type( - self: StudyLocus, study_type: str, study_index: StudyIndex + self: StudyLocus, study_type: str ) -> StudyLocus: """Creates a new StudyLocus dataset filtered by study type. Args: study_type (str): Study type to filter for. Can be one of `gwas`, `eqtl`, `pqtl`, `eqtl`. - study_index (StudyIndex): Study index to resolve study types. Returns: StudyLocus: Filtered study-locus dataset. @@ -524,7 +543,7 @@ def filter_by_study_type( f"Study type {study_type} not supported. Supported types are: gwas, eqtl, pqtl, sqtl." 
) new_df = ( - self.df.join(study_index.study_type_lut(), on="studyId", how="inner") + self.df .filter(f.col("studyType") == study_type) .drop("studyType") ) @@ -576,7 +595,7 @@ def filter_ld_set(ld_set: Column, r2_threshold: float) -> Column: ) def find_overlaps( - self: StudyLocus, study_index: StudyIndex, intra_study_overlap: bool = False + self: StudyLocus, intra_study_overlap: bool = False ) -> StudyLocusOverlap: """Calculate overlapping study-locus. @@ -584,14 +603,14 @@ def find_overlaps( appearing on the right side. Args: - study_index (StudyIndex): Study index to resolve study types. intra_study_overlap (bool): If True, finds intra-study overlaps for credible set deduplication. Default is False. Returns: StudyLocusOverlap: Pairs of overlapping study-locus with aligned tags. """ loci_to_overlap = ( - self.df.join(study_index.study_type_lut(), on="studyId", how="inner") + self.df + .filter(f.col("studyType").isNotNull()) .withColumn("locus", f.explode("locus")) .select( "studyLocusId", diff --git a/src/gentropy/dataset/study_locus_overlap.py b/src/gentropy/dataset/study_locus_overlap.py index 5f839bd9c..d14a2da96 100644 --- a/src/gentropy/dataset/study_locus_overlap.py +++ b/src/gentropy/dataset/study_locus_overlap.py @@ -10,7 +10,6 @@ if TYPE_CHECKING: from pyspark.sql.types import StructType - from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus @@ -36,18 +35,17 @@ def get_schema(cls: type[StudyLocusOverlap]) -> StructType: @classmethod def from_associations( - cls: type[StudyLocusOverlap], study_locus: StudyLocus, study_index: StudyIndex + cls: type[StudyLocusOverlap], study_locus: StudyLocus ) -> StudyLocusOverlap: """Find the overlapping signals in a particular set of associations (StudyLocus dataset). 
Args: study_locus (StudyLocus): Study-locus associations to find the overlapping signals - study_index (StudyIndex): Study index to find the overlapping signals Returns: StudyLocusOverlap: Study-locus overlap dataset """ - return study_locus.find_overlaps(study_index) + return study_locus.find_overlaps() def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap: """Convert the dataset to a square matrix. @@ -60,6 +58,7 @@ def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap: self.df.selectExpr( "leftStudyLocusId as rightStudyLocusId", "rightStudyLocusId as leftStudyLocusId", + "rightStudyType", "tagVariantId", ) ).distinct(), diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 832023cd8..13dbb881b 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -204,7 +204,7 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr ValueError: If write_feature_matrix is set to True but a path is not provided. ValueError: If dependencies to build features are not set. 
""" - if self.gs_curation and self.interactions and self.v2g and self.studies: + if self.gs_curation and self.interactions and self.v2g: study_locus_overlap = StudyLocus( _df=self.credible_set.df.join( f.broadcast( @@ -225,7 +225,7 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr "inner", ), _schema=StudyLocus.get_schema(), - ).find_overlaps(self.studies) + ).find_overlaps() gold_standards = L2GGoldStandard.from_otg_curation( gold_standard_curation=self.gs_curation, diff --git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py index c3320f931..7a3a0d9c5 100644 --- a/src/gentropy/method/colocalisation.py +++ b/src/gentropy/method/colocalisation.py @@ -79,7 +79,7 @@ def colocalise( f.col("statistics.right_posteriorProbability"), ), ) - .groupBy("leftStudyLocusId", "rightStudyLocusId", "chromosome") + .groupBy("leftStudyLocusId", "rightStudyLocusId", "rightStudyType", "chromosome") .agg( f.count("*").alias("numberColocalisingVariants"), f.sum(f.col("clpp")).alias("clpp"), @@ -168,7 +168,7 @@ def colocalise( f.col("left_logBF") + f.col("right_logBF"), ) # Group by overlapping peak and generating dense vectors of log_BF: - .groupBy("chromosome", "leftStudyLocusId", "rightStudyLocusId") + .groupBy("chromosome", "leftStudyLocusId", "rightStudyLocusId", "rightStudyType") .agg( f.count("*").alias("numberColocalisingVariants"), fml.array_to_vector(f.collect_list(f.col("left_logBF"))).alias( diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index e3d10f3db..4d1c234dc 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -46,6 +46,7 @@ def __init__( # Add flag for MHC region .qc_MHC_region() .validate_study(study_index) # Flagging studies not in study index + .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics 
.validate_unique_study_locus_id() # Flagging duplicated study locus ids ).persist() # we will need this for 2 types of outputs diff --git a/tests/gentropy/dataset/test_colocalisation.py b/tests/gentropy/dataset/test_colocalisation.py index 5371cf42c..8f2766fb4 100644 --- a/tests/gentropy/dataset/test_colocalisation.py +++ b/tests/gentropy/dataset/test_colocalisation.py @@ -100,10 +100,11 @@ def _setup(self: TestAppendStudyMetadata, spark: SparkSession) -> None: ) self.sample_colocalisation = Colocalisation( _df=spark.createDataFrame( - [(1, 2, "X", "COLOC", 1, 0.9)], + [(1, 2, "eqtl", "X", "COLOC", 1, 0.9)], [ "leftStudyLocusId", "rightStudyLocusId", + "rightStudyType", "chromosome", "colocalisationMethod", "numberColocalisingVariants", diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py index d37ce5a4a..2523b97dd 100644 --- a/tests/gentropy/dataset/test_l2g.py +++ b/tests/gentropy/dataset/test_l2g.py @@ -70,8 +70,8 @@ def test_filter_unique_associations(spark: SparkSession) -> None: ) mock_sl_overlap_df = spark.createDataFrame( - [(1, 2, "variant2"), (1, 4, "variant4")], - "leftStudyLocusId LONG, rightStudyLocusId LONG, tagVariantId STRING", + [(1, 2, "eqtl", "variant2"), (1, 4, "eqtl", "variant4")], + "leftStudyLocusId LONG, rightStudyLocusId LONG, rightStudyType STRING, tagVariantId STRING", ) expected_df = spark.createDataFrame( diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index 46384239c..09460ee85 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -136,10 +136,11 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ) self.sample_colocalisation = Colocalisation( _df=spark.createDataFrame( - [(1, 2, "X", "COLOC", 1, 0.9)], + [(1, 2, "eqtl", "X", "COLOC", 1, 0.9)], [ "leftStudyLocusId", "rightStudyLocusId", + "rightStudyType", "chromosome", "colocalisationMethod", 
"numberColocalisingVariants", diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index c89521b3c..ffb210d09 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -43,6 +43,7 @@ { "leftStudyLocusId": 1, "rightStudyLocusId": 2, + "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "commonTag", "statistics": { @@ -53,6 +54,7 @@ { "leftStudyLocusId": 1, "rightStudyLocusId": 2, + "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "nonCommonTag", "statistics": { @@ -79,6 +81,7 @@ def test_find_overlaps_semantic( "studyLocusId": 1, "variantId": "lead1", "studyId": "study1", + "studyType": "gwas", "locus": [ {"variantId": "commonTag", "posteriorProbability": 0.9}, ], @@ -88,6 +91,7 @@ def test_find_overlaps_semantic( "studyLocusId": 2, "variantId": "lead2", "studyId": "study2", + "studyType": "eqtl", "locus": [ {"variantId": "commonTag", "posteriorProbability": 0.6}, {"variantId": "nonCommonTag", "posteriorProbability": 0.6}, @@ -108,6 +112,7 @@ def test_find_overlaps_semantic( "studyLocusId": 1, "variantId": "lead1", "studyId": "study1", + "studyType": "gwas", "locus": [ {"variantId": "var1", "posteriorProbability": 0.9}, ], @@ -117,6 +122,7 @@ def test_find_overlaps_semantic( "studyLocusId": 2, "variantId": "lead2", "studyId": "study2", + "studyType": "eqtl", "locus": None, "chromosome": "1", }, @@ -126,25 +132,6 @@ def test_find_overlaps_semantic( _schema=StudyLocus.get_schema(), ) - studies = StudyIndex( - _df=spark.createDataFrame( - [ - { - "studyId": "study1", - "studyType": "gwas", - "traitFromSource": "trait1", - "projectId": "project1", - }, - { - "studyId": "study2", - "studyType": "eqtl", - "traitFromSource": "trait2", - "projectId": "project2", - }, - ] - ), - _schema=StudyIndex.get_schema(), - ) expected_overlaps_df = spark.createDataFrame( expected, StudyLocusOverlap.get_schema() ) @@ -154,18 +141,14 @@ def test_find_overlaps_semantic( 
"statistics.right_posteriorProbability", ] assert ( - credset.find_overlaps(studies).df.select(*cols_to_compare).collect() + credset.find_overlaps().df.select(*cols_to_compare).collect() == expected_overlaps_df.select(*cols_to_compare).collect() ), "Overlaps differ from expected." -def test_find_overlaps( - mock_study_locus: StudyLocus, mock_study_index: StudyIndex -) -> None: +def test_find_overlaps(mock_study_locus: StudyLocus) -> None: """Test study locus overlaps.""" - assert isinstance( - mock_study_locus.find_overlaps(mock_study_index), StudyLocusOverlap - ) + assert isinstance(mock_study_locus.find_overlaps(), StudyLocusOverlap) @pytest.mark.parametrize( @@ -184,39 +167,22 @@ def test_filter_by_study_type( "studyLocusId": 1, "variantId": "lead1", "studyId": "study1", + "studyType": "gwas", }, { # from eqtl "studyLocusId": 2, "variantId": "lead2", "studyId": "study2", + "studyType": "eqtl", }, ], StudyLocus.get_schema(), ), _schema=StudyLocus.get_schema(), ) - studies = StudyIndex( - _df=spark.createDataFrame( - [ - { - "studyId": "study1", - "studyType": "gwas", - "traitFromSource": "trait1", - "projectId": "project1", - }, - { - "studyId": "study2", - "studyType": "eqtl", - "traitFromSource": "trait2", - "projectId": "project2", - }, - ] - ), - _schema=StudyIndex.get_schema(), - ) - observed = sl.filter_by_study_type(study_type, studies) + observed = sl.filter_by_study_type(study_type) assert observed.df.count() == expected_sl_count diff --git a/tests/gentropy/dataset/test_study_locus_overlap.py b/tests/gentropy/dataset/test_study_locus_overlap.py index e26b59c30..7e591df30 100644 --- a/tests/gentropy/dataset/test_study_locus_overlap.py +++ b/tests/gentropy/dataset/test_study_locus_overlap.py @@ -19,19 +19,19 @@ def test_convert_to_square_matrix(spark: SparkSession) -> None: mock_sl_overlap = StudyLocusOverlap( _df=spark.createDataFrame( [ - (1, 2, "variant2"), + (1, 2, "eqtl", "variant2"), ], - "leftStudyLocusId LONG, rightStudyLocusId LONG, tagVariantId 
STRING", + "leftStudyLocusId LONG, rightStudyLocusId LONG, rightStudyType STRING, tagVariantId STRING", ), _schema=StudyLocusOverlap.get_schema(), ) expected_df = spark.createDataFrame( [ - (1, 2, "variant2"), - (2, 1, "variant2"), + (1, 2, "eqtl", "variant2"), + (2, 1, "eqtl", "variant2"), ], - "leftStudyLocusId LONG, rightStudyLocusId LONG, tagVariantId STRING", + "leftStudyLocusId LONG, rightStudyLocusId LONG, rightStudyType STRING, tagVariantId STRING", ) observed_df = mock_sl_overlap._convert_to_square_matrix().df diff --git a/tests/gentropy/dataset/test_study_locus_overlaps.py b/tests/gentropy/dataset/test_study_locus_overlaps.py index bd3415959..745f07ed2 100644 --- a/tests/gentropy/dataset/test_study_locus_overlaps.py +++ b/tests/gentropy/dataset/test_study_locus_overlaps.py @@ -13,8 +13,6 @@ if TYPE_CHECKING: from pyspark.sql import SparkSession - from gentropy.dataset.study_index import StudyIndex - def test_study_locus_overlap_creation( mock_study_locus_overlap: StudyLocusOverlap, @@ -23,11 +21,9 @@ def test_study_locus_overlap_creation( assert isinstance(mock_study_locus_overlap, StudyLocusOverlap) -def test_study_locus_overlap_from_associations( - mock_study_locus: StudyLocus, mock_study_index: StudyIndex -) -> None: +def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> None: """Test colocalisation creation from mock associations.""" - overlaps = StudyLocusOverlap.from_associations(mock_study_locus, mock_study_index) + overlaps = StudyLocusOverlap.from_associations(mock_study_locus) assert isinstance(overlaps, StudyLocusOverlap) diff --git a/tests/gentropy/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py index d6798d831..e292784c1 100644 --- a/tests/gentropy/method/test_colocalisation_method.py +++ b/tests/gentropy/method/test_colocalisation_method.py @@ -29,6 +29,7 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: { "leftStudyLocusId": 1, "rightStudyLocusId": 2, 
+ "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp", "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, @@ -52,6 +53,7 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: { "leftStudyLocusId": 1, "rightStudyLocusId": 2, + "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp1", "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, @@ -59,6 +61,7 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: { "leftStudyLocusId": 1, "rightStudyLocusId": 2, + "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp2", "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, @@ -119,6 +122,7 @@ def test_coloc_no_logbf( { "leftStudyLocusId": 1, "rightStudyLocusId": 2, + "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp", "statistics": { @@ -131,6 +135,7 @@ def test_coloc_no_logbf( [ StructField("leftStudyLocusId", LongType(), False), StructField("rightStudyLocusId", LongType(), False), + StructField("rightStudyType", StringType(), False), StructField("chromosome", StringType(), False), StructField("tagVariantId", StringType(), False), StructField( From df45a6c9a7b925745f919a42f03f11a4c6eaaab5 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 24 Sep 2024 15:58:42 +0100 Subject: [PATCH 049/188] feat: adding window based clumping to StudyLocus (#779) * feat: adding window based clumping to locus * fix: reverting some changes * chore: pre-commit auto fixes [...] 
* fix: fixing problem introduced by merge conflict * fix: addressing review comment --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/gentropy/dataset/study_locus.py | 42 +++++++----- src/gentropy/dataset/summary_statistics.py | 7 +- src/gentropy/gwas_catalog_ingestion.py | 10 ++- src/gentropy/method/window_based_clumping.py | 51 +++++++++------ tests/gentropy/dataset/test_study_locus.py | 68 ++++++++++++++++++++ 5 files changed, 138 insertions(+), 40 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 57482fda8..2385df984 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -17,6 +17,7 @@ order_array_of_structs_by_field, ) from gentropy.common.utils import get_logsum +from gentropy.config import WindowBasedClumpingStepConfig from gentropy.dataset.dataset import Dataset from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.variant_index import VariantIndex @@ -45,7 +46,8 @@ class StudyLocusQualityCheck(Enum): PALINDROMIC_ALLELE_FLAG (str): Alleles are palindromic - cannot harmonize AMBIGUOUS_STUDY (str): Association with ambiguous study UNRESOLVED_LD (str): Variant not found in LD reference - LD_CLUMPED (str): Explained by a more significant variant in high LD (clumped) + LD_CLUMPED (str): Explained by a more significant variant in high LD + WINDOW_CLUMPED (str): Explained by a more significant variant in the same window NO_POPULATION (str): Study does not have population annotation to resolve LD NOT_QUALIFYING_LD_BLOCK (str): LD block does not contain variants at the required R^2 threshold FAILED_STUDY (str): Flagging study loci if the study has failed QC @@ -65,7 +67,8 @@ class StudyLocusQualityCheck(Enum): PALINDROMIC_ALLELE_FLAG = "Palindrome alleles - cannot harmonize" AMBIGUOUS_STUDY = "Association with ambiguous study" UNRESOLVED_LD = "Variant not found in LD reference" - 
LD_CLUMPED = "Explained by a more significant variant in high LD (clumped)" + LD_CLUMPED = "Explained by a more significant variant in high LD" + WINDOW_CLUMPED = "Explained by a more significant variant in the same window" NO_POPULATION = "Study does not have population annotation to resolve LD" NOT_QUALIFYING_LD_BLOCK = ( "LD block does not contain variants at the required R^2 threshold" @@ -168,9 +171,9 @@ def annotate_study_type(self: StudyLocus, study_index: StudyIndex) -> StudyLocus """ return StudyLocus( _df=( - self.df - .drop("studyType") - .join(study_index.study_type_lut(), on="studyId", how="left") + self.df.drop("studyType").join( + study_index.study_type_lut(), on="studyId", how="left" + ) ), _schema=self.get_schema(), ) @@ -524,9 +527,7 @@ def get_QC_mappings(cls: type[StudyLocus]) -> dict[str, str]: """ return {member.name: member.value for member in StudyLocusQualityCheck} - def filter_by_study_type( - self: StudyLocus, study_type: str - ) -> StudyLocus: + def filter_by_study_type(self: StudyLocus, study_type: str) -> StudyLocus: """Creates a new StudyLocus dataset filtered by study type. Args: @@ -542,11 +543,7 @@ def filter_by_study_type( raise ValueError( f"Study type {study_type} not supported. Supported types are: gwas, eqtl, pqtl, sqtl." ) - new_df = ( - self.df - .filter(f.col("studyType") == study_type) - .drop("studyType") - ) + new_df = self.df.filter(f.col("studyType") == study_type).drop("studyType") return StudyLocus( _df=new_df, _schema=self._schema, @@ -609,8 +606,7 @@ def find_overlaps( StudyLocusOverlap: Pairs of overlapping study-locus with aligned tags. 
""" loci_to_overlap = ( - self.df - .filter(f.col("studyType").isNotNull()) + self.df.filter(f.col("studyType").isNotNull()) .withColumn("locus", f.explode("locus")) .select( "studyLocusId", @@ -1051,3 +1047,19 @@ def annotate_locus_statistics_boundaries( ) return self + + def window_based_clumping( + self: StudyLocus, + window_size: int = WindowBasedClumpingStepConfig().distance, + ) -> StudyLocus: + """Clump study locus by window size. + + Args: + window_size (int): Window size for clumping. + + Returns: + StudyLocus: Clumped study locus, where clumped associations are flagged. + """ + from gentropy.method.window_based_clumping import WindowBasedClumping + + return WindowBasedClumping.clump(self, window_size) diff --git a/src/gentropy/dataset/summary_statistics.py b/src/gentropy/dataset/summary_statistics.py index d0875fe85..25edbeca7 100644 --- a/src/gentropy/dataset/summary_statistics.py +++ b/src/gentropy/dataset/summary_statistics.py @@ -77,10 +77,11 @@ def window_based_clumping( from gentropy.method.window_based_clumping import WindowBasedClumping return WindowBasedClumping.clump( - self, + # Before clumping, we filter the summary statistics by p-value: + self.pvalue_filter(gwas_significance), distance=distance, - gwas_significance=gwas_significance, - ) + # After applying the clumping, we filter the clumped loci by the flag: + ).valid_rows(["WINDOW_CLUMPED"]) def locus_breaker_clumping( self: SummaryStatistics, diff --git a/src/gentropy/gwas_catalog_ingestion.py b/src/gentropy/gwas_catalog_ingestion.py index 725f1ca4d..5dab5bf16 100644 --- a/src/gentropy/gwas_catalog_ingestion.py +++ b/src/gentropy/gwas_catalog_ingestion.py @@ -3,6 +3,7 @@ from __future__ import annotations from gentropy.common.session import Session +from gentropy.config import WindowBasedClumpingStepConfig from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.gwas_catalog.associations import ( GWASCatalogCuratedAssociationsParser, @@ -30,6 +31,7 @@ def __init__( 
gnomad_variant_path: str, catalog_studies_out: str, catalog_associations_out: str, + distance: int = WindowBasedClumpingStepConfig().distance, gwas_catalog_study_curation_file: str | None = None, inclusion_list_path: str | None = None, ) -> None: @@ -44,6 +46,7 @@ def __init__( gnomad_variant_path (str): Path to GnomAD variants. catalog_studies_out (str): Output GWAS catalog studies path. catalog_associations_out (str): Output GWAS catalog associations path. + distance (int): Distance, within which tagging variants are collected around the semi-index. gwas_catalog_study_curation_file (str | None): file of the curation table. Optional. inclusion_list_path (str | None): optional inclusion list (parquet) """ @@ -86,4 +89,9 @@ def __init__( # Load study_index.df.write.mode(session.write_mode).parquet(catalog_studies_out) - study_locus.df.write.mode(session.write_mode).parquet(catalog_associations_out) + + ( + study_locus.window_based_clumping(distance) + .df.write.mode(session.write_mode) + .parquet(catalog_associations_out) + ) diff --git a/src/gentropy/method/window_based_clumping.py b/src/gentropy/method/window_based_clumping.py index 629fe627e..9ef747abf 100644 --- a/src/gentropy/method/window_based_clumping.py +++ b/src/gentropy/method/window_based_clumping.py @@ -12,7 +12,7 @@ from pyspark.sql.window import Window from gentropy.config import WindowBasedClumpingStepConfig -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck if TYPE_CHECKING: from numpy.typing import NDArray @@ -154,22 +154,38 @@ def _prune_peak(position: NDArray[np.float64], window_size: int) -> DenseVector: @staticmethod def clump( - summary_statistics: SummaryStatistics, + unclumped_associations: SummaryStatistics | StudyLocus, distance: int = WindowBasedClumpingStepConfig().distance, - gwas_significance: float = WindowBasedClumpingStepConfig().gwas_significance, ) -> StudyLocus: - """Clump significant signals from 
summary statistics based on window. + """Clump single point associations from summary statistics or study locus dataset based on window. Args: - summary_statistics (SummaryStatistics): Summary statistics to be used for clumping. + unclumped_associations (SummaryStatistics | StudyLocus): Input dataset to be used for clumping. Assumes that the input dataset is already filtered for significant variants. distance (int): Distance in base pairs to be used for clumping. Defaults to 500_000. - gwas_significance (float): GWAS significance threshold. Defaults to 5e-8. Returns: - StudyLocus: clumped summary statistics (without locus collection) - - Check WindowBasedClumpingStepConfig object for default values + StudyLocus: clumped associations, where the clumped variants are flagged. """ + # Quality check expression that flags variants that are not considered lead variant: + qc_check = f.col("semiIndices")[f.col("pvRank") - 1] <= 0 + + # The quality control expression will depend on the input dataset, as the column might be already present: + qc_expression = ( + # When the column is already present and the condition is met, the value is appended to the array, otherwise keep as is: + f.when( + qc_check, + f.array_union( + f.col("qualityControls"), + f.array(f.lit(StudyLocusQualityCheck.WINDOW_CLUMPED.value)), + ), + ).otherwise(f.col("qualityControls")) + if "qualityControls" in unclumped_associations.df.columns + # If column is not there yet, initialize it with the flag value, or an empty array: + else f.when( + qc_check, f.array(f.lit(StudyLocusQualityCheck.WINDOW_CLUMPED.value)) + ).otherwise(f.array().cast(t.ArrayType(t.StringType()))) + ) + # Create window for locus clusters # - variants where the distance between subsequent variants is below the defined threshold. 
# - Variants are sorted by descending significance @@ -179,11 +195,8 @@ def clump( return StudyLocus( _df=( - summary_statistics - # Dropping snps below significance - all subsequent steps are done on significant variants: - .pvalue_filter(gwas_significance) - .df - # Clustering summary variants for efficient windowing (complexity reduction): + unclumped_associations.df + # Clustering variants for efficient windowing (complexity reduction): .withColumn( "cluster_id", WindowBasedClumping._cluster_peaks( @@ -207,7 +220,7 @@ def clump( ), ).otherwise(f.array()), ) - # Get semi indices only ONCE per cluster: + # Collect top loci per cluster: .withColumn( "semiIndices", f.when( @@ -230,9 +243,6 @@ def clump( ), ).otherwise(f.col("semiIndices")), ) - # Keeping semi indices only: - .filter(f.col("semiIndices")[f.col("pvRank") - 1] > 0) - .drop("pvRank", "collectedPositions", "semiIndices", "cluster_id") # Adding study-locus id: .withColumn( "studyLocusId", @@ -241,9 +251,8 @@ def clump( ), ) # Initialize QC column as array of strings: - .withColumn( - "qualityControls", f.array().cast(t.ArrayType(t.StringType())) - ) + .withColumn("qualityControls", qc_expression) + .drop("pvRank", "collectedPositions", "semiIndices", "cluster_id") ), _schema=StudyLocus.get_schema(), ) diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index ffb210d09..51fc2ed92 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -749,6 +749,74 @@ def test_study_validation_correctness(self: TestStudyLocusValidation) -> None: ) == 1 +class TestStudyLocusWindowClumping: + """Testing window-based clumping on study locus.""" + + TEST_DATASET = [ + ("s1", "c1", 1, -1), + ("s1", "c1", 2, -2), + ("s1", "c1", 3, -3), + ("s2", "c2", 2, -2), + ("s3", "c2", 2, -2), + ] + + TEST_SCHEMA = t.StructType( + [ + t.StructField("studyId", t.StringType(), False), + t.StructField("chromosome", t.StringType(), False), + 
t.StructField("position", t.IntegerType(), False), + t.StructField("pValueExponent", t.IntegerType(), False), + ] + ) + + @pytest.fixture(autouse=True) + def _setup(self: TestStudyLocusWindowClumping, spark: SparkSession) -> None: + """Setup study locus for testing.""" + self.study_locus = StudyLocus( + _df=( + spark.createDataFrame( + self.TEST_DATASET, schema=self.TEST_SCHEMA + ).withColumns( + { + "studyLocusId": f.monotonically_increasing_id().cast( + t.LongType() + ), + "pValueMantissa": f.lit(1).cast(t.FloatType()), + "variantId": f.concat( + f.lit("v"), + f.monotonically_increasing_id().cast(t.StringType()), + ), + } + ) + ), + _schema=StudyLocus.get_schema(), + ) + + def test_clump_return_type(self: TestStudyLocusWindowClumping) -> None: + """Testing if the clumping returns the right type.""" + assert isinstance(self.study_locus.window_based_clumping(3), StudyLocus) + + def test_clump_no_data_loss(self: TestStudyLocusWindowClumping) -> None: + """Testing if the clumping returns same number of rows.""" + assert ( + self.study_locus.window_based_clumping(3).df.count() + == self.study_locus.df.count() + ) + + def test_correct_flag(self: TestStudyLocusWindowClumping) -> None: + """Testing if the clumping flags are for variants.""" + assert ( + self.study_locus.window_based_clumping(3) + .df.filter( + f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.WINDOW_CLUMPED.value, + ) + ) + .count() + ) == 2 + + def test_build_feature_matrix( mock_study_locus: StudyLocus, mock_colocalisation: Colocalisation, From 148e26e7013ebd400f4ada63a4d0a8b2480c490b Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 24 Sep 2024 16:36:44 +0100 Subject: [PATCH 050/188] fix: small qc flag fixes (#784) --- src/gentropy/dataset/study_index.py | 2 -- src/gentropy/study_locus_validation.py | 5 +---- src/gentropy/study_validation.py | 3 +-- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py 
index ac637f137..3c3debba9 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -30,14 +30,12 @@ class StudyQualityCheck(Enum): UNRESOLVED_DISEASE (str): Disease identifier could not match to referece or retired identifier - labelling failing disease UNKNOWN_STUDY_TYPE (str): Indicating the provided type of study is not supported. DUPLICATED_STUDY (str): Flagging if a study identifier is not unique. - NO_GENE_PROVIDED (str): Flagging QTL studies if the measured """ UNRESOLVED_TARGET = "Target/gene identifier could not match to reference." UNRESOLVED_DISEASE = "No valid disease identifier found." UNKNOWN_STUDY_TYPE = "This type of study is not supported." DUPLICATED_STUDY = "The identifier of this study is not unique." - NO_GENE_PROVIDED = "QTL study doesn't have gene assigned." @dataclass diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 4d1c234dc..7c853bbcb 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -41,14 +41,11 @@ def __init__( # Running validation then writing output: study_locus_with_qc = ( StudyLocus.from_parquet(session, list(study_locus_path)) - # Flagging study locus with subsignificant p-values - .validate_lead_pvalue(pvalue_cutoff=gwas_significance) # Add flag for MHC region .qc_MHC_region() .validate_study(study_index) # Flagging studies not in study index - .annotate_study_type(study_index) # Add study type to study locus + .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics - .validate_unique_study_locus_id() # Flagging duplicated study locus ids ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows( diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index 5bfb83fe0..565aa410d 100644 --- a/src/gentropy/study_validation.py +++ 
b/src/gentropy/study_validation.py @@ -58,8 +58,7 @@ def __init__( # Running validation: study_index_with_qc = ( - study_index.validate_disease(disease_index) - .validate_unique_study_id() # Flagging duplicated study ids + study_index.validate_unique_study_id() # Flagging duplicated study ids .validate_study_type() # Flagging non-supported study types. .validate_target(target_index) # Flagging QTL studies with invalid targets .validate_disease(disease_index) # Flagging invalid EFOs From ccdb1f25de6dba9e3c4ebdaa9b432f9e300a8bb8 Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: Tue, 24 Sep 2024 16:49:10 +0100 Subject: [PATCH 051/188] feat: add biosample index (#769) * Initial commit of biosample index * Make minimal class * Tidy up first draft of adding biosample index * Add beginning of logic for checking if biosample from a studyindex is in biosample index * Make early file for merging multiple biosample indices into one * Finish adding basic iteration of biosample index, needs debugging * Tweak slightly * Modified the parser to accept JSON files * Update biosample index * Tests and docs * Updating tests * Revert GWAS catalog file * fix(biosample index): update to match pre-commit standards * fix(biosample index): merging indices fix * fix(biosample index): update study index qc logic * fix(biosample index): fix missing mock_biosample_index * chore(biosample index): change datasource name from ontologies * fix(biosample index): add dataset doc * fix(biosample index): change dbXrefs to xrefs * chore (biosample index): better commenting Co-authored-by: Daniel Suveges * fix(biosample index): various minor tweaks to biosample index * fix(biosample index): minor bug * fix(biosample index): fix merge shift to method * feat(biosample index): make biosampleName not nullable --------- Co-authored-by: Daniel Suveges --- docs/python_api/datasets/biosample_index.md | 9 + docs/python_api/datasources/_datasources.md | 7 +- .../biosample_ontologies/_cell_ontology.md | 5 + 
.../biosample_ontologies/_uberon.md | 5 + docs/python_api/steps/biosample_index_step.md | 5 + poetry.lock | 3 +- .../assets/schemas/biosample_index.json | 83 ++ src/gentropy/biosample_index.py | 34 + src/gentropy/config.py | 12 + src/gentropy/dataset/biosample_index.py | 72 ++ src/gentropy/dataset/study_index.py | 36 + .../biosample_ontologies/__init__.py | 3 + .../datasource/biosample_ontologies/utils.py | 130 +++ src/gentropy/study_validation.py | 5 + tests/gentropy/conftest.py | 30 + .../data_samples/cell_ontology_sample.json | 351 +++++++ .../gentropy/data_samples/uberon_sample.json | 889 ++++++++++++++++++ .../gentropy/dataset/test_biosample_index.py | 8 + .../test_biosample_ontology.py | 50 + 19 files changed, 1735 insertions(+), 2 deletions(-) create mode 100644 docs/python_api/datasets/biosample_index.md create mode 100644 docs/python_api/datasources/biosample_ontologies/_cell_ontology.md create mode 100644 docs/python_api/datasources/biosample_ontologies/_uberon.md create mode 100644 docs/python_api/steps/biosample_index_step.md create mode 100644 src/gentropy/assets/schemas/biosample_index.json create mode 100644 src/gentropy/biosample_index.py create mode 100644 src/gentropy/dataset/biosample_index.py create mode 100644 src/gentropy/datasource/biosample_ontologies/__init__.py create mode 100644 src/gentropy/datasource/biosample_ontologies/utils.py create mode 100644 tests/gentropy/data_samples/cell_ontology_sample.json create mode 100644 tests/gentropy/data_samples/uberon_sample.json create mode 100644 tests/gentropy/dataset/test_biosample_index.py create mode 100644 tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py diff --git a/docs/python_api/datasets/biosample_index.md b/docs/python_api/datasets/biosample_index.md new file mode 100644 index 000000000..d3e4ee2c8 --- /dev/null +++ b/docs/python_api/datasets/biosample_index.md @@ -0,0 +1,9 @@ +--- +title: Biosample index +--- + +::: gentropy.dataset.biosample_index.BiosampleIndex 
+ +## Schema + +--8<-- "assets/schemas/biosample_index.md" diff --git a/docs/python_api/datasources/_datasources.md b/docs/python_api/datasources/_datasources.md index e6e081b21..43b212e50 100644 --- a/docs/python_api/datasources/_datasources.md +++ b/docs/python_api/datasources/_datasources.md @@ -26,7 +26,7 @@ This section contains information about the data source harmonisation tools avai 2. GWAS catalog's [harmonisation pipeline](https://www.ebi.ac.uk/gwas/docs/methods/summary-statistics#_harmonised_summary_statistics_data) 3. Ensembl's [Variant Effect Predictor](https://www.ensembl.org/info/docs/tools/vep/index.html) -## Linkage desiquilibrium +## Linkage disequilibrium 1. [GnomAD](gnomad/_gnomad.md) v2.1.1 LD matrixes (7 ancestries) @@ -37,3 +37,8 @@ This section contains information about the data source harmonisation tools avai ## Gene annotation 1. [Open Targets Platform Target Dataset](open_targets/target.md) (derived from Ensembl) + +## Biological samples + +1. [Uberon](biosample_ontologies/_uberon.md) +2. [Cell Ontology](biosample_ontologies/_cell_ontology.md) diff --git a/docs/python_api/datasources/biosample_ontologies/_cell_ontology.md b/docs/python_api/datasources/biosample_ontologies/_cell_ontology.md new file mode 100644 index 000000000..5798e032b --- /dev/null +++ b/docs/python_api/datasources/biosample_ontologies/_cell_ontology.md @@ -0,0 +1,5 @@ +--- +title: Cell Ontology +--- + +The [Cell Ontology](http://www.obofoundry.org/ontology/cl.html) is a structured controlled vocabulary for cell types. It is used to annotate cell types in single-cell RNA-seq data and other omics data. 
diff --git a/docs/python_api/datasources/biosample_ontologies/_uberon.md b/docs/python_api/datasources/biosample_ontologies/_uberon.md new file mode 100644 index 000000000..4bb47305a --- /dev/null +++ b/docs/python_api/datasources/biosample_ontologies/_uberon.md @@ -0,0 +1,5 @@ +--- +title: Uberon +--- + +The [Uberon](http://uberon.github.io/) ontology is a multi-species anatomy ontology that integrates cross-species ontologies into a single ontology. diff --git a/docs/python_api/steps/biosample_index_step.md b/docs/python_api/steps/biosample_index_step.md new file mode 100644 index 000000000..d8f7abbb4 --- /dev/null +++ b/docs/python_api/steps/biosample_index_step.md @@ -0,0 +1,5 @@ +--- +title: biosample_index +--- + +::: gentropy.biosample_index.BiosampleIndexStep diff --git a/poetry.lock b/poetry.lock index 226311a8b..296f07145 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.0 and should not be changed by hand. 
[[package]] name = "aiodns" @@ -3952,6 +3952,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/src/gentropy/assets/schemas/biosample_index.json b/src/gentropy/assets/schemas/biosample_index.json new file mode 100644 index 000000000..6309ca2c7 --- /dev/null +++ b/src/gentropy/assets/schemas/biosample_index.json @@ -0,0 +1,83 @@ +{ + "type": "struct", + "fields": [ + { + "name": "biosampleId", + "type": "string", + "nullable": false, + "metadata": {} + }, + { + "name": "biosampleName", + "type": "string", + "nullable": false, + "metadata": {} + }, + { + "name": "description", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "xrefs", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "synonyms", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "parents", + "type": { + "type": 
"array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "ancestors", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "descendants", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "children", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + } + ] +} diff --git a/src/gentropy/biosample_index.py b/src/gentropy/biosample_index.py new file mode 100644 index 000000000..e85c2e135 --- /dev/null +++ b/src/gentropy/biosample_index.py @@ -0,0 +1,34 @@ +"""Step to generate biosample index dataset.""" +from __future__ import annotations + +from gentropy.common.session import Session +from gentropy.datasource.biosample_ontologies.utils import extract_ontology_from_json + + +class BiosampleIndexStep: + """Biosample index step. + + This step generates a Biosample index dataset from the various ontology sources. Currently Cell Ontology and Uberon are supported. + """ + + def __init__( + self, + session: Session, + cell_ontology_input_path: str, + uberon_input_path: str, + biosample_index_path: str, + ) -> None: + """Run Biosample index generation step. + + Args: + session (Session): Session object. + cell_ontology_input_path (str): Input cell ontology dataset path. + uberon_input_path (str): Input uberon dataset path. + biosample_index_path (str): Output biosample index dataset path. 
+ """ + cell_ontology_index = extract_ontology_from_json(cell_ontology_input_path, session.spark) + uberon_index = extract_ontology_from_json(uberon_input_path, session.spark) + + biosample_index = cell_ontology_index.merge_indices([uberon_index]) + + biosample_index.df.write.mode(session.write_mode).parquet(biosample_index_path) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index d5e02924b..32edc9a4a 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -51,6 +51,16 @@ class GeneIndexConfig(StepConfig): _target_: str = "gentropy.gene_index.GeneIndexStep" +@dataclass +class BiosampleIndexConfig(StepConfig): + """Biosample index step configuration.""" + + cell_ontology_input_path: str = MISSING + uberon_input_path: str = MISSING + biosample_index_path: str = MISSING + _target_: str = "gentropy.biosample_index.BiosampleIndexStep" + + @dataclass class GWASCatalogStudyCurationConfig(StepConfig): """GWAS Catalog study curation step configuration.""" @@ -472,6 +482,7 @@ class StudyValidationStepConfig(StepConfig): study_index_path: list[str] = MISSING target_index_path: str = MISSING disease_index_path: str = MISSING + biosample_index_path: str = MISSING valid_study_index_path: str = MISSING invalid_study_index_path: str = MISSING invalid_qc_reasons: list[str] = MISSING @@ -512,6 +523,7 @@ def register_config() -> None: cs.store(group="step", name="colocalisation", node=ColocalisationConfig) cs.store(group="step", name="eqtl_catalogue", node=EqtlCatalogueConfig) cs.store(group="step", name="gene_index", node=GeneIndexConfig) + cs.store(group="step", name="biosample_index", node=BiosampleIndexConfig) cs.store( group="step", name="gwas_catalog_study_curation", diff --git a/src/gentropy/dataset/biosample_index.py b/src/gentropy/dataset/biosample_index.py new file mode 100644 index 000000000..39c597142 --- /dev/null +++ b/src/gentropy/dataset/biosample_index.py @@ -0,0 +1,72 @@ +"""Biosample index dataset.""" + +from __future__ import annotations 
+ +from dataclasses import dataclass +from functools import reduce +from typing import TYPE_CHECKING + +import pyspark.sql.functions as f +from pyspark.sql import DataFrame +from pyspark.sql.types import ArrayType, StringType + +from gentropy.common.schemas import parse_spark_schema +from gentropy.dataset.dataset import Dataset + +if TYPE_CHECKING: + from pyspark.sql.types import StructType + + +@dataclass +class BiosampleIndex(Dataset): + """Biosample index dataset. + + A Biosample index dataset captures the metadata of the biosamples (e.g. tissues, cell types, cell lines, etc) such as alternate names and relationships with other biosamples. + """ + + @classmethod + def get_schema(cls: type[BiosampleIndex]) -> StructType: + """Provide the schema for the BiosampleIndex dataset. + + Returns: + StructType: The schema of the BiosampleIndex dataset. + """ + return parse_spark_schema("biosample_index.json") + + def merge_indices( + self: BiosampleIndex, + biosample_indices : list[BiosampleIndex] + ) -> BiosampleIndex: + """Merge a list of biosample indices into a single biosample index. + + Where there are conflicts, in single values - the first value is taken. In list values, the union of all values is taken. + + Args: + biosample_indices (list[BiosampleIndex]): Biosample indices to merge. + + Returns: + BiosampleIndex: Merged biosample index. 
+ """ + # Extract the DataFrames from the BiosampleIndex objects + biosample_dfs = [biosample_index.df for biosample_index in biosample_indices] + [self.df] + + # Merge the DataFrames + merged_df = reduce(DataFrame.unionAll, biosample_dfs) + + # Determine aggregation functions for each column + # Currently this will take the first value for single values and merge lists for list values + agg_funcs = [] + for field in merged_df.schema.fields: + if field.name != "biosampleId": # Skip the grouping column + if field.dataType == ArrayType(StringType()): + agg_funcs.append(f.array_distinct(f.flatten(f.collect_list(field.name))).alias(field.name)) + else: + agg_funcs.append(f.first(f.col(field.name), ignorenulls=True).alias(field.name)) + + # Perform aggregation + aggregated_df = merged_df.groupBy("biosampleId").agg(*agg_funcs) + + return BiosampleIndex( + _df=aggregated_df, + _schema=BiosampleIndex.get_schema() + ) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index 3c3debba9..3f9b65097 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -19,6 +19,7 @@ from pyspark.sql import Column, DataFrame from pyspark.sql.types import StructType + from gentropy.dataset.biosample_index import BiosampleIndex from gentropy.dataset.gene_index import GeneIndex @@ -29,12 +30,14 @@ class StudyQualityCheck(Enum): UNRESOLVED_TARGET (str): Target/gene identifier could not match to reference - Labelling failing target. UNRESOLVED_DISEASE (str): Disease identifier could not match to referece or retired identifier - labelling failing disease UNKNOWN_STUDY_TYPE (str): Indicating the provided type of study is not supported. + UNKNOWN_BIOSAMPLE (str): Flagging if a biosample identifier is not found in the reference. DUPLICATED_STUDY (str): Flagging if a study identifier is not unique. """ UNRESOLVED_TARGET = "Target/gene identifier could not match to reference." 
UNRESOLVED_DISEASE = "No valid disease identifier found." UNKNOWN_STUDY_TYPE = "This type of study is not supported." + UNKNOWN_BIOSAMPLE = "Biosample identifier was not found in the reference." DUPLICATED_STUDY = "The identifier of this study is not unique." @@ -406,3 +409,36 @@ def validate_target(self: StudyIndex, target_index: GeneIndex) -> StudyIndex: ) return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema()) + + def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> StudyIndex: + """Validating biosample identifiers in the study index against the provided biosample index. + + Args: + biosample_index (BiosampleIndex): Biosample index containing a reference of biosample identifiers e.g. cell types, tissues, cell lines, etc. + + Returns: + StudyIndex: with flagged studies if biosampleIndex could not be validated. + """ + biosample_set = biosample_index.df.select("biosampleId", f.lit(True).alias("isIdFound")) + + validated_df = ( + self.df.join(biosample_set, self.df.biosampleFromSourceId == biosample_set.biosampleId, how="left") + .withColumn( + "isIdFound", + f.when( + f.col("isIdFound").isNull(), + f.lit(False), + ).otherwise(f.lit(True)), + ) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~f.col("isIdFound"), + StudyQualityCheck.UNKNOWN_BIOSAMPLE, + ), + ) + .drop("isIdFound").drop("biosampleId") + ) + + return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema()) diff --git a/src/gentropy/datasource/biosample_ontologies/__init__.py b/src/gentropy/datasource/biosample_ontologies/__init__.py new file mode 100644 index 000000000..d3fa6b416 --- /dev/null +++ b/src/gentropy/datasource/biosample_ontologies/__init__.py @@ -0,0 +1,3 @@ +"""Biosample index data source.""" + +from __future__ import annotations diff --git a/src/gentropy/datasource/biosample_ontologies/utils.py b/src/gentropy/datasource/biosample_ontologies/utils.py new file mode 100644 index 
000000000..3ef1747ee --- /dev/null +++ b/src/gentropy/datasource/biosample_ontologies/utils.py @@ -0,0 +1,130 @@ +"""Utility functions for Biosample ontology processing.""" +from pyspark.sql import DataFrame, SparkSession +from pyspark.sql import functions as f +from pyspark.sql.types import ArrayType, StringType +from pyspark.sql.window import Window + +from gentropy.dataset.biosample_index import BiosampleIndex + + +def extract_ontology_from_json( + ontology_json : str, + spark : SparkSession +) -> BiosampleIndex: + """Extracts the ontology information from a JSON file. Currently only supports Uberon and Cell Ontology. + + Args: + ontology_json (str): Path to the JSON file containing the ontology information. + spark (SparkSession): Spark session. + + Returns: + BiosampleIndex: Parsed and annotated biosample index table. + """ + + def json_graph_traversal( + df : DataFrame, + node_col : str, + link_col: str, + traversal_type: str + ) -> DataFrame: + """Traverse a graph represented in a DataFrame to find all ancestors or descendants. + + Args: + df (DataFrame): DataFrame containing the graph data. + node_col (str): Column name for the node. + link_col (str): Column name for the link. + traversal_type (str): Type of traversal - "ancestors" or "descendants". + + Returns: + DataFrame: DataFrame with the result column added. + """ + # Collect graph data as a map + graph_map = df.select(node_col, link_col).rdd.collectAsMap() + broadcasted_graph = spark.sparkContext.broadcast(graph_map) + + def get_relationships( + node : str + ) -> list[str]: + """Get all relationships for a given node. + + Args: + node (str): Node ID. + + Returns: + list[str]: List of relationships. 
+ """ + relationships = set() + stack = [node] + while stack: + current = stack.pop() + if current in broadcasted_graph.value: + current_links = broadcasted_graph.value[current] + stack.extend(current_links) + relationships.update(current_links) + return list(relationships) + + # Choose column name based on traversal type + result_col = "ancestors" if traversal_type == "ancestors" else "descendants" + + # Register the UDF based on traversal type + relationship_udf = f.udf(get_relationships, ArrayType(StringType())) + + # Apply the UDF to create the result column + return df.withColumn(result_col, relationship_udf(f.col(node_col))) + + # Load the JSON file + df = spark.read.json(ontology_json, multiLine=True) + + # Exploding the 'graphs' array to make individual records easier to access + df_graphs = df.select(f.explode_outer("graphs").alias("graph")) + + # Exploding the 'nodes' array within each graph + df_nodes = df_graphs.select( + f.col("graph.id").alias("graph_id"), + f.explode_outer("graph.nodes").alias("node")) + + # Exploding the 'edges' array within each graph for relationship data + df_edges = df_graphs.select( + f.col("graph.id").alias("graph_id"), + f.explode_outer("graph.edges").alias("edge") + ).select( + f.col("edge.sub").alias("subject"), + f.col("edge.pred").alias("predicate"), + f.col("edge.obj").alias("object") + ) + df_edges = df_edges.withColumn("subject", f.regexp_replace(f.col("subject"), "http://purl.obolibrary.org/obo/", "")) + df_edges = df_edges.withColumn("object", f.regexp_replace(f.col("object"), "http://purl.obolibrary.org/obo/", "")) + + # Extract the relevant information from the nodes + transformed_df = df_nodes.select( + f.regexp_replace(f.col("node.id"), "http://purl.obolibrary.org/obo/", "").alias("biosampleId"), + f.coalesce(f.col("node.lbl"), f.col("node.id")).alias("biosampleName"), + f.col("node.meta.definition.val").alias("description"), + 
f.collect_set(f.col("node.meta.xrefs.val")).over(Window.partitionBy("node.id")).getItem(0).alias("xrefs"), + f.collect_set(f.col("node.meta.synonyms.val")).over(Window.partitionBy("node.id")).getItem(0).alias("synonyms")) + + + # Extract the relationships from the edges + # Prepare relationship-specific DataFrames + df_parents = df_edges.filter(f.col("predicate") == "is_a").select("subject", "object").withColumnRenamed("object", "parent") + df_children = df_edges.filter(f.col("predicate") == "is_a").select("object", "subject").withColumnRenamed("subject", "child") + + # Aggregate relationships back to nodes + df_parents_grouped = df_parents.groupBy("subject").agg(f.array_distinct(f.collect_list("parent")).alias("parents")) + df_children_grouped = df_children.groupBy("object").agg(f.array_distinct(f.collect_list("child")).alias("children")) + + # Get all ancestors + df_with_ancestors = json_graph_traversal(df_parents_grouped, "subject", "parents", "ancestors") + # Get all descendants + df_with_descendants = json_graph_traversal(df_children_grouped, "object", "children", "descendants") + + # Join the ancestor and descendant DataFrames + df_with_relationships = df_with_ancestors.join(df_with_descendants, df_with_ancestors.subject == df_with_descendants.object, "full_outer").withColumn("biosampleId", f.coalesce(df_with_ancestors.subject, df_with_descendants.object)).drop("subject", "object") + + # Join the original DataFrame with the relationship DataFrame + final_df = transformed_df.join(df_with_relationships, ["biosampleId"], "left") + + return BiosampleIndex( + _df=final_df, + _schema=BiosampleIndex.get_schema() + ) diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index 565aa410d..e1337dd00 100644 --- a/src/gentropy/study_validation.py +++ b/src/gentropy/study_validation.py @@ -5,6 +5,7 @@ from pyspark.sql import functions as f from gentropy.common.session import Session +from gentropy.dataset.biosample_index import BiosampleIndex 
from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.study_index import StudyIndex @@ -22,6 +23,7 @@ def __init__( study_index_path: list[str], target_index_path: str, disease_index_path: str, + biosample_index_path: str, valid_study_index_path: str, invalid_study_index_path: str, invalid_qc_reasons: list[str] = [], @@ -33,12 +35,14 @@ def __init__( study_index_path (list[str]): Path to study index file. target_index_path (str): Path to target index file. disease_index_path (str): Path to disease index file. + biosample_index_path (str): Path to biosample index file. valid_study_index_path (str): Path to write the valid records. invalid_study_index_path (str): Path to write the output file. invalid_qc_reasons (list[str]): List of invalid quality check reason names from `StudyQualityCheck` (e.g. ['DUPLICATED_STUDY']). """ # Reading datasets: target_index = GeneIndex.from_parquet(session, target_index_path) + biosample_index = BiosampleIndex.from_parquet(session, biosample_index_path) # Reading disease index and pre-process. # This logic does not belong anywhere, but gentorpy has no disease dataset yet. disease_index = ( @@ -62,6 +66,7 @@ def __init__( .validate_study_type() # Flagging non-supported study types. 
.validate_target(target_index) # Flagging QTL studies with invalid targets .validate_disease(disease_index) # Flagging invalid EFOs + .validate_biosample(biosample_index) # Flagging studies with invalid biosamples ).persist() # we will need this for 2 types of outputs study_index_with_qc.valid_rows( diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 93ee38471..4045833f9 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -13,6 +13,7 @@ from gentropy.common.Liftover import LiftOverSpark from gentropy.common.session import Session +from gentropy.dataset.biosample_index import BiosampleIndex from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.intervals import Intervals @@ -559,6 +560,35 @@ def mock_gene_index(spark: SparkSession) -> GeneIndex: return GeneIndex(_df=data_spec.build(), _schema=gi_schema) +@pytest.fixture() +def mock_biosample_index(spark: SparkSession) -> BiosampleIndex: + """Mock biosample index dataset.""" + bi_schema = BiosampleIndex.get_schema() + + # Makes arrays of varying length with random integers between 0 and 99 + array_expression = "transform(sequence(1, 1 + floor(rand() * 9)), x -> cast((rand() * 100) as int))" + + data_spec = ( + dg.DataGenerator( + spark, + rows=400, + partitions=4, + randomSeedMethod="hash_fieldname", + ) + .withSchema(bi_schema) + .withColumnSpec("biosampleName", percentNulls=0.1) + .withColumnSpec("description", percentNulls=0.1) + .withColumnSpec("xrefs", expr=array_expression, percentNulls=0.1) + .withColumnSpec("synonyms", expr=array_expression, percentNulls=0.1) + .withColumnSpec("parents", expr=array_expression, percentNulls=0.1) + .withColumnSpec("ancestors", expr=array_expression, percentNulls=0.1) + .withColumnSpec("descendants", expr=array_expression, percentNulls=0.1) + .withColumnSpec("children", expr=array_expression, percentNulls=0.1) + ) + + return 
BiosampleIndex(_df=data_spec.build(), _schema=bi_schema) + + @pytest.fixture() def liftover_chain_37_to_38(spark: SparkSession) -> LiftOverSpark: """Sample liftover chain file.""" diff --git a/tests/gentropy/data_samples/cell_ontology_sample.json b/tests/gentropy/data_samples/cell_ontology_sample.json new file mode 100644 index 000000000..5e73bfdee --- /dev/null +++ b/tests/gentropy/data_samples/cell_ontology_sample.json @@ -0,0 +1,351 @@ +{ + "graphs": [ + { + "id": "http://purl.obolibrary.org/obo/cl.json", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/IAO_0000700", + "val": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "pred": "http://purl.org/dc/elements/1.1/description", + "val": "An ontology of cell types." + }, + { + "pred": "http://purl.org/dc/elements/1.1/title", + "val": "Cell Ontology" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-5208-3432" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-9114-8737" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-9990-8331" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-2244-7917" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-6601-2165" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-7073-9172" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-8688-6599" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-9900-7880" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0003-1980-3228" + }, + { + "pred": "http://purl.org/dc/terms/license", + "val": "http://creativecommons.org/licenses/by/4.0/" + }, + { + "pred": 
"http://www.w3.org/2000/01/rdf-schema#comment", + "val": "See PMID:15693950, PMID:12799354, PMID:20123131, PMID:21208450; Contact Alexander Diehl, addiehl@buffalo.edu, university at buffalo." + }, + { + "pred": "http://www.w3.org/2002/07/owl#versionInfo", + "val": "2024-08-16" + } + ], + "version": "http://purl.obolibrary.org/obo/cl/releases/2024-08-16/cl.json" + }, + "nodes": [ + { + "id": "http://purl.obolibrary.org/obo/CL_0000653", + "lbl": "podocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "A specialized kidney epithelial cell, contained within a glomerulus, that contains \"feet\" that interdigitate with the \"feet\" of other podocytes.", + "xrefs": ["GOC:tfm", "https://doi.org/10.1101/2021.10.10.463829"] + }, + "subsets": [ + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasBroadSynonym", + "val": "epithelial cell of visceral layer of glomerular capsule", + "xrefs": ["FMA:70967"] + }, + { + "pred": "hasExactSynonym", + "val": "glomerular podocyte", + "xrefs": ["FMA:70967"] + }, + { + "pred": "hasExactSynonym", + "val": "glomerular visceral epithelial cell" + }, + { + "pred": "hasExactSynonym", + "val": "kidney podocyte" + }, + { + "pred": "hasExactSynonym", + "val": "renal podocyte" + } + ], + "xrefs": [ + { + "val": "BTO:0002295" + }, + { + "val": "FMA:70967" + }, + { + "val": "ZFA:0009285" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + }, + { + "pred": "http://www.w3.org/2000/01/rdf-schema#seeAlso", + "val": "https://github.com/obophenotype/cell-ontology/issues/1460" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000654", + "lbl": "primary oocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "A primary oocyte is an oocyte that has not completed female meosis I.", + "xrefs": ["GOC:tfm", 
"ISBN:0721662544"] + }, + "subsets": [ + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "primary oogonium" + } + ], + "xrefs": [ + { + "val": "BTO:0000512" + }, + { + "val": "FMA:18645" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000655", + "lbl": "secondary oocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "A secondary oocyte is an oocyte that has not completed meiosis II.", + "xrefs": ["GOC:tfm", "ISBN:0721662544"] + }, + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "primary oogonium" + } + ], + "xrefs": [ + { + "val": "BTO:0003094" + }, + { + "val": "FMA:18646" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000656", + "lbl": "primary spermatocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "A diploid cell that has derived from a spermatogonium and can subsequently begin meiosis and divide into two haploid secondary spermatocytes.", + "xrefs": ["GOC:tfm", "ISBN:0721662544"] + }, + "xrefs": [ + { + "val": "BTO:0001115" + }, + { + "val": "CALOHA:TS-2194" + }, + { + "val": "FMA:72292" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000657", + "lbl": "secondary spermatocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "One of the two haploid cells into which a primary spermatocyte divides, and which in turn gives origin to spermatids.", + "xrefs": ["GOC:tfm", "ISBN:0721662544"] + }, + "xrefs": [ + { + "val": "BTO:0000709" + }, + { + "val": "CALOHA:TS-2195" + }, + { + "val": "FBbt:00004941" + }, + { + "val": "FMA:72293" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000658", + "lbl": "cuticle secreting cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "An epithelial cell that secretes 
cuticle.", + "xrefs": ["GOC:tfm"] + } + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000659", + "lbl": "eggshell secreting cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "An extracellular matrix secreting cell that secretes eggshell.", + "xrefs": ["GOC:tfm"] + } + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_1000451", + "lbl": "obsolete epithelial cell of visceral layer of glomerular capsule", + "type": "CLASS", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/IAO_0100001", + "val": "http://purl.obolibrary.org/obo/CL_0000653" + } + ], + "deprecated": true + } + } + ], + "edges": [ + { + "sub": "http://purl.obolibrary.org/obo/UBERON_0005751", + "pred": "http://purl.obolibrary.org/obo/BFO_0000051", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/GO_1903210", + "pred": "http://purl.obolibrary.org/obo/BFO_0000066", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/GO_0090521", + "pred": "http://purl.obolibrary.org/obo/RO_0002565", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/GO_0072015", + "pred": "http://purl.obolibrary.org/obo/RO_0002296", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_4030008", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002525", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002523", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000653" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000653", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0002522" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000653", + "pred": "is_a", + "obj": 
"http://purl.obolibrary.org/obo/CL_1000450" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000653", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0005751" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000655", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000023", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://www.geneontology.org/formats/oboInOwl#is_inferred", + "val": "true" + } + ] + } + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000655", + "pred": "http://purl.obolibrary.org/obo/CL_4030044", + "obj": "http://purl.obolibrary.org/obo/GO_0007147" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000655", + "pred": "http://purl.obolibrary.org/obo/RO_0002202", + "obj": "http://purl.obolibrary.org/obo/CL_0000654" + } + ] + } + ] +} diff --git a/tests/gentropy/data_samples/uberon_sample.json b/tests/gentropy/data_samples/uberon_sample.json new file mode 100644 index 000000000..7dedfa23c --- /dev/null +++ b/tests/gentropy/data_samples/uberon_sample.json @@ -0,0 +1,889 @@ +{ + "graphs": [ + { + "id": "http://purl.obolibrary.org/obo/uberon.json", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/IAO_0000700", + "val": "http://purl.obolibrary.org/obo/UBERON_0000104" + }, + { + "pred": "http://purl.obolibrary.org/obo/IAO_0000700", + "val": "http://purl.obolibrary.org/obo/UBERON_0001062" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0001-5839-6798" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0001-7972-3866" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0001-9114-8737" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0002-1810-9886" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": 
"https://orcid.org/0000-0002-6601-2165" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0002-7356-1779" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0002-9611-1279" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0003-3162-7490" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "https://orcid.org/0000-0003-3308-6245" + }, + { + "pred": "http://purl.org/dc/elements/1.1/description", + "val": "Uberon is an integrated cross-species anatomy ontology representing a variety of entities classified according to traditional anatomical criteria such as structure, function and developmental lineage. The ontology includes comprehensive relationships to taxon-specific anatomical ontologies, allowing integration of functional, phenotype and expression data." + }, + { + "pred": "http://purl.org/dc/elements/1.1/publisher", + "val": "http://uberon.org" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://dbpedia.org" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://palaeos.com" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://www.brain-map.org" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://braininfo.rprc.washington.edu/" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://en.wikipedia.org/wiki/" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-GrossAnatomy.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://pons.incf.org/wiki/Common_Upper_Mammalian_Brain_Ontology_%28Cumbo%29" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/aao.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": 
"http://purl.obolibrary.org/obo/aba.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/aeo.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/bila.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/bto.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/caro.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/cl.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/ehdaa2.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/emapa.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/fbbt.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/fma.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/go.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/hp.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/ma.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/mp.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/tao.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/vhog.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/vsao.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/wbbt.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + 
"val": "http://purl.obolibrary.org/obo/xao.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://purl.obolibrary.org/obo/zfa.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://uri.neuinfo.org/nif/nifstd" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://www.e-lico.eu/public/kupo/kupo.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://www.ebi.ac.uk/efo/efo.owl" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0030229073 Invertebrate Zoology, Barnes" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0073040584 Vertebrates, Kardong" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0123813611 Comparative Anatomy and Histology: A Mouse and Human Atlas, Treuting and Dintzis" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0226313379 Fins into Limbs: Evolution, Development, and Transformation, Hall" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0443065837 Human embryology, Larsen" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0471888893 Comparative Vertebrate Neuroanatomy: Evolution and Adaptation by Butler and Hodos" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:0683400088 Stedman's Medical Dictionary" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:1588900649 Color Atlas and Textbook of Human Anatomy: Nervous system and sensory organs By Werner Kahle, Michael Frotscher" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:1588903958 Principles and practice of pediatric neurosurgery By A. Leland Albright, P. David Adelson, Ian F. 
Pollack" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:1607950324 Craniofacial Embryogenetics & Development, 2nd edition, Sperber" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:978-0-12-369548-2 Principles of Developmental Genetics, Sally A Moody" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:9780120749034 The laboratory rat" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:9780397517251 Surgical anatomy of the hand and upper extremity. By James R. Doyle and Michael J. Botte" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:9780674021839 The Tree of Life - Guillaume Lecointre, Herve Le Guyader" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "ISBN:9780878932504 Developmental Biology" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "MESH" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "PMID:11433360 Placental development: lessons from mouse mutants" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "PMID:16417468 Forgotten and novel aspects in pancreas development, Pieler and Chen" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "aggregates AAO from 13:04:2012" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "aggregates TAO from 09:08:2012" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "aggregates VSAO from 16:07:2012" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://wiki.phenotypercn.org/wg/phenotypercn/index.php?title=Neural_Crest_Workshop" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "http://wiki.phenotypercn.org/wiki/August_2012_Notes" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "https://docs.google.com/document/d/16JZOuH9sh_a8uIXA4cqg0Q1H6MV5yCj3-rhuKsZoV_U/edit" + }, + { + "pred": 
"http://purl.org/dc/elements/1.1/source", + "val": "https://docs.google.com/document/d/1MnUgispgGfNQoezYzWzzGTnkAnI0gzRnJIwdip6MMtw/edit" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "https://docs.google.com/document/d/1cPWBqrl_Qy7XHEWFqtR_PgQX61yRkgGuLaiDpnEXxkE/edit" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "https://docs.google.com/document/d/1r9kNPpFYGdu0SpJDLyFAVQczBlG0wAZCBMd18gG3Ot8/edit#" + }, + { + "pred": "http://purl.org/dc/elements/1.1/source", + "val": "https://docs.google.com/spreadsheet/ccc?key=0Aj8NJdyb-leqdDM0R3hTVTRHRExDVjRCSkZEbDc5N1E#gid=0" + }, + { + "pred": "http://purl.org/dc/elements/1.1/title", + "val": "Uber-anatomy ontology" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://github.com/orgs/pato-ontology/teams/pato-community" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-5889-4463" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-7433-0086" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-7476-6306" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-7920-5321" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-7958-3701" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-8682-8754" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-9107-0714" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0001-9990-8331" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-0819-0473" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-0956-8634" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": 
"https://orcid.org/0000-0002-1112-5832" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-1572-1316" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-1604-3078" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-1615-2899" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-2061-091X" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-2244-7917" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-3437-3329" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-3467-2636" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-3734-1859" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-5111-7263" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-6490-7723" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-7073-9172" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-8406-3871" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-8455-3213" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-8688-6599" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-9415-5104" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-9818-3030" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0002-9900-7880" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0003-1980-3228" + }, + { + 
"pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0003-2105-2283" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0003-2338-2550" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0003-3691-0324" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://orcid.org/0000-0003-4423-4370" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q11695472" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q23809253" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q4964264" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q54985720" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q6983890" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q7650732" + }, + { + "pred": "http://purl.org/dc/terms/contributor", + "val": "https://www.wikidata.org/wiki/Q85793053" + }, + { + "pred": "http://purl.org/dc/terms/isReferencedBy", + "val": "http://genomebiology.com/2012/13/1/R5" + }, + { + "pred": "http://purl.org/dc/terms/isReferencedBy", + "val": "http://www.ncbi.nlm.nih.gov/pubmed/22293552" + }, + { + "pred": "http://purl.org/dc/terms/license", + "val": "http://creativecommons.org/licenses/by/3.0/" + }, + { + "pred": "http://usefulinc.com/ns/doap#GitRepository", + "val": "https://github.com/cmungall/uberon/" + }, + { + "pred": "http://usefulinc.com/ns/doap#SVNRepository", + "val": "https://obo.svn.sourceforge.net/svnroot/obo/uberon/" + }, + { + "pred": "http://usefulinc.com/ns/doap#bug-database", + "val": "https://github.com/obophenotype/uberon/issues/" + }, + { + "pred": "http://usefulinc.com/ns/doap#mailing-list", + "val": 
"https://lists.sourceforge.net/lists/listinfo/obo-anatomy" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#default-namespace", + "val": "uberon" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#hasOBOFormatVersion", + "val": "1.2" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "AEO" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "BILA" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "BSPO" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "CARO" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "GO" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "OG" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-equivalent", + "val": "VSAO" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-has-subclass", + "val": "EHDAA" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-has-subclass", + "val": "EV" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-has-subclass", + "val": "NCIT" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-has-subclass", + "val": "OGES" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-has-subclass", + "val": "SCTID" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-is_a", + "val": "BFO" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-is_a", + "val": "VHOG" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "AAO part_of NCBITaxon:8292" + }, + { + "pred": 
"http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "DHBA part_of NCBITaxon:9606" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "EHDAA2 part_of NCBITaxon:9606" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "EMAPA part_of NCBITaxon:10090" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "FBdv part_of NCBITaxon:7227" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "FMA part_of NCBITaxon:9606" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "HAO part_of NCBITaxon:7399" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "HBA part_of NCBITaxon:9606" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "HsapDv part_of NCBITaxon:9606" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "KUPO part_of NCBITaxon:9606" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "MA part_of NCBITaxon:10090" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "MFO part_of NCBITaxon:8089" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "MmusDv part_of NCBITaxon:10090" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "OlatDv part_of NCBITaxon:8089" + }, + { + "pred": 
"http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "PBA part_of NCBITaxon:9443" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "SPD part_of NCBITaxon:6893" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "TADS part_of NCBITaxon:6939" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "TAO part_of NCBITaxon:32443" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "TGMA part_of NCBITaxon:44484" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "WBbt part_of NCBITaxon:6237" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "WBls part_of NCBITaxon:6237" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "XAO part_of NCBITaxon:8353" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "ZFA part_of NCBITaxon:7954" + }, + { + "pred": "http://www.geneontology.org/formats/oboInOwl#treat-xrefs-as-reverse-genus-differentia", + "val": "ZFS part_of NCBITaxon:7954" + }, + { + "pred": "http://www.w3.org/2000/01/rdf-schema#comment", + "val": "Aurelie Comte, Bill Bug, Catherine Leroy, Duncan Davidson and Trish Whetzel are also contributors. However their ORCIDs were not found." 
+ }, + { + "pred": "http://www.w3.org/2002/07/owl#versionInfo", + "val": "2024-09-03" + }, + { + "pred": "http://xmlns.com/foaf/0.1/homepage", + "val": "http://uberon.org" + } + ], + "version": "http://purl.obolibrary.org/obo/uberon/releases/2024-09-03/uberon.json" + }, + "nodes": [ + { + "id": "http://purl.obolibrary.org/obo/CL_1001593", + "lbl": "parathyroid glandular cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "Glandular cell of parathyroid epithelium. Example: Parathyroid chief cell and parathyroid oxyphil cells.", + "xrefs": ["HPA:HPA", "NPX:PDR"] + }, + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "parathyroid gland glandular cell", + "xrefs": ["CALOHA:TS-1279"] + }, + { + "pred": "hasRelatedSynonym", + "val": "parathyroid gland glandular cells", + "xrefs": ["CALOHA:TS-1279"] + } + ], + "xrefs": [ + { + "val": "CALOHA:TS-1279" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_1001595", + "lbl": "rectum glandular cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "Glandular cell of rectal epithelium. Example: Goblet cell; enterocytes or absorptive cells; enteroendocrine and M cells.", + "xrefs": ["NPX:PDR"] + }, + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "rectal glandular cell", + "xrefs": ["CALOHA:TS-1281"] + }, + { + "pred": "hasRelatedSynonym", + "val": "rectum glandular cells", + "xrefs": ["CALOHA:TS-1281"] + } + ], + "xrefs": [ + { + "val": "CALOHA:TS-1281" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_1001596", + "lbl": "salivary gland glandular cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "Glandular cell of salivary gland. 
Example: Serous cells, mucous cells, cuboidal epithelial cells of the intercalated ducts, simple cuboidal epithelium of the striated ducts, epithelial cells of excretory ducts.", + "xrefs": ["HPA:HPA", "NPX:PDR"] + }, + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "salivary gland glandular cells", + "xrefs": ["CALOHA:TS-1282"] + } + ], + "xrefs": [ + { + "val": "CALOHA:TS-1282" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000653", + "lbl": "podocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "A specialized kidney epithelial cell, contained within a glomerulus, that contains \"feet\" that interdigitate with the \"feet\" of other podocytes.", + "xrefs": ["GOC:tfm", "https://doi.org/10.1101/2021.10.10.463829"] + }, + "subsets": [ + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasBroadSynonym", + "val": "epithelial cell of visceral layer of glomerular capsule", + "xrefs": ["FMA:70967"] + }, + { + "pred": "hasExactSynonym", + "val": "glomerular podocyte", + "xrefs": ["FMA:70967"] + }, + { + "pred": "hasExactSynonym", + "val": "glomerular visceral epithelial cell" + }, + { + "pred": "hasExactSynonym", + "val": "kidney podocyte" + }, + { + "pred": "hasExactSynonym", + "val": "renal podocyte" + } + ], + "xrefs": [ + { + "val": "BTO:0002295" + }, + { + "val": "FMA:70967" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + }, + { + "pred": "http://www.w3.org/2000/01/rdf-schema#seeAlso", + "val": "https://github.com/obophenotype/cell-ontology/issues/1460" + } + ] + } + } + ], + "edges": [ + { + "sub": "http://purl.obolibrary.org/obo/CL_1001596", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000150" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_1001596", + "pred": "is_a", + "obj": 
"http://purl.obolibrary.org/obo/CL_0000152" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_1001596", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0002251" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_1001596", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0001044" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_1001596", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0004809" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002623", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000622", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://www.geneontology.org/formats/oboInOwl#is_inferred", + "val": "true" + } + ] + } + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002623", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_1001596" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002623", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0001044" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002623", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000622", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://www.geneontology.org/formats/oboInOwl#is_inferred", + "val": "true" + } + ] + } + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002623", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_1001596" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002623", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0001044" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000653", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_1000450" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000653", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": 
"http://purl.obolibrary.org/obo/UBERON_0005751" + } + ] + } + ] +} diff --git a/tests/gentropy/dataset/test_biosample_index.py b/tests/gentropy/dataset/test_biosample_index.py new file mode 100644 index 000000000..c647710d1 --- /dev/null +++ b/tests/gentropy/dataset/test_biosample_index.py @@ -0,0 +1,8 @@ +"""Tests on Biosample index.""" + +from gentropy.dataset.biosample_index import BiosampleIndex + + +def test_biosample_index_creation(mock_biosample_index: BiosampleIndex) -> None: + """Test biosample index creation with mock biosample index.""" + assert isinstance(mock_biosample_index, BiosampleIndex) diff --git a/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py b/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py new file mode 100644 index 000000000..b88623b0d --- /dev/null +++ b/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py @@ -0,0 +1,50 @@ +"""Tests for biosample index dataset.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from gentropy.dataset.biosample_index import BiosampleIndex +from gentropy.datasource.biosample_ontologies.utils import extract_ontology_from_json + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + + +class TestOntologyParger: + """Testing ontology parser.""" + + SAMPLE_CELL_ONTOLOGY_PATH = "tests/gentropy/data_samples/cell_ontology_sample.json" + SAMPLE_UBERON_PATH = "tests/gentropy/data_samples/uberon_sample.json" + + def test_cell_ontology_parser( + self: TestOntologyParger, spark: SparkSession + ) -> None: + """Test cell ontology parser.""" + cell_ontology = extract_ontology_from_json( + self.SAMPLE_CELL_ONTOLOGY_PATH, spark + ) + assert isinstance( + cell_ontology, BiosampleIndex + ), "Cell ontology subset is not parsed correctly to BiosampleIndex." 
+ + def test_uberon_parser(self: TestOntologyParger, spark: SparkSession) -> None: + """Test uberon parser.""" + uberon = extract_ontology_from_json(self.SAMPLE_UBERON_PATH, spark) + assert isinstance( + uberon, BiosampleIndex + ), "Uberon subset is not parsed correctly to BiosampleIndex." + + def test_merge_biosample_indices( + self: TestOntologyParger, spark: SparkSession + ) -> None: + """Test merging of biosample indices.""" + cell_ontology = extract_ontology_from_json( + self.SAMPLE_CELL_ONTOLOGY_PATH, spark + ) + uberon = extract_ontology_from_json(self.SAMPLE_UBERON_PATH, spark) + + merged = cell_ontology.merge_indices([uberon]) + assert isinstance( + merged, BiosampleIndex + ), "Merging of biosample indices is not correct." From 84d663849716a61fa40642959d80300dd99842fc Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 24 Sep 2024 16:57:29 +0100 Subject: [PATCH 052/188] feat: 99% credible set validation during `study_locus_validation` (#765) * feat: study locus validation filters for 95% credible sets * revert: no longer needed to filter for credible set interval * feat: annotate credible sets before filter them * docs: adding more context here --- src/gentropy/colocalisation.py | 6 ++---- src/gentropy/dataset/study_locus.py | 4 ++-- src/gentropy/pics.py | 6 ++---- src/gentropy/study_locus_validation.py | 4 +++- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 4f8431b98..0dcdff206 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -8,7 +8,7 @@ from pyspark.sql.functions import col from gentropy.common.session import Session -from gentropy.dataset.study_locus import CredibleInterval, StudyLocus +from gentropy.dataset.study_locus import StudyLocus from gentropy.method.colocalisation import Coloc @@ -46,9 +46,7 @@ def __init__( ) # Transform - overlaps = credible_set.filter_credible_set( - CredibleInterval.IS95 - ).find_overlaps() + overlaps 
= credible_set.find_overlaps() colocalisation_results = colocalisation_class.colocalise(overlaps) # type: ignore # Load diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 2385df984..c7f9ffc3d 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -553,7 +553,7 @@ def filter_credible_set( self: StudyLocus, credible_interval: CredibleInterval, ) -> StudyLocus: - """Filter study-locus tag variants based on given credible interval. + """Annotate and filter study-locus tag variants based on given credible interval. Args: credible_interval (CredibleInterval): Credible interval to filter for. @@ -562,7 +562,7 @@ def filter_credible_set( StudyLocus: Filtered study-locus dataset. """ return StudyLocus( - _df=self._df.withColumn( + _df=self.annotate_credible_sets().df.withColumn( "locus", f.filter( f.col("locus"), diff --git a/src/gentropy/pics.py b/src/gentropy/pics.py index 80421b9ae..e80a37eb6 100644 --- a/src/gentropy/pics.py +++ b/src/gentropy/pics.py @@ -28,10 +28,8 @@ def __init__( session, study_locus_ld_annotated_in ) # PICS - picsed_sl = ( - PICS.finemap(study_locus_ld_annotated) - .annotate_credible_sets() - .filter_credible_set(credible_interval=CredibleInterval.IS99) + picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set( + credible_interval=CredibleInterval.IS99 ) # Write picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 7c853bbcb..114eb01f7 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -4,7 +4,7 @@ from gentropy.common.session import Session from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import CredibleInterval, StudyLocus class StudyLocusValidationStep: @@ -46,6 +46,8 @@ def __init__( 
.validate_study(study_index) # Flagging studies not in study index .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics + # Annotates credible intervals and filter to only keep 99% credible sets + .filter_credible_set(credible_interval=CredibleInterval.IS99) ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows( From 2199ece29182cbfb3911c1a8b6d261f9a76bf1f0 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 24 Sep 2024 17:04:42 +0100 Subject: [PATCH 053/188] feat: flag credible sets explained by SuSiE regions (#780) * feat: flag PICS top hits in studies with credset sumstats * feat: flag credible sets explained by SuSiE region * feat: consider unresolved LD cases * refactor: improve readability of code * refactor: improve code readability --------- Co-authored-by: Daniel Suveges --- src/gentropy/dataset/study_locus.py | 72 ++++++++++ src/gentropy/study_locus_validation.py | 1 + tests/gentropy/dataset/test_study_locus.py | 160 +++++++++++++++++++++ 3 files changed, 233 insertions(+) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index c7f9ffc3d..e3706eaf0 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -57,6 +57,7 @@ class StudyLocusQualityCheck(Enum): TOP_HIT (str): Study locus from curated top hit IN_MHC (str): Flagging study loci in the MHC region REDUNDANT_PICS_TOP_HIT (str): Flagging study loci in studies with PICS results from summary statistics + EXPLAINED_BY_SUSIE (str): Study locus in region explained by a SuSiE credible set """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -84,6 +85,7 @@ class StudyLocusQualityCheck(Enum): "PICS results from summary statistics available for this same study" ) TOP_HIT = "Study locus from curated top hit" + EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set" class 
CredibleInterval(Enum): @@ -963,6 +965,76 @@ def qc_redundant_top_hits_from_PICS(self: StudyLocus) -> StudyLocus: _schema=StudyLocus.get_schema(), ) + def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: + """Flag associations that are explained by SuSiE associations. + + Credible sets overlapping in the same region as a SuSiE credible set are flagged as explained by SuSiE. + + Returns: + StudyLocus: Updated study locus with SuSiE explained flags. + """ + # unique study-regions covered by SuSie credible sets + susie_study_regions = ( + self.filter(f.col("finemappingMethod") == "SuSiE-inf") + .df.select( + "studyId", + "chromosome", + "locusStart", + "locusEnd", + f.lit(True).alias("inSuSiE"), + ) + .distinct() + ) + + # non SuSiE credible sets (studyLocusId) overlapping in any variant with SuSiE locus + redundant_study_locus = ( + self.filter(f.col("finemappingMethod") != "SuSiE-inf") + .df.withColumn("l", f.explode("locus")) + .select( + "studyLocusId", + "studyId", + "chromosome", + f.split(f.col("l.variantId"), "_")[1].alias("tag_position"), + ) + .alias("study_locus") + .join( + susie_study_regions.alias("regions"), + how="inner", + on=[ + (f.col("study_locus.chromosome") == f.col("regions.chromosome")) + & (f.col("study_locus.studyId") == f.col("regions.studyId")) + & (f.col("study_locus.tag_position") >= f.col("regions.locusStart")) + & (f.col("study_locus.tag_position") <= f.col("regions.locusEnd")) + ], + ) + .select("studyLocusId", "inSuSiE") + .distinct() + ) + + return StudyLocus( + _df=( + self.df.join(redundant_study_locus, on="studyLocusId", how="left") + .withColumn( + "qualityControls", + self.update_quality_flag( + f.col("qualityControls"), + # credible set in SuSiE overlapping region + f.col("inSuSiE") + # credible set not based on SuSiE + & (f.col("finemappingMethod") != "SuSiE-inf") + # credible set not already flagged as unresolved LD + & ~f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.UNRESOLVED_LD.value, + ), + 
StudyLocusQualityCheck.EXPLAINED_BY_SUSIE, + ), + ) + .drop("inSuSiE") + ), + _schema=StudyLocus.get_schema(), + ) + def _qc_no_population(self: StudyLocus) -> StudyLocus: """Flag associations where the study doesn't have population information to resolve LD. diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 114eb01f7..287cd5645 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -46,6 +46,7 @@ def __init__( .validate_study(study_index) # Flagging studies not in study index .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics + .qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE # Annotates credible intervals and filter to only keep 99% credible sets .filter_credible_set(credible_interval=CredibleInterval.IS99) ).persist() # we will need this for 2 types of outputs diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 51fc2ed92..29cbffad2 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -904,3 +904,163 @@ def test_qc_redundant_top_hits_from_PICS_correctness( ) .count() ) == 3 + + +class TestStudyLocusSuSiERedundancyFlagging: + """Collection of tests related to flagging redundant credible sets.""" + + STUDY_LOCUS_DATA: Any = [ + # to be flagged due to v4 + ( + 1, + "v1", + "s1", + "X", + "pics", + 1, + 3, + [ + {"variantId": "X_1_A_A"}, + {"variantId": "X_2_A_A"}, + {"variantId": "X_3_A_A"}, + ], + [], + ), + # to be flagged due to v4 + ( + 2, + "v2", + "s1", + "X", + "pics", + 4, + 5, + [ + {"variantId": "X_4_A_A"}, + {"variantId": "X_5_A_A"}, + ], + [], + ), + # NOT to be flagged (outside regions) + ( + 3, + "v3", + "s1", + "X", + "pics", + 6, + 7, + [ + {"variantId": "X_6_A_A"}, + {"variantId": "X_7_A_A"}, + ], + [], + ), 
+ # NOT to be flagged (SuSie-Inf credible set) + ( + 4, + "v4", + "s1", + "X", + "SuSiE-inf", + 3, + 5, + [{"variantId": "X_3_A_A"}, {"variantId": "X_5_A_A"}], + [], + ), + # NOT to be flagged (Unresolved LD) + ( + 5, + "v5", + "s1", + "X", + "pics", + 5, + 5, + [ + {"variantId": "X_5_A_A"}, + ], + [StudyLocusQualityCheck.UNRESOLVED_LD.value], + ), + # NOT to be flagged (different study) + ( + 6, + "v6", + "s2", + "X", + "pics", + 3, + 5, + [ + {"variantId": "X_3_A_A"}, + {"variantId": "X_5_A_A"}, + ], + [], + ), + ] + + STUDY_LOCUS_SCHEMA = t.StructType( + [ + t.StructField("studyLocusId", t.LongType(), False), + t.StructField("variantId", t.StringType(), False), + t.StructField("studyId", t.StringType(), False), + t.StructField("chromosome", t.StringType(), False), + t.StructField("finemappingMethod", t.StringType(), False), + t.StructField("locusStart", t.IntegerType(), False), + t.StructField("locusEnd", t.IntegerType(), False), + StructField( + "locus", + ArrayType( + StructType( + [ + StructField("variantId", StringType(), True), + ] + ) + ), + True, + ), + t.StructField("qualityControls", t.ArrayType(t.StringType()), False), + ] + ) + + @pytest.fixture(autouse=True) + def _setup( + self: TestStudyLocusSuSiERedundancyFlagging, spark: SparkSession + ) -> None: + """Setup study locus for testing.""" + self.study_locus = StudyLocus( + _df=spark.createDataFrame( + self.STUDY_LOCUS_DATA, schema=self.STUDY_LOCUS_SCHEMA + ), + _schema=StudyLocus.get_schema(), + ) + + def test_qc_qc_explained_by_SuSiE_returntype( + self: TestStudyLocusSuSiERedundancyFlagging, + ) -> None: + """Test qc_explained_by_SuSiE.""" + assert isinstance(self.study_locus.qc_explained_by_SuSiE(), StudyLocus) + + def test_qc_explained_by_SuSiE_no_data_loss( + self: TestStudyLocusSuSiERedundancyFlagging, + ) -> None: + """Test qc_explained_by_SuSiE no data loss.""" + assert ( + self.study_locus.qc_explained_by_SuSiE().df.count() + == self.study_locus.df.count() + ) + + def 
test_qc_explained_by_SuSiE_correctness( + self: TestStudyLocusSuSiERedundancyFlagging, + ) -> None: + """Testing if the study validation flags the right number of studies.""" + assert ( + self.study_locus.qc_explained_by_SuSiE() + .df.filter( + f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.EXPLAINED_BY_SUSIE.value, + ) + ) + .count() + ) == 2 From 2010fb654e5cae241028e9614b79865e871034e1 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 24 Sep 2024 17:10:23 +0100 Subject: [PATCH 054/188] fix: remove n_eff check from qc_step (#785) --- src/gentropy/config.py | 1 + .../method/sumstat_quality_controls.py | 21 ++++++++----------- src/gentropy/sumstat_qc_step.py | 4 +++- tests/gentropy/method/test_qc_of_sumstats.py | 16 +++++--------- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 32edc9a4a..86bfc7afe 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -455,6 +455,7 @@ class GWASQCStep(StepConfig): gwas_path: str = MISSING output_path: str = MISSING studyid: str = MISSING + pval_threshold: float = MISSING _target_: str = "gentropy.sumstat_qc_step.SummaryStatisticsQCStep" diff --git a/src/gentropy/method/sumstat_quality_controls.py b/src/gentropy/method/sumstat_quality_controls.py index 2858f4813..1647851de 100644 --- a/src/gentropy/method/sumstat_quality_controls.py +++ b/src/gentropy/method/sumstat_quality_controls.py @@ -1,4 +1,5 @@ """Summary statistics qulity control methods.""" + from __future__ import annotations import numpy as np @@ -225,13 +226,13 @@ def gc_lambda_check( @staticmethod def number_of_snps( - gwas_for_qc: SummaryStatistics, pval_threhod: float = 5e-8 + gwas_for_qc: SummaryStatistics, pval_threshold: float = 5e-8 ) -> DataFrame: """The function caluates number of SNPs and number of SNPs with p-value less than 5e-8. Args: gwas_for_qc (SummaryStatistics): The instance of the SummaryStatistics class. 
- pval_threhod (float): The threshold for the p-value. + pval_threshold (float): The threshold for the p-value. Returns: DataFrame: PySpark DataFrame with the number of SNPs and number of SNPs with p-value less than threshold. @@ -243,7 +244,7 @@ def number_of_snps( f.sum( ( f.log10(f.col("pValueMantissa")) + f.col("pValueExponent") - <= np.log10(pval_threhod) + <= np.log10(pval_threshold) ).cast("int") ).alias("n_variants_sig"), ) @@ -254,30 +255,26 @@ def number_of_snps( def get_quality_control_metrics( gwas: SummaryStatistics, limit: int = 100_000_000, - min_count: int = 100_000, - n_total: int = 100_000, + pval_threshold: float = 5e-8, ) -> DataFrame: """The function calculates the quality control metrics for the summary statistics. Args: gwas (SummaryStatistics): The instance of the SummaryStatistics class. limit (int): The limit for the number of variants to be used for the estimation. - min_count (int): The minimum number of variants to be used for the estimation. - n_total (int): The total sample size. + pval_threshold (float): The threshold for the p-value. Returns: DataFrame: PySpark DataFrame with the quality control metrics for the summary statistics. 
""" qc1 = SummaryStatisticsQC.sumstat_qc_beta_check(gwas_for_qc=gwas) qc2 = SummaryStatisticsQC.sumstat_qc_pz_check(gwas_for_qc=gwas, limit=limit) - qc3 = SummaryStatisticsQC.sumstat_n_eff_check( - gwas_for_qc=gwas, n_total=n_total, limit=limit, min_count=min_count - ) qc4 = SummaryStatisticsQC.gc_lambda_check(gwas_for_qc=gwas, limit=limit) - qc5 = SummaryStatisticsQC.number_of_snps(gwas_for_qc=gwas) + qc5 = SummaryStatisticsQC.number_of_snps( + gwas_for_qc=gwas, pval_threshold=pval_threshold + ) df = ( qc1.join(qc2, on="studyId", how="outer") - .join(qc3, on="studyId", how="outer") .join(qc4, on="studyId", how="outer") .join(qc5, on="studyId", how="outer") ) diff --git a/src/gentropy/sumstat_qc_step.py b/src/gentropy/sumstat_qc_step.py index 0c3b7bb14..b5aed905e 100644 --- a/src/gentropy/sumstat_qc_step.py +++ b/src/gentropy/sumstat_qc_step.py @@ -16,6 +16,7 @@ def __init__( gwas_path: str, output_path: str, studyid: str, + pval_threshold: float = 1e-8, ) -> None: """Calculating quality control metrics on the provided GWAS study. @@ -24,13 +25,14 @@ def __init__( gwas_path (str): Path to the GWAS summary statistics. output_path (str): Output path for the QC results. studyid (str): Study ID for the QC. + pval_threshold (float): P-value threshold for the QC. Default is 1e-8. 
""" gwas = SummaryStatistics.from_parquet(session, path=gwas_path) ( SummaryStatisticsQC.get_quality_control_metrics( - gwas=gwas, limit=100_000_000, min_count=100, n_total=100000 + gwas=gwas, limit=100_000_000, pval_threshold=pval_threshold ) .write.mode(session.write_mode) .parquet(output_path + "/qc_results_" + studyid) diff --git a/tests/gentropy/method/test_qc_of_sumstats.py b/tests/gentropy/method/test_qc_of_sumstats.py index d734fcaef..8f63e6ba2 100644 --- a/tests/gentropy/method/test_qc_of_sumstats.py +++ b/tests/gentropy/method/test_qc_of_sumstats.py @@ -3,7 +3,6 @@ from __future__ import annotations import numpy as np -import pandas as pd import pyspark.sql.functions as f import pytest from pyspark.sql.functions import rand, when @@ -18,9 +17,7 @@ def test_qc_functions( ) -> None: """Test all sumstat qc functions.""" gwas = sample_summary_statistics.sanity_filter() - QC = SummaryStatisticsQC.get_quality_control_metrics( - gwas=gwas, limit=100000, min_count=100, n_total=100000 - ) + QC = SummaryStatisticsQC.get_quality_control_metrics(gwas=gwas, limit=100000) QC = QC.toPandas() assert QC["n_variants"].iloc[0] == 1663 @@ -29,7 +26,6 @@ def test_qc_functions( assert np.round(QC["mean_beta"].iloc[0], 4) == 0.0013 assert np.round(QC["mean_diff_pz"].iloc[0], 6) == 0 assert np.round(QC["se_diff_pz"].iloc[0], 6) == 0 - assert pd.isna(QC["se_N"].iloc[0]) def test_neff_check_eaf( @@ -41,8 +37,8 @@ def test_neff_check_eaf( gwas_df = gwas_df.withColumn("effectAlleleFrequencyFromSource", f.lit(0.5)) gwas._df = gwas_df - QC = SummaryStatisticsQC.get_quality_control_metrics( - gwas=gwas, limit=100000, min_count=100, n_total=100000 + QC = SummaryStatisticsQC.sumstat_n_eff_check( + gwas_for_qc=gwas, limit=100000, min_count=100, n_total=100000 ) QC = QC.toPandas() assert np.round(QC["se_N"].iloc[0], 4) == 0.5586 @@ -59,11 +55,9 @@ def test_several_studyid( ) gwas._df = gwas_df - QC = SummaryStatisticsQC.get_quality_control_metrics( - gwas=gwas, limit=100000, min_count=100, 
n_total=100000 - ) + QC = SummaryStatisticsQC.get_quality_control_metrics(gwas=gwas, limit=100000) QC = QC.toPandas() - assert QC.shape == (2, 8) + assert QC.shape == (2, 7) def test_sanity_filter_remove_inf_values( From d2a68d934462a9706455bac0d67a065bb43f9f0b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:41:27 +0100 Subject: [PATCH 055/188] build(deps-dev): bump pymdown-extensions from 10.9 to 10.10.1 (#781) Bumps [pymdown-extensions](https://github.com/facelessuser/pymdown-extensions) from 10.9 to 10.10.1. - [Release notes](https://github.com/facelessuser/pymdown-extensions/releases) - [Commits](https://github.com/facelessuser/pymdown-extensions/compare/10.9...10.10.1) --- updated-dependencies: - dependency-name: pymdown-extensions dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: David Ochoa --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 296f07145..4cc0e3bba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiodns" @@ -3740,13 +3740,13 @@ files = [ [[package]] name = "pymdown-extensions" -version = "10.9" +version = "10.10.1" description = "Extension pack for Python Markdown." 
optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.9-py3-none-any.whl", hash = "sha256:d323f7e90d83c86113ee78f3fe62fc9dee5f56b54d912660703ea1816fed5626"}, - {file = "pymdown_extensions-10.9.tar.gz", hash = "sha256:6ff740bcd99ec4172a938970d42b96128bdc9d4b9bcad72494f29921dc69b753"}, + {file = "pymdown_extensions-10.10.1-py3-none-any.whl", hash = "sha256:6c74ea6c2e2285186a241417480fc2d3cc52941b3ec2dced4014c84dc78c5493"}, + {file = "pymdown_extensions-10.10.1.tar.gz", hash = "sha256:ad277ee4739ced051c3b6328d22ce782358a3bec39bc6ca52815ccbf44f7acdc"}, ] [package.dependencies] From 95be9f6d0694dff95764197bdd1247675b688551 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 24 Sep 2024 21:33:13 +0100 Subject: [PATCH 056/188] build: updated precommits including adjustments to docstrings (#787) --- .pre-commit-config.yaml | 8 ++++---- poetry.lock | 1 - src/gentropy/common/version_engine.py | 8 ++++++-- src/gentropy/dataset/colocalisation.py | 3 +-- src/gentropy/dataset/dataset.py | 3 +-- src/gentropy/datasource/ensembl/vep_parser.py | 3 +-- src/gentropy/l2g.py | 3 +-- src/gentropy/method/l2g/model.py | 7 +++---- src/gentropy/method/susie_inf.py | 7 +++++-- 9 files changed, 22 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 17b7a4f6e..a938f03a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.1 + rev: v0.6.7 hooks: - id: ruff args: @@ -58,14 +58,14 @@ repos: exclude: "CHANGELOG.md" - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook - rev: v9.16.0 + rev: v9.18.0 hooks: - id: commitlint additional_dependencies: ["@commitlint/config-conventional@18.6.3"] stages: [commit-msg] - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.11.1" + rev: "v1.11.2" hooks: - id: mypy args: @@ -98,7 +98,7 @@ repos: - id: beautysh - repo: 
https://github.com/jsh9/pydoclint - rev: 0.5.6 + rev: 0.5.8 hooks: - id: pydoclint diff --git a/poetry.lock b/poetry.lock index 4cc0e3bba..bf854b313 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3952,7 +3952,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/src/gentropy/common/version_engine.py b/src/gentropy/common/version_engine.py index 1cf34bbec..d852d8f5d 100644 --- a/src/gentropy/common/version_engine.py +++ b/src/gentropy/common/version_engine.py @@ -101,7 +101,11 @@ def amend_version( class DatasourceVersionSeeker(ABC): - """Interface for datasource version seeker.""" + """Interface for datasource version seeker. 
+ + Raises: + NotImplementedError: if method is not implemented in the subclass + """ @staticmethod @abstractmethod @@ -115,7 +119,7 @@ def seek_version(text: str) -> str: str: seeked version Raises: - ValueError: if version can not be seeked + NotImplementedError: if method is not implemented in the subclass """ raise NotImplementedError diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index 94a4f09dc..9e9035488 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -55,8 +55,7 @@ def extract_maximum_coloc_probability_per_region_and_gene( DataFrame: table with the maximum colocalisation scores for the provided study loci Raises: - ValueError: if filter_by_qtl is not in the list of valid QTL types - ValueError: if filter_by_colocalisation_method is not in the list of valid colocalisation methods + ValueError: if filter_by_qtl is not in the list of valid QTL types or is not in the list of valid colocalisation methods """ from gentropy.colocalisation import ColocalisationStep diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index cbeae7073..e56ef2ecc 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -211,8 +211,7 @@ def valid_rows(self: Self, invalid_flags: list[str], invalid: bool = False) -> S Self: filtered dataset. Raises: - ValueError: If the Dataset does not contain a QC column. - ValueError: If the invalid_flags elements do not exist in QC mappings flags. + ValueError: If the Dataset does not contain a QC column or if the invalid_flags elements do not exist in QC mappings flags. 
""" # If the invalid flags are not valid quality checks (enum) for this Dataset we raise an error: invalid_reasons = [] diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index c7ee05d13..4b70a36e6 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -71,8 +71,7 @@ def extract_variant_index_from_vep( VariantIndex: Variant index dataset. Raises: - ValueError: Failed reading file. - ValueError: The dataset is empty. + ValueError: Failed reading file or if the dataset is empty. """ # To speed things up and simplify the json structure, read data following an expected schema: vep_schema = cls.get_schema() diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 13dbb881b..6f80d826e 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -201,8 +201,7 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr L2GFeatureMatrix: Feature matrix with gold standards annotated with features. Raises: - ValueError: If write_feature_matrix is set to True but a path is not provided. - ValueError: If dependencies to build features are not set. + ValueError: If write_feature_matrix is set to True but a path is not provided or if dependencies to build features are not set. """ if self.gs_curation and self.interactions and self.v2g: study_locus_overlap = StudyLocus( diff --git a/src/gentropy/method/l2g/model.py b/src/gentropy/method/l2g/model.py index 6e0b0fda1..45d90f90d 100644 --- a/src/gentropy/method/l2g/model.py +++ b/src/gentropy/method/l2g/model.py @@ -135,8 +135,7 @@ def save(self: LocusToGeneModel, path: str) -> None: path (str): Path to save the persisted model. 
Should end with .skops Raises: - ValueError: If the model has not been fitted yet - ValueError: If the path does not end with .skops + ValueError: If the model has not been fitted yet or if the path does not end with .skops """ if self.model is None: raise ValueError("Model has not been fitted yet.") @@ -215,7 +214,7 @@ def export_to_hugging_face_hub( local_repo (str): Path to the folder where the contents of the model repo + the documentation are located. This is used to push the model to the Hugging Face Hub. Raises: - Exception: If the push to the Hugging Face Hub fails + RuntimeError: If the push to the Hugging Face Hub fails """ from sklearn import __version__ as sklearn_version @@ -241,4 +240,4 @@ def export_to_hugging_face_hub( for p in Path(local_repo).glob("*"): p.unlink() Path(local_repo).rmdir() - raise e + raise RuntimeError from e diff --git a/src/gentropy/method/susie_inf.py b/src/gentropy/method/susie_inf.py index 482818b71..4f75faad8 100644 --- a/src/gentropy/method/susie_inf.py +++ b/src/gentropy/method/susie_inf.py @@ -23,6 +23,9 @@ class SUSIE_inf: Note: code copied from fine-mapping-inf package as a placeholder https://github.com/FinucaneLab/fine-mapping-inf + + Raises: + RuntimeError: if missing LD or if unsupported variance estimation """ @staticmethod @@ -89,8 +92,7 @@ def susie_inf( # noqa: C901 lbf -- length-p array of log-Bayes-factors for each CS Raises: - RuntimeError: if missing LD - RuntimeError: if unsupported variance estimation method + RuntimeError: if missing LD or if unsupported variance estimation method """ p = len(z) # Precompute V,D^2 in the SVD X=UDV', and V'X'y and y'y @@ -428,6 +430,7 @@ def cred_inf( Raises: RuntimeError: if missing inputs for purity filtering + ValueError: if either LD or V, Dsq are None """ if (V is None or Dsq is None or n is None) and LD is None: raise RuntimeError("Missing inputs for purity filtering") From 6c4bdf50430201e870dd810a2a4c325f653456cf Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski 
<69353402+project-defiant@users.noreply.github.com> Date: Wed, 25 Sep 2024 00:16:27 +0200 Subject: [PATCH 057/188] fix(finngen_study_index): improved tests for finngen study index (#776) * fix(finngen_study_index): improved tests for finngen study index * chore(tests): added pytest mark to be able to isolate step tests * chore: pr comments * feat: revert mock.patch --------- Co-authored-by: Szymon Szyszkowski --- pyproject.toml | 1 + src/gentropy/config.py | 1 + .../datasource/finngen/study_index.py | 19 +- src/gentropy/finngen_studies.py | 4 + .../finngen/test_finngen_study_index.py | 359 +++++++++++++++--- 5 files changed, 322 insertions(+), 62 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8e5469c6a..f61d82116 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,6 +126,7 @@ exclude = ["dist"] addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml" pythonpath = ["."] testpaths = ["tests/gentropy", "src/gentropy"] +marks = ["step_test"] # Semi-strict mode for mypy [tool.mypy] diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 86bfc7afe..3293a882a 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -146,6 +146,7 @@ class FinngenStudiesConfig(StepConfig): ) finngen_summary_stats_url_suffix: str = ".gz" efo_curation_mapping_url: str = "https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv" + sample_size: int = 453733 # https://www.finngen.fi/en/access_results#:~:text=Total%20sample%20size%3A%C2%A0453%2C733%C2%A0(254%2C618%C2%A0females%20and%C2%A0199%2C115%20males) _target_: str = "gentropy.finngen_studies.FinnGenStudiesStep" diff --git a/src/gentropy/datasource/finngen/study_index.py b/src/gentropy/datasource/finngen/study_index.py index 1e2e71f72..3946323f4 100644 --- a/src/gentropy/datasource/finngen/study_index.py +++ b/src/gentropy/datasource/finngen/study_index.py @@ -1,4 +1,4 @@ -"""Study Index for Finngen data source.""" +"""Study Index for FinnGen data 
source.""" from __future__ import annotations @@ -8,7 +8,6 @@ import pyspark.sql.functions as f from pyspark.sql import DataFrame, SparkSession -from gentropy.config import FinngenStudiesConfig from gentropy.dataset.study_index import StudyIndex @@ -30,7 +29,7 @@ class FinnGenStudyIndex: def join_efo_mapping( study_index: StudyIndex, efo_curation_mapping: DataFrame, - finngen_release_prefix: str = FinngenStudiesConfig().finngen_release_prefix, + finngen_release_prefix: str, ) -> StudyIndex: """Add EFO mapping to the Finngen study index table. @@ -88,10 +87,11 @@ def join_efo_mapping( def from_source( cls: type[FinnGenStudyIndex], spark: SparkSession, - finngen_phenotype_table_url: str = FinngenStudiesConfig().finngen_phenotype_table_url, - finngen_release_prefix: str = FinngenStudiesConfig().finngen_release_prefix, - finngen_summary_stats_url_prefix: str = FinngenStudiesConfig().finngen_summary_stats_url_prefix, - finngen_summary_stats_url_suffix: str = FinngenStudiesConfig().finngen_summary_stats_url_suffix, + finngen_phenotype_table_url: str, + finngen_release_prefix: str, + finngen_summary_stats_url_prefix: str, + finngen_summary_stats_url_suffix: str, + sample_size: int, ) -> StudyIndex: """This function ingests study level metadata from FinnGen. @@ -101,6 +101,7 @@ def from_source( finngen_release_prefix (str): FinnGen release prefix. finngen_summary_stats_url_prefix (str): FinnGen summary stats URL prefix. finngen_summary_stats_url_suffix (str): FinnGen summary stats URL suffix. + sample_size (int): Number of individuals participated in sample collection. Returns: StudyIndex: Parsed and annotated FinnGen study table. 
@@ -120,12 +121,12 @@ def from_source( f.lit(finngen_release_prefix).alias("projectId"), f.lit("gwas").alias("studyType"), f.lit(True).alias("hasSumstats"), - f.lit("377,277 (210,870 females and 166,407 males)").alias( + f.lit("453,733 (254,618 females and 199,115 males)").alias( "initialSampleSize" ), f.array( f.struct( - f.lit(377277).cast("integer").alias("sampleSize"), + f.lit(sample_size).cast("integer").alias("sampleSize"), f.lit("Finnish").alias("ancestry"), ) ).alias("discoverySamples"), diff --git a/src/gentropy/finngen_studies.py b/src/gentropy/finngen_studies.py index 706fa3d39..80866ac99 100644 --- a/src/gentropy/finngen_studies.py +++ b/src/gentropy/finngen_studies.py @@ -21,6 +21,7 @@ def __init__( finngen_summary_stats_url_prefix: str = FinngenStudiesConfig().finngen_summary_stats_url_prefix, finngen_summary_stats_url_suffix: str = FinngenStudiesConfig().finngen_summary_stats_url_suffix, efo_curation_mapping_url: str = FinngenStudiesConfig().efo_curation_mapping_url, + sample_size: int = FinngenStudiesConfig().sample_size, ) -> None: """Run FinnGen study index generation step. @@ -32,6 +33,7 @@ def __init__( finngen_summary_stats_url_prefix (str): FinnGen summary stats URL prefix. finngen_summary_stats_url_suffix (str): FinnGen summary stats URL suffix. efo_curation_mapping_url (str): URL to the EFO curation mapping file + sample_size (int): Number of individuals that participated in sample collection, derived from finngen release metadata. """ study_index = FinnGenStudyIndex.from_source( session.spark, @@ -39,12 +41,14 @@ def __init__( finngen_release_prefix, finngen_summary_stats_url_prefix, finngen_summary_stats_url_suffix, + sample_size, ) # NOTE: hack to allow spark to read directly from the URL. 
csv_data = urlopen(efo_curation_mapping_url).readlines() csv_rows = [row.decode("utf8") for row in csv_data] rdd = session.spark.sparkContext.parallelize(csv_rows) + # NOTE: type annotations for spark.read.csv miss the fact that the first param can be [RDD[str]] efo_curation_mapping = session.spark.read.csv(rdd, header=True, sep="\t") study_index_with_efo = FinnGenStudyIndex.join_efo_mapping( diff --git a/tests/gentropy/datasource/finngen/test_finngen_study_index.py b/tests/gentropy/datasource/finngen/test_finngen_study_index.py index 5b2be30c4..07d014d13 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_study_index.py +++ b/tests/gentropy/datasource/finngen/test_finngen_study_index.py @@ -2,85 +2,338 @@ from __future__ import annotations -from pyspark.sql import SparkSession -from pyspark.sql import types as t +import json +from typing import TYPE_CHECKING +from unittest.mock import MagicMock + +import pytest +from pyspark.sql import types as T from gentropy.dataset.study_index import StudyIndex from gentropy.datasource.finngen.study_index import FinnGenStudyIndex +from gentropy.finngen_studies import FinnGenStudiesStep +if TYPE_CHECKING: + from pathlib import Path + from typing import Callable -def test_finngen_study_index_from_source(spark: SparkSession) -> None: - """Test study index from source.""" - assert isinstance(FinnGenStudyIndex.from_source(spark), StudyIndex) + from pyspark.sql import DataFrame, SparkSession + from gentropy.common.session import Session -def test_finngen_study_index_add_efos(spark: SparkSession) -> None: - """Test finngen study index add efo ids.""" - study_index_table_data = [ + +@pytest.fixture() +def finngen_study_index_mock(spark: SparkSession) -> StudyIndex: + """Finngen minimal example for mocking join to the efo mappings.""" + data = [ + # NOTE: Study maps to a single EFO trait ( - "AB1_1", + "STUDY_1", "Actinomycosis", "FINNGEN_R11", "gwas", ), + # NOTE: Study does not map to EFO traits ( - "AB1_2", - "Some unknown 
trait", - "FINNGEN_R11", - "gwas", - ), - ( - "AB1_1", - "Some unknown trait", + "STUDY_2", + "Some other trait", "FINNGEN_R11", "gwas", ), + # NOTE: Study maps to two EFO traits ( - "AB1_1", - "Bleeding", + "STUDY_3", + "Glucose", "FINNGEN_R11", "gwas", ), ] - study_index_df = spark.createDataFrame( - data=study_index_table_data, - schema=t.StructType( - [ - t.StructField("studyId", t.StringType(), nullable=False), - t.StructField("traitFromSource", t.StringType(), nullable=False), - t.StructField("projectId", t.StringType(), nullable=False), - t.StructField("studyType", t.StringType(), nullable=False), - ] - ), + schema = T.StructType( + [ + T.StructField("studyId", T.StringType(), nullable=False), + T.StructField("traitFromSource", T.StringType(), nullable=False), + T.StructField("projectId", T.StringType(), nullable=False), + T.StructField("studyType", T.StringType(), nullable=False), + ] ) + df = spark.createDataFrame(data=data, schema=schema) + return StudyIndex(_df=df, _schema=StudyIndex.get_schema()) - curation_table_data = [ - ("FinnGen r11", "Actinomycosis", "http://www.ebi.ac.uk/efo/EFO_0007128"), - ("FinnGen r11", "bleeding", "http://purl.obolibrary.org/obo/MP_0001914"), - ("FinnGen r11", "Bruxism", "http://purl.obolibrary.org/obo/MONDO_0002443"), + +@pytest.fixture() +def finngen_phenotype_table_mock() -> str: + """This is the data extracted from https://r11.finngen.fi/api/phenos.""" + data = json.dumps( + [ + # NOTE: Study maps to single EFO trait. 
+ { + "assoc_files": [ + "/cromwell_root/pheweb/generated-by-pheweb/pheno_gz/AB1_ACTINOMYCOSIS.gz" + ], + "category": "I Certain infectious and parasitic diseases (AB1_)", + "category_index": 1, + "gc_lambda": { + "0.001": 0.93878, + "0.01": 0.96727, + "0.1": 0.85429, + "0.5": 0.52544, + }, + "num_cases": 113, + "num_cases_prev": 101, + "num_controls": 399149, + "num_controls_prev": 363227, + "num_gw_significant": 0, + "num_gw_significant_prev": 0, + "phenocode": "AB1_ACTINOMYCOSIS", + "phenostring": "Actinomycosis", + }, + # NOTE: Study maps to multiple EFO traits. + { + "assoc_files": [ + "/cromwell_root/pheweb/generated-by-pheweb/pheno_gz/GLUCOSE.gz" + ], + "category": "Glucose", + "category_index": 28, + "gc_lambda": { + "0.001": 1.1251, + "0.01": 1.062, + "0.1": 1.0531, + "0.5": 1.0599, + }, + "num_cases": 43764, + "num_cases_prev": 39231, + "num_controls": 409969, + "num_controls_prev": 372950, + "num_gw_significant": 3, + "num_gw_significant_prev": 3, + "phenocode": "GLUCOSE", + "phenostring": "Glucose", + }, + # NOTE: Study does not map to EFO traits + { + "assoc_files": [ + "/cromwell_root/pheweb/generated-by-pheweb/pheno_gz/SOME_OTHER_TRAIT.gz" + ], + "category": "SomeOtherTrait", + "category_index": 28, + "gc_lambda": { + "0.001": 1.1251, + "0.01": 1.062, + "0.1": 1.0531, + "0.5": 1.0599, + }, + "num_cases": 43764, + "num_cases_prev": 39231, + "num_controls": 409969, + "num_controls_prev": 372950, + "num_gw_significant": 3, + "num_gw_significant_prev": 3, + "phenocode": "SOME_OTHER_TRAIT", + "phenostring": "Some other trait", + }, + ] + ) + return data + + +@pytest.fixture() +def efo_mappings_mock() -> list[tuple[str, str, str]]: + """EFO mappings mock based on https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv. + + Only required fields are extracted. 
+ """ + data = [ ( - "PheWAS 2024", - "20161#Pack years of smoking", - "http://www.ebi.ac.uk/efo/EFO_0005671", + "STUDY", + "PROPERTY_VALUE", + "SEMANTIC_TAG", ), + ("FinnGen r11", "Actinomycosis", "http://www.ebi.ac.uk/efo/EFO_0007128"), + # NOTE: EFO does not map, as it's missing from the StudyIndex - hypothetical example. + ("FinnGen r11", "Bleeding", "http://purl.obolibrary.org/obo/MP_0001914"), + # NOTE: Two EFO traits for one disease should be collected to array - hypothetical example: + # Glucose tolerance test & NMR Glucose + ("FinnGen r11", "Glucose", "http://www.ebi.ac.uk/efo/EFO_0002571"), + ("FinnGen r11", "Glucose", "http://www.ebi.ac.uk/efo/EFO_0004468"), + # NOTE: EFO that does not map, due to study not from Finngen - hypothetical example. + ("PheWAS 2024", "Glucose", "http://www.ebi.ac.uk/efo/EFO_0000001"), ] - curation_df = spark.createDataFrame( - data=curation_table_data, - schema=t.StructType( - [ - t.StructField("STUDY", t.StringType(), nullable=False), - t.StructField("PROPERTY_VALUE", t.StringType(), nullable=False), - t.StructField("SEMANTIC_TAG", t.StringType(), nullable=False), - ] - ), + return data + + +@pytest.fixture() +def efo_mappings_df_mock( + spark: SparkSession, efo_mappings_mock: list[tuple[str, str, str]] +) -> DataFrame: + """EFO mappings dataframe mock.""" + schema = T.StructType( + [ + T.StructField("STUDY", T.StringType(), nullable=False), + T.StructField("PROPERTY_VALUE", T.StringType(), nullable=False), + T.StructField("SEMANTIC_TAG", T.StringType(), nullable=False), + ] ) + data = spark.createDataFrame(data=efo_mappings_mock, schema=schema) + return data - study_index = StudyIndex(_df=study_index_df, _schema=study_index_df.schema) - assert isinstance( - FinnGenStudyIndex.join_efo_mapping( - study_index, - finngen_release_prefix="FINNGEN_R11_", - efo_curation_mapping=curation_df, - ), - StudyIndex, + +@pytest.fixture() +def urlopen_mock( + efo_mappings_mock: list[tuple[str, str, str, str]], + finngen_phenotype_table_mock: 
str, +) -> Callable[[str], MagicMock]: + """Mock object for requesting urlopen objects with proper encoding. + + This mock object allows to call `read` and `readlines` methods on two endpoints: + - https://finngen_phenotypes -> finngen_phenotype_table_mock + - https://efo_mappings -> efo_mappings_mock + + The return values are mocks of the source data respectively. + """ + + def mock_response(url: str) -> MagicMock: + """Mock urllib.request.urlopen.""" + match url: + case "https://finngen_phenotypes": + value = finngen_phenotype_table_mock + case "https://efo_mappings": + value = "\n".join(["\t".join(row) + "\n" for row in efo_mappings_mock]) + case _: + value = "" + mock_open = MagicMock() + mock_open.read.return_value = value.encode() + mock_open.readlines.return_value = value.encode().splitlines(keepends=True) + return mock_open + + return mock_response + + +@pytest.mark.step_test +def test_finngen_study_index_step( + monkeypatch: pytest.MonkeyPatch, + session: Session, + tmp_path: Path, + urlopen_mock: Callable[[str], MagicMock], +) -> None: + """Test step that generates finngen study index. + + FIXME: Currently we miss following columns when reading from source. 
+ 'biosampleFromSourceId' + 'publicationTitle' + 'diseaseIds' + 'publicationDate' + 'geneId' + 'backgroundDiseaseIds' + 'pubmedId' + 'publicationJournal' + 'qualityControls' + 'backgroundTraitFromSourceMappedIds' + 'publicationFirstAuthor' + 'replicationSamples' + 'analysisFlags' + 'condition' + """ + with monkeypatch.context() as m: + m.setattr("gentropy.datasource.finngen.study_index.urlopen", urlopen_mock) + m.setattr("gentropy.finngen_studies.urlopen", urlopen_mock) + output_path = str(tmp_path / "study_index") + FinnGenStudiesStep( + session=session, + finngen_study_index_out=output_path, + finngen_phenotype_table_url="https://finngen_phenotypes", + finngen_release_prefix="FINNGEN_R11", + finngen_summary_stats_url_prefix="gs://finngen_data/sumstats", + finngen_summary_stats_url_suffix=".gz", + efo_curation_mapping_url="https://efo_mappings", + sample_size=5_000_000, + ) + study_index = StudyIndex.from_parquet(session=session, path=output_path) + # fmt: off + assert study_index.df.count() == 3, "Expected 3 rows that come from the input table." + assert "traitFromSourceMappedIds" in study_index.df.columns, "Expected that EFO terms were joined to the study_index table." 
+ # fmt: on + + +def test_finngen_study_index_from_source( + monkeypatch: pytest.MonkeyPatch, + spark: SparkSession, + urlopen_mock: Callable[[str], MagicMock], +) -> None: + """Test study index from source.""" + with monkeypatch.context() as m: + m.setattr("gentropy.datasource.finngen.study_index.urlopen", urlopen_mock) + expected_sample_size = 5_000_000 + expected_project_id = "FINNGEN_R11" + study_index = FinnGenStudyIndex.from_source( + spark, + finngen_phenotype_table_url="https://finngen_phenotypes", + finngen_release_prefix=expected_project_id, + finngen_summary_stats_url_prefix="gs://finngen-public-data-r11/summary_stats/finngen_R11_", + finngen_summary_stats_url_suffix=".gz", + sample_size=expected_sample_size, + ) + # fmt: off + assert isinstance(study_index, StudyIndex), "Expect that we deal with StudyIndex object." + + all_columns = StudyIndex.get_schema().fieldNames() + assert set(all_columns).issuperset(set(study_index.df.columns)), "Expect all columns can be found in the schema of StudyIndex." + assert study_index.df.count() == 3, "Expect two rows at the study_index, as in the input." + + rows = study_index.df.collect() + expected_study_ids = ["AB1_ACTINOMYCOSIS", "GLUCOSE", "SOME_OTHER_TRAIT"] + assert "studyId" in study_index.df.columns, "Expect that studyId column exists." + assert sorted([v["studyId"] for v in rows]) == expected_study_ids, "Expect that studyIds are populated from input." + + assert "projectId" in study_index.df.columns, "Expect that projectId column exists." + assert {v["projectId"] for v in rows} == {expected_project_id}, "Expect projectId column is correctly populated." 
+ + expected_sumstat_locations = [ + "gs://finngen-public-data-r11/summary_stats/finngen_R11_AB1_ACTINOMYCOSIS.gz", + "gs://finngen-public-data-r11/summary_stats/finngen_R11_GLUCOSE.gz", + "gs://finngen-public-data-r11/summary_stats/finngen_R11_SOME_OTHER_TRAIT.gz", + ] + assert "summarystatsLocation" in study_index.df.columns, "Expect that summarystatsLocation column exists." + sumstat_locations = sorted([v["summarystatsLocation"] for v in rows]) + assert sumstat_locations == expected_sumstat_locations, "Expect that summarystatsLocation is populated." + assert "ldPopulationStructure" in study_index.df.columns, "Expect that ldPopulationStructure column exists." + for row in rows: + ld_struct = row["ldPopulationStructure"][0] + assert ld_struct["ldPopulation"] == "fin", "Expect fin ld population structure." + assert ld_struct["relativeSampleSize"] == pytest.approx(1.0), "Expect relative sample size if fixed to be 1.0." + + assert "discoverySamples" in study_index.df.columns, "Expect that discoverySamples column exists." + for row in rows: + ds_struct = row["discoverySamples"][0] + assert ds_struct["ancestry"] == "Finnish", "Expect Finnish ancestry." + assert ds_struct["sampleSize"] == expected_sample_size, "Expect sample size to be fixed." + # fmt: on + + +def test_finngen_study_index_add_efos( + finngen_study_index_mock: StudyIndex, + efo_mappings_df_mock: DataFrame, +) -> None: + """Test finngen study index add efo ids.""" + efo_column_name = "traitFromSourceMappedIds" + # Expect that EFO column is not present when study index is generated. + assert efo_column_name not in finngen_study_index_mock.df.columns + study_index = FinnGenStudyIndex.join_efo_mapping( + finngen_study_index_mock, + finngen_release_prefix="FINNGEN_R11_", + efo_curation_mapping=efo_mappings_df_mock, ) + # fmt: off + assert isinstance(study_index, StudyIndex), "Expect we have the StudyIndex object after joining EFOs." 
+ assert efo_column_name in study_index.df.columns, "Expect that EFO column is present after joining EFOs." + assert study_index.df.count() == 3, "Expect we do not drop any studies, even if no EFO has been found." + # fmt: on + efos = { + row["studyId"]: sorted(row[efo_column_name]) + for row in study_index.df.select(efo_column_name, "studyId").collect() + } + expected_efos = { + "STUDY_1": ["EFO_0007128"], + "STUDY_2": [], + "STUDY_3": ["EFO_0002571", "EFO_0004468"], + } + assert expected_efos == efos, "Expect that EFOs are correctly assigned." From b525117be9ed75f3bde2b7934145654b4d018f2c Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Wed, 25 Sep 2024 11:41:43 +0100 Subject: [PATCH 058/188] fix: clean unused study_locus step parameter (#786) --- src/gentropy/config.py | 1 - src/gentropy/study_locus_validation.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 3293a882a..c56a9dfb3 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -503,7 +503,6 @@ class StudyLocusValidationStepConfig(StepConfig): valid_study_locus_path: str = MISSING invalid_study_locus_path: str = MISSING invalid_qc_reasons: list[str] = MISSING - gwas_significance: float = WindowBasedClumpingStepConfig.gwas_significance _target_: str = "gentropy.study_locus_validation.StudyLocusValidationStep" diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 287cd5645..fc69f6855 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -19,7 +19,6 @@ def __init__( session: Session, study_index_path: str, study_locus_path: list[str], - gwas_significance: float, valid_study_locus_path: str, invalid_study_locus_path: str, invalid_qc_reasons: list[str] = [], @@ -30,7 +29,6 @@ def __init__( session (Session): Session object. study_index_path (str): Path to study index file. study_locus_path (list[str]): Path to study locus dataset. 
- gwas_significance (float): GWAS significance threshold. valid_study_locus_path (str): Path to write the valid records. invalid_study_locus_path (str): Path to write the output file. invalid_qc_reasons (list[str]): List of invalid quality check reason names from `StudyLocusQualityCheck` (e.g. ['SUBSIGNIFICANT_FLAG']). From 51125c77e5d837a049bdf8dc141f40263b301302 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:00:54 +0200 Subject: [PATCH 059/188] fix(vep_parser): use nested schema for insilico predictors (#789) --- src/gentropy/common/spark_helpers.py | 52 ++++++++++++++++++- src/gentropy/datasource/ensembl/vep_parser.py | 23 ++++---- tests/gentropy/data_samples/vep_sample.jsonl | 2 + .../datasource/ensembl/test_vep_variants.py | 37 +++++++++++++ 4 files changed, 102 insertions(+), 12 deletions(-) diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 791fb913d..680975ef6 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -382,6 +382,8 @@ def order_array_of_structs_by_two_fields( """Sort array of structs by a field in descending order and by an other field in an ascending order. This function doesn't deal with null values, assumes the sort columns are not nullable. + The sorting function compares the descending_column first, in case when two values from descending_column are equal + it compares the ascending_column. When values in both columns are equal, the rows order is preserved. Args: array_name (str): Column name with array of structs @@ -406,6 +408,20 @@ def order_array_of_structs_by_two_fields( |[{1.0, 45, First}, {1.0, 125, Second}, {0.5, 232, Third}, {0.5, 233, Fourth}]| +-----------------------------------------------------------------------------+ + >>> data = [(1.0, 45, 'First'), (1.0, 45, 'Second'), (0.5, 233, 'Fourth'), (1.0, 125, 'Third'),] + >>> ( + ... 
spark.createDataFrame(data, ['col1', 'col2', 'ranking']) + ... .groupBy(f.lit('c')) + ... .agg(f.collect_list(f.struct('col1','col2', 'ranking')).alias('list')) + ... .select(order_array_of_structs_by_two_fields('list', 'col1', 'col2').alias('sorted_list')) + ... .show(truncate=False) + ... ) + +----------------------------------------------------------------------------+ + |sorted_list | + +----------------------------------------------------------------------------+ + |[{1.0, 45, First}, {1.0, 45, Second}, {1.0, 125, Third}, {0.5, 233, Fourth}]| + +----------------------------------------------------------------------------+ + """ return f.expr( f""" @@ -425,6 +441,7 @@ def order_array_of_structs_by_two_fields( when left.{descending_column} > right.{descending_column} then -1 when left.{descending_column} == right.{descending_column} and left.{ascending_column} > right.{ascending_column} then 1 when left.{descending_column} == right.{descending_column} and left.{ascending_column} < right.{ascending_column} then -1 + when left.{ascending_column} == right.{ascending_column} and left.{descending_column} == right.{descending_column} then 0 end) """ ) @@ -525,7 +542,7 @@ def get_value_from_row(row: Row, column: str) -> Any: def enforce_schema( - expected_schema: t.StructType, + expected_schema: t.ArrayType | t.StructType | Column | str, ) -> Callable[..., Any]: """A function to enforce the schema of a function output follows expectation. @@ -541,7 +558,7 @@ def my_function() -> t.StructType: return ... Args: - expected_schema (t.StructType): The expected schema of the output. + expected_schema (t.ArrayType | t.StructType | Column | str): The expected schema of the output. Returns: Callable[..., Any]: A decorator function. 
@@ -687,3 +704,34 @@ def get_standard_error_from_confidence_interval(lower: Column, upper: Column) -> """ return (upper - lower) / (2 * 1.96) + + +def get_nested_struct_schema(dtype: t.DataType) -> t.StructType: + """Get the bottom StructType from a nested ArrayType type. + + Args: + dtype (t.DataType): The nested data structure. + + Returns: + t.StructType: The nested struct schema. + + Raises: + TypeError: If the input data type is not a nested struct. + + Examples: + >>> get_nested_struct_schema(t.ArrayType(t.StructType([t.StructField('a', t.StringType())]))) + StructType([StructField('a', StringType(), True)]) + + >>> get_nested_struct_schema(t.ArrayType(t.ArrayType(t.StructType([t.StructField("a", t.StringType())])))) + StructType([StructField('a', StringType(), True)]) + """ + if isinstance(dtype, t.StructField): + dtype = dtype.dataType + + match dtype: + case t.StructType(fields=_): + return dtype + case t.ArrayType(elementType=dtype): + return get_nested_struct_schema(dtype) + case _: + raise TypeError("The input data type must be a nested struct.") diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 4b70a36e6..d84b58407 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -14,6 +14,7 @@ from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( enforce_schema, + get_nested_struct_schema, map_column_by_dictionary, order_array_of_structs_by_field, order_array_of_structs_by_two_fields, @@ -26,14 +27,16 @@ class VariantEffectPredictorParser: """Collection of methods to parse VEP output in json format.""" + # NOTE: Due to the fact that the comparison of the xrefs is done om the base of rsids + # if the field `colocalised_variants` have multiple rsids, this extracting xrefs will result in + # an array of xref structs, rather then the struct itself. 
- # Schema description of the dbXref object: DBXREF_SCHEMA = VariantIndex.get_schema()["dbXrefs"].dataType # Schema description of the in silico predictor object: - IN_SILICO_PREDICTOR_SCHEMA = VariantIndex.get_schema()[ - "inSilicoPredictors" - ].dataType + IN_SILICO_PREDICTOR_SCHEMA = get_nested_struct_schema( + VariantIndex.get_schema()["inSilicoPredictors"] + ) # Schema for the allele frequency column: ALLELE_FREQUENCY_SCHEMA = VariantIndex.get_schema()["alleleFrequencies"].dataType @@ -350,12 +353,12 @@ def _get_max_alpha_missense(transcripts: Column) -> Column: ... .select(VariantEffectPredictorParser._get_max_alpha_missense(f.col('transcripts')).alias('am')) ... .show(truncate=False) ... ) - +------------------------------------------------------+ - |am | - +------------------------------------------------------+ - |[{max alpha missense, assessment 1, 0.4, null, gene1}]| - |[{max alpha missense, null, null, null, gene1}] | - +------------------------------------------------------+ + +----------------------------------------------------+ + |am | + +----------------------------------------------------+ + |{max alpha missense, assessment 1, 0.4, null, gene1}| + |{max alpha missense, null, null, null, gene1} | + +----------------------------------------------------+ """ return f.transform( diff --git a/tests/gentropy/data_samples/vep_sample.jsonl b/tests/gentropy/data_samples/vep_sample.jsonl index 2a3cb05dc..ec8ab7dbe 100644 --- a/tests/gentropy/data_samples/vep_sample.jsonl +++ b/tests/gentropy/data_samples/vep_sample.jsonl @@ -1,2 +1,4 @@ 
{"most_severe_consequence":"missense_variant","input":"17\t29510931\trs2153029597\tT\tC","assembly_name":"GRCh38","transcript_consequences":[{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000238007","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER","tssdistance":498066,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000436028","distance":494419},{"hgvsg":"17:g.29510931T>C","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000222363","strand":1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":122371,"transcript_id":"ENST00000410431","distance":122248,"cadd_raw":5.156509},{"cadd_raw":5.156509,"transcript_id":"ENST00000581240","distance":128696,"tssdistance":128696,"variant_allele":"C","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263370","hgvsg":"17:g.29510931T>C"},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000264007","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":-1,"impact":"MODIFIER","canonical":1,"tssdistance":111323,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000582367","distance":110686},{"cadd_raw":5.156509,"distance":49616,"transcript_id":"ENST00000307201","appris":"P1","tssdistance":56106,"uniparc":["UPI00001C1FC9"],"swissprot":["Q6UXT9.120"],"variant_allele":"C","cadd_phred":28.9,"mane_select":"NM_198147.3","impact":"MODIFIER","canonical":1,"strand":-1,"gene_id":"ENSG00000168792","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C"},{"strand":1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264290","transcript_id":"ENST00000579050","distance":58649,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":58649},{"strand":1,"canonical":1,"impact":"MODIFIER","cadd_phred":28.9,"hgvsg":"17:
g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263657","transcript_id":"ENST00000577846","distance":250172,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":250172},{"variant_allele":"C","swissprot":["Q6QEF8.143"],"uniparc":["UPI0000DA4C55"],"tssdistance":111981,"transcript_id":"ENST00000388767","distance":103831,"cadd_raw":5.156509,"uniprot_isoform":["Q6QEF8-5"],"hgvsg":"17:g.29510931T>C","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000167549","strand":-1,"canonical":1,"impact":"MODIFIER","mane_select":"NM_032854.4","cadd_phred":28.9},{"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263781","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":489291,"transcript_id":"ENST00000580924","distance":489291,"cadd_raw":5.156509},{"strand":1,"impact":"MODIFIER","canonical":1,"mane_select":"NM_198529.4","cadd_phred":28.9,"uniprot_isoform":["A4FU69-1"],"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000176927","appris":"P1","transcript_id":"ENST00000394835","distance":430703,"cadd_raw":5.156509,"variant_allele":"C","swissprot":["A4FU69.119"],"uniparc":["UPI0000E59EF5"],"tssdistance":430703},{"cdna_start":2399,"tssdistance":120568,"amino_acids":"L/P","swissprot":["Q7L7X3.173"],"transcript_id":"ENST00000261716","appris":"P1","consequence_terms":["missense_variant"],"trembl":["A0A024QZ70.65"],"cds_start":1643,"mane_select":"NM_020791.4","cadd_phred":28.9,"strand":1,"cds_end":1643,"impact":"MODERATE","canonical":1,"uniparc":["UPI000004A033"],"variant_allele":"C","cadd_raw":5.156509,"sift_score":0,"protein_start":548,"cdna_end":2399,"gene_id":"ENSG00000160551","uniprot_isoform":["Q7L7X3-1"],"codons":"cTg/cCg","sift_prediction":"deleterious_low_confidence","hgvsg":"17:g.29510931T>C","alphamissense":{"am_class":"likely_pathogenic","am_pathogenicity":0.9994},"protein_end":548,"polyphen_score":0.
999,"polyphen_prediction":"probably_damaging"},{"uniparc":["UPI0000246D82"],"tssdistance":82201,"variant_allele":"C","swissprot":["Q86YJ7.146"],"cadd_raw":5.156509,"transcript_id":"ENST00000394859","appris":"P1","distance":82201,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000198720","uniprot_isoform":["Q86YJ7-1"],"hgvsg":"17:g.29510931T>C","trembl":["A0A024QZ29.60"],"mane_select":"NM_152345.5","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1},{"transcript_id":"ENST00000459235","distance":372075,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":372075,"strand":1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000239129"},{"tssdistance":205754,"variant_allele":"C","cadd_raw":5.156509,"distance":205348,"transcript_id":"ENST00000493028","gene_id":"ENSG00000240531","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"impact":"MODIFIER","canonical":1,"strand":-1},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000284162","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER","tssdistance":120269,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000580425","distance":120201},{"tssdistance":49616,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000581474","distance":49616,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264031","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER"},{"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000222858","consequence_terms":["downstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":-1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":130771,"distance":130680,"transcript_id":"ENST00000410926","cadd_raw":5.156509},{"tssdistance":461583,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000581995","distance":461583
,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264435","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"canonical":1,"impact":"MODIFIER"},{"gene_id":"ENSG00000179761","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","mane_select":"NM_016518.3","cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":1,"tssdistance":467790,"uniparc":["UPI00001410B0"],"swissprot":["Q9P0Z9.165"],"variant_allele":"C","cadd_raw":5.156509,"distance":453715,"appris":"P1","transcript_id":"ENST00000323372"},{"strand":-1,"canonical":1,"impact":"MODIFIER","cadd_phred":28.9,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000253064","transcript_id":"ENST00000517255","distance":86064,"cadd_raw":5.156509,"variant_allele":"C","tssdistance":86064},{"tssdistance":313146,"variant_allele":"C","cadd_raw":5.156509,"distance":313146,"transcript_id":"ENST00000580309","gene_id":"ENSG00000264050","consequence_terms":["upstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":-1},{"variant_allele":"C","tssdistance":134916,"transcript_id":"ENST00000582881","distance":133865,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000265625","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9},{"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000240074","consequence_terms":["upstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":28.9,"variant_allele":"C","tssdistance":344828,"distance":344828,"transcript_id":"ENST00000478775","cadd_raw":5.156509},{"cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":-1,"gene_id":"ENSG00000264647","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_raw":5.156509,"distance":80772,"transcript_id":"ENST00000584986","tssdistance":81310,"variant_allele":"C"},{"variant_allele":"C","tssdistance":65531,"transcri
pt_id":"ENST00000365335","distance":65531,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000202205","strand":-1,"canonical":1,"impact":"MODIFIER","cadd_phred":28.9},{"gene_id":"ENSG00000266111","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"impact":"MODIFIER","canonical":1,"strand":1,"tssdistance":158862,"variant_allele":"C","cadd_raw":5.156509,"distance":106826,"transcript_id":"ENST00000584958"},{"variant_allele":"C","tssdistance":265770,"distance":264816,"transcript_id":"ENST00000580031","cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000265713","consequence_terms":["downstream_gene_variant"],"canonical":1,"impact":"MODIFIER","strand":-1,"cadd_phred":28.9},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000108255","uniprot_isoform":["P05813-1"],"hgvsg":"17:g.29510931T>C","mane_select":"NM_005208.5","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"uniparc":["UPI00001283CF"],"tssdistance":264072,"variant_allele":"C","swissprot":["P05813.205"],"cadd_raw":5.156509,"transcript_id":"ENST00000225387","appris":"P1","distance":256437},{"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000264808","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":-1,"canonical":1,"impact":"MODIFIER","tssdistance":120702,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000685798","distance":120702},{"cadd_phred":28.9,"strand":-1,"canonical":1,"impact":"MODIFIER","consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000290082","hgvsg":"17:g.29510931T>C","cadd_raw":5.156509,"transcript_id":"ENST00000702873","distance":56892,"tssdistance":57602,"variant_allele":"C"},{"mane_select":"NM_078471.4","cadd_phred":28.9,"strand":-1,"canonical":1,"impact":"MODIFIER","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000196535","uniprot_isoform":["Q92614-1"],"hgvsg":"17:g.29510931T>C","cadd_raw"
:5.156509,"appris":"P4","transcript_id":"ENST00000527372","distance":330533,"uniparc":["UPI0000167F32"],"tssdistance":330533,"variant_allele":"C","swissprot":["Q92614.216"]},{"cadd_raw":5.156509,"transcript_id":"ENST00000492004","distance":170109,"tssdistance":170109,"variant_allele":"C","cadd_phred":28.9,"strand":-1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000239256","hgvsg":"17:g.29510931T>C"},{"gene_id":"ENSG00000263709","consequence_terms":["downstream_gene_variant"],"hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"canonical":1,"impact":"MODIFIER","strand":1,"tssdistance":370448,"variant_allele":"C","cadd_raw":5.156509,"distance":355442,"transcript_id":"ENST00000582196"},{"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000252657","hgvsg":"17:g.29510931T>C","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"tssdistance":266688,"variant_allele":"C","cadd_raw":5.156509,"transcript_id":"ENST00000516848","distance":266688},{"trembl":["F5H527.88"],"mane_select":"NM_001282129.2","cadd_phred":28.9,"strand":-1,"impact":"MODIFIER","canonical":1,"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000141298","hgvsg":"17:g.29510931T>C","cadd_raw":5.156509,"transcript_id":"ENST00000540801","appris":"A1","distance":115007,"uniparc":["UPI0002065A97"],"tssdistance":419297,"variant_allele":"C"},{"tssdistance":216783,"uniparc":["UPI00001B078D"],"swissprot":["Q7Z417.159"],"variant_allele":"C","cadd_raw":5.156509,"distance":216783,"transcript_id":"ENST00000225388","appris":"P1","gene_id":"ENSG00000108256","consequence_terms":["upstream_gene_variant"],"hgvsg":"17:g.29510931T>C","uniprot_isoform":["Q7Z417-1"],"mane_select":"NM_020772.3","cadd_phred":28.9,"impact":"MODIFIER","canonical":1,"strand":-1},{"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000108262","uniprot_isoform":["Q9Y2X7-1"],"hgvsg":"17:g.29510931T>C","mane_select":"NM_014030.4","cadd_phred":28.9,"strand":-1,"i
mpact":"MODIFIER","canonical":1,"uniparc":["UPI000013C867"],"tssdistance":78717,"variant_allele":"C","swissprot":["Q9Y2X7.219"],"cadd_raw":5.156509,"transcript_id":"ENST00000225394","appris":"A1","distance":62544},{"variant_allele":"C","tssdistance":499922,"transcript_id":"ENST00000581964","distance":499922,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000263613","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9},{"variant_allele":"C","tssdistance":306457,"transcript_id":"ENST00000580812","distance":306457,"cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000178082","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":28.9},{"variant_allele":"C","tssdistance":352471,"distance":352471,"transcript_id":"ENST00000584258","cadd_raw":5.156509,"hgvsg":"17:g.29510931T>C","gene_id":"ENSG00000263477","consequence_terms":["upstream_gene_variant"],"canonical":1,"impact":"MODIFIER","strand":1,"cadd_phred":28.9},{"cadd_raw":5.156509,"transcript_id":"ENST00000301057","appris":"P1","distance":57770,"uniparc":["UPI000003B08D"],"tssdistance":57770,"variant_allele":"C","swissprot":["Q8NBR0.130"],"mane_select":"NM_138349.4","cadd_phred":28.9,"strand":1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000167543","hgvsg":"17:g.29510931T>C"}],"allele_string":"T/C","seq_region_name":"17","strand":1,"end":29510931,"start":29510931,"colocated_variants":[{"clin_sig":["pathogenic"],"clin_sig_allele":"C:pathogenic","phenotype_or_disease":1,"strand":1,"allele_string":"T/C","start":29510931,"id":"rs2153029597","seq_region_name":"17","pubmed":[33565190],"end":29510931,"var_synonyms":{"ClinVar":["RCV001731168","VCV001300172"],"OMIM":[610266.0003]}}],"id":"rs2153029597"} 
{"strand":1,"seq_region_name":"9","allele_string":"C/T","transcript_consequences":[{"hgvsg":"9:g.82445881C>T","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000228046","strand":-1,"impact":"MODIFIER","canonical":1,"cadd_phred":7.002,"variant_allele":"T","tssdistance":17856,"transcript_id":"ENST00000392516","distance":17856,"cadd_raw":0.6583},{"cadd_phred":7.002,"impact":"MODIFIER","canonical":1,"strand":1,"gene_id":"ENSG00000225085","consequence_terms":["downstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_raw":0.6583,"distance":39642,"transcript_id":"ENST00000436084","tssdistance":40693,"variant_allele":"T"},{"cadd_raw":0.6583,"transcript_id":"ENST00000637606","tssdistance":468267,"variant_allele":"T","cadd_phred":7.002,"canonical":1,"impact":"MODIFIER","strand":1,"gene_id":"ENSG00000290551","consequence_terms":["intron_variant","non_coding_transcript_variant"],"hgvsg":"9:g.82445881C>T"},{"gene_id":"ENSG00000278988","consequence_terms":["upstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"canonical":1,"impact":"MODIFIER","strand":1,"tssdistance":97837,"variant_allele":"T","cadd_raw":0.6583,"distance":97837,"transcript_id":"ENST00000623079"},{"swissprot":["Q6ZQQ2.115"],"variant_allele":"T","tssdistance":457109,"uniparc":["UPI00001C10A6"],"distance":450628,"appris":"P1","transcript_id":"ENST00000344803","cadd_raw":0.6583,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000214929","consequence_terms":["downstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":7.002,"mane_select":"NM_001001670.3"},{"cadd_phred":7.002,"impact":"MODIFIER","canonical":1,"strand":-1,"gene_id":"ENSG00000230360","consequence_terms":["upstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_raw":0.6583,"distance":357113,"transcript_id":"ENST00000417796","tssdistance":357113,"variant_allele":"T"},{"cadd_raw":0.6583,"transcript_id":"ENST00000422010","distance":5775,"tssdistance":5775,"variant_allele":"T","cadd_phred":7.002,"strand":1,"ca
nonical":1,"impact":"MODIFIER","consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000232749","hgvsg":"9:g.82445881C>T"},{"tssdistance":12976,"variant_allele":"T","cadd_raw":0.6583,"transcript_id":"ENST00000438986","distance":12976,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000228123","hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"strand":-1,"canonical":1,"impact":"MODIFIER"},{"tssdistance":382199,"variant_allele":"T","cadd_raw":0.6583,"transcript_id":"ENST00000434692","distance":382199,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000231649","hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"strand":-1,"canonical":1,"impact":"MODIFIER"},{"tssdistance":298911,"variant_allele":"T","cadd_raw":0.6583,"transcript_id":"ENST00000432491","distance":298186,"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000233309","hgvsg":"9:g.82445881C>T","cadd_phred":7.002,"strand":-1,"canonical":1,"impact":"MODIFIER"},{"canonical":1,"impact":"MODIFIER","strand":1,"cadd_phred":7.002,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000235377","consequence_terms":["downstream_gene_variant"],"distance":314249,"transcript_id":"ENST00000445918","cadd_raw":0.6583,"variant_allele":"T","tssdistance":315129},{"variant_allele":"T","tssdistance":17682,"distance":17588,"transcript_id":"ENST00000636401","cadd_raw":0.6583,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000228430","consequence_terms":["downstream_gene_variant"],"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":7.002},{"cadd_raw":0.6583,"transcript_id":"ENST00000585776","distance":468792,"tssdistance":468792,"variant_allele":"T","cadd_phred":7.002,"strand":-1,"impact":"MODIFIER","canonical":1,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000267559","hgvsg":"9:g.82445881C>T"},{"impact":"MODIFIER","canonical":1,"strand":1,"cadd_phred":7.002,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000237770","consequence_terms":["downstream_gene_variant"],"distance":473751,"transcri
pt_id":"ENST00000429999","cadd_raw":0.6583,"variant_allele":"T","tssdistance":479317},{"distance":166671,"transcript_id":"ENST00000661177","cadd_raw":0.6583,"variant_allele":"T","tssdistance":200703,"impact":"MODIFIER","canonical":1,"strand":-1,"cadd_phred":7.002,"hgvsg":"9:g.82445881C>T","gene_id":"ENSG00000286612","consequence_terms":["downstream_gene_variant"]},{"mane_select":"NM_207416.3","cadd_phred":7.002,"impact":"MODIFIER","canonical":1,"strand":1,"gene_id":"ENSG00000186788","consequence_terms":["downstream_gene_variant"],"hgvsg":"9:g.82445881C>T","cadd_raw":0.6583,"distance":495788,"appris":"P1","transcript_id":"ENST00000445385","tssdistance":502381,"uniparc":["UPI000048D678"],"swissprot":["P0C874.81"],"variant_allele":"T"}],"assembly_name":"GRCh38","input":"9\t82445881\t9_82445881_C_T\tC\tT","most_severe_consequence":"intron_variant","id":"9_82445881_C_T","colocated_variants":[{"phenotype_or_disease":1,"strand":1,"allele_string":"C/G/T","frequencies":{"T":{"gnomadg":0.01197,"gnomadg_amr":0.0191,"gnomadg_afr":0.003331,"gnomadg_asj":0.02364,"eas":0,"amr":0.0216,"gnomadg_eas":0,"sas":0,"gnomadg_nfe":0.01704,"gnomadg_fin":0.009992,"gnomadg_mid":0.006329,"afr":0,"gnomadg_oth":0.01772,"gnomadg_ami":0.003289,"af":0.0068,"eur":0.0189,"gnomadg_sas":0.0004142}},"start":82445881,"id":"rs117517710","seq_region_name":"9","pubmed":[31073882],"end":82445881}],"end":82445881,"start":82445881} 
+{"assembly_name":"GRCh38","seq_region_name":"20","id":"rs1555828246","start":10645397,"strand":1,"transcript_consequences":[{"hgvsg":"20:g.10645397C>T","canonical":1,"strand":-1,"distance":295769,"consequence_terms":["upstream_gene_variant"],"cadd_raw":4.925023,"cadd_phred":27.5,"variant_allele":"T","gene_id":"ENSG00000214835","impact":"MODIFIER","transcript_id":"ENST00000446637","tssdistance":295769},{"impact":"MODIFIER","transcript_id":"ENST00000417299","tssdistance":51552,"gene_id":"ENSG00000224961","distance":51552,"consequence_terms":["upstream_gene_variant"],"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"canonical":1,"hgvsg":"20:g.10645397C>T","strand":1},{"consequence_terms":["upstream_gene_variant"],"distance":355907,"variant_allele":"T","cadd_phred":27.5,"cadd_raw":4.925023,"strand":1,"hgvsg":"20:g.10645397C>T","canonical":1,"transcript_id":"ENST00000605338","impact":"MODIFIER","tssdistance":355907,"gene_id":"ENSG00000270777"},{"gene_id":"ENSG00000235036","transcript_id":"ENST00000456064","tssdistance":31168,"impact":"MODIFIER","canonical":1,"hgvsg":"20:g.10645397C>T","strand":-1,"distance":31168,"consequence_terms":["upstream_gene_variant"],"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023},{"hgvsg":"20:g.10645397C>T","canonical":1,"strand":-1,"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"distance":107881,"consequence_terms":["downstream_gene_variant"],"gene_id":"ENSG00000234900","transcript_id":"ENST00000418690","impact":"MODIFIER","tssdistance":119889},{"strand":1,"hgvsg":"20:g.10645397C>T","canonical":1,"cadd_raw":4.925023,"cadd_phred":27.5,"variant_allele":"T","consequence_terms":["downstream_gene_variant"],"distance":444573,"gene_id":"ENSG00000230506","transcript_id":"ENST00000662058","impact":"MODIFIER","tssdistance":472875},{"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"consequence_terms":["upstream_gene_variant"],"distance":211175,"strand":-1,"canonical":1,"hgvsg":"20:g.10645397C>T","uniparc":["UPI
0000D483F7"],"mane_select":"NM_001394149.2","tssdistance":211175,"transcript_id":"ENST00000713549","impact":"MODIFIER","gene_id":"ENSG00000285508"},{"impact":"MODIFIER","transcript_id":"ENST00000666915","tssdistance":472696,"gene_id":"ENSG00000232448","consequence_terms":["downstream_gene_variant"],"distance":458657,"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"strand":1,"canonical":1,"hgvsg":"20:g.10645397C>T"},{"consequence_terms":["downstream_gene_variant"],"distance":310699,"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"strand":1,"canonical":1,"hgvsg":"20:g.10645397C>T","transcript_id":"ENST00000416198","impact":"MODIFIER","tssdistance":310978,"gene_id":"ENSG00000237005"},{"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"distance":229936,"consequence_terms":["downstream_gene_variant"],"hgvsg":"20:g.10645397C>T","canonical":1,"strand":-1,"tssdistance":263882,"transcript_id":"ENST00000448859","impact":"MODIFIER","gene_id":"ENSG00000232900"},{"impact":"MODIFIER","transcript_id":"ENST00000649912","tssdistance":211175,"appris":"P1","gene_id":"ENSG00000285723","trembl":["Q9HB66.115"],"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"distance":211175,"consequence_terms":["upstream_gene_variant"],"canonical":1,"hgvsg":"20:g.10645397C>T","strand":-1,"mane_select":"NM_001394148.2","uniparc":["UPI000006FBAA"]},{"transcript_id":"ENST00000347364","tssdistance":211175,"impact":"MODIFIER","appris":"P1","gene_id":"ENSG00000125863","cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"swissprot":["Q9NPJ1.195"],"consequence_terms":["upstream_gene_variant"],"distance":211175,"strand":-1,"canonical":1,"hgvsg":"20:g.10645397C>T","uniparc":["UPI000012F199"],"mane_select":"NM_170784.3"},{"tssdistance":4298,"transcript_id":"ENST00000615931","impact":"MODIFIER","gene_id":"ENSG00000273745","cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"distance":4239,"consequence_terms":["downstream_gene_variant"],"canonical":1,"hgvsg":"2
0:g.10645397C>T","strand":-1},{"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"consequence_terms":["downstream_gene_variant"],"distance":257591,"strand":1,"hgvsg":"20:g.10645397C>T","canonical":1,"impact":"MODIFIER","transcript_id":"ENST00000441308","tssdistance":259618,"gene_id":"ENSG00000230750"},{"consequence_terms":["downstream_gene_variant"],"distance":17367,"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"swissprot":["Q5VYV7.120"],"uniparc":["UPI00001D8318"],"mane_select":"NM_001009608.3","strand":1,"hgvsg":"20:g.10645397C>T","canonical":1,"appris":"P1","transcript_id":"ENST00000334534","impact":"MODIFIER","tssdistance":210092,"gene_id":"ENSG00000149346"},{"gene_id":"ENSG00000125899","transcript_id":"ENST00000659767","tssdistance":350896,"impact":"MODIFIER","hgvsg":"20:g.10645397C>T","canonical":1,"strand":1,"cadd_raw":4.925023,"cadd_phred":27.5,"variant_allele":"T","distance":350896,"consequence_terms":["upstream_gene_variant"]},{"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"distance":335492,"consequence_terms":["downstream_gene_variant"],"canonical":1,"hgvsg":"20:g.10645397C>T","strand":1,"transcript_id":"ENST00000688853","tssdistance":336975,"impact":"MODIFIER","gene_id":"ENSG00000289505"},{"appris":"P3","impact":"MODIFIER","transcript_id":"ENST00000254976","tssdistance":426567,"gene_id":"ENSG00000132639","uniprot_isoform":["P60880-1"],"distance":337979,"consequence_terms":["downstream_gene_variant"],"swissprot":["P60880.188"],"cadd_raw":4.925023,"cadd_phred":27.5,"variant_allele":"T","mane_select":"NM_130811.4","uniparc":["UPI0000001103"],"hgvsg":"20:g.10645397C>T","canonical":1,"strand":1},{"strand":1,"canonical":1,"hgvsg":"20:g.10645397C>T","consequence_terms":["upstream_gene_variant"],"distance":27531,"cadd_raw":4.925023,"variant_allele":"T","cadd_phred":27.5,"gene_id":"ENSG00000270792","transcript_id":"ENST00000605292","impact":"MODIFIER","tssdistance":27531},{"hgvsg":"20:g.10645397C>T","canonical":1,"strand":-1,"di
stance":276678,"consequence_terms":["upstream_gene_variant"],"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"gene_id":"ENSG00000227906","impact":"MODIFIER","transcript_id":"ENST00000421143","tssdistance":276678},{"distance":243185,"consequence_terms":["downstream_gene_variant"],"variant_allele":"T","cadd_phred":27.5,"cadd_raw":4.925023,"canonical":1,"hgvsg":"20:g.10645397C>T","strand":-1,"tssdistance":243691,"transcript_id":"ENST00000406588","impact":"MODIFIER","gene_id":"ENSG00000217809"},{"uniprot_isoform":["P78504-1"],"cdna_end":2541,"appris":"P1","transcript_id":"ENST00000254958","impact":"MODERATE","tssdistance":28602,"sift_prediction":"deleterious","codons":"tGt/tAt","uniparc":["UPI00000498B5"],"strand":-1,"canonical":1,"swissprot":["P78504.228"],"cds_start":2072,"gene_id":"ENSG00000101384","cds_end":2072,"protein_start":691,"sift_score":0,"mane_select":"NM_000214.3","hgvsg":"20:g.10645397C>T","consequence_terms":["missense_variant"],"protein_end":691,"cadd_raw":4.925023,"cadd_phred":27.5,"variant_allele":"T","cdna_start":2541,"amino_acids":"C/Y"},{"canonical":1,"hgvsg":"20:g.10645397C>T","strand":-1,"cadd_phred":27.5,"variant_allele":"T","cadd_raw":4.925023,"distance":326644,"consequence_terms":["upstream_gene_variant"],"gene_id":"ENSG00000286936","transcript_id":"ENST00000664194","tssdistance":326644,"impact":"MODIFIER"}],"input":"20\t10645397\trs1555828246\tC\tT\t.\t.\t.","allele_string":"C/T","end":10645397,"colocated_variants":[{"end":10645397,"allele_string":"C/T","strand":1,"id":"rs1555828246","seq_region_name":"20","start":10645397},{"end":10645397,"allele_string":"C/T","phenotype_or_disease":1,"clin_sig":["uncertain_significance"],"pubmed":[26076142,21752016],"strand":1,"clin_sig_allele":"T:uncertain_significance","start":10645397,"seq_region_name":"20","var_synonyms":{"ClinVar":["RCV002260566","VCV001693298"]}}],"most_severe_consequence":"missense_variant"} 
+{"input":"20\t10649087\trs863223652\tG\tA\t.\t.\t.","transcript_consequences":[{"strand":1,"canonical":1,"hgvsg":"20:g.10649087G>A","cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171,"consequence_terms":["upstream_gene_variant"],"distance":352217,"gene_id":"ENSG00000270777","transcript_id":"ENST00000605338","impact":"MODIFIER","tssdistance":352217},{"impact":"MODIFIER","transcript_id":"ENST00000417299","tssdistance":47862,"gene_id":"ENSG00000224961","cadd_raw":9.333171,"cadd_phred":40,"variant_allele":"A","distance":47862,"consequence_terms":["upstream_gene_variant"],"hgvsg":"20:g.10649087G>A","canonical":1,"strand":1},{"gene_id":"ENSG00000214835","impact":"MODIFIER","transcript_id":"ENST00000446637","tssdistance":299459,"strand":-1,"hgvsg":"20:g.10649087G>A","canonical":1,"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171,"consequence_terms":["upstream_gene_variant"],"distance":299459},{"transcript_id":"ENST00000456064","impact":"MODIFIER","tssdistance":34858,"gene_id":"ENSG00000235036","distance":34858,"consequence_terms":["upstream_gene_variant"],"cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171,"canonical":1,"hgvsg":"20:g.10649087G>A","strand":-1},{"transcript_id":"ENST00000662058","tssdistance":476565,"impact":"MODIFIER","gene_id":"ENSG00000230506","consequence_terms":["downstream_gene_variant"],"distance":448263,"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171,"strand":1,"canonical":1,"hgvsg":"20:g.10649087G>A"},{"impact":"MODIFIER","transcript_id":"ENST00000418690","tssdistance":116199,"gene_id":"ENSG00000234900","cadd_raw":9.333171,"variant_allele":"A","cadd_phred":40,"distance":104191,"consequence_terms":["downstream_gene_variant"],"hgvsg":"20:g.10649087G>A","canonical":1,"strand":-1},{"gene_id":"ENSG00000232448","transcript_id":"ENST00000666915","tssdistance":476386,"impact":"MODIFIER","hgvsg":"20:g.10649087G>A","canonical":1,"strand":1,"cadd_raw":9.333171,"cadd_phred":40,"variant_allele":"A","distance":462347,"consequence_terms
":["downstream_gene_variant"]},{"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171,"distance":214865,"consequence_terms":["upstream_gene_variant"],"hgvsg":"20:g.10649087G>A","canonical":1,"strand":-1,"mane_select":"NM_001394149.2","uniparc":["UPI0000D483F7"],"impact":"MODIFIER","transcript_id":"ENST00000713549","tssdistance":214865,"gene_id":"ENSG00000285508"},{"transcript_id":"ENST00000416198","impact":"MODIFIER","tssdistance":314668,"gene_id":"ENSG00000237005","consequence_terms":["downstream_gene_variant"],"distance":314389,"cadd_raw":9.333171,"cadd_phred":40,"variant_allele":"A","strand":1,"canonical":1,"hgvsg":"20:g.10649087G>A"},{"gene_id":"ENSG00000289505","tssdistance":340665,"transcript_id":"ENST00000688853","impact":"MODIFIER","hgvsg":"20:g.10649087G>A","canonical":1,"strand":1,"distance":339182,"consequence_terms":["downstream_gene_variant"],"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171},{"impact":"MODIFIER","transcript_id":"ENST00000659767","tssdistance":347206,"gene_id":"ENSG00000125899","distance":347206,"consequence_terms":["upstream_gene_variant"],"cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171,"hgvsg":"20:g.10649087G>A","canonical":1,"strand":1},{"consequence_terms":["upstream_gene_variant"],"distance":214865,"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171,"swissprot":["Q9NPJ1.195"],"uniparc":["UPI000012F199"],"mane_select":"NM_170784.3","strand":-1,"hgvsg":"20:g.10649087G>A","canonical":1,"appris":"P1","transcript_id":"ENST00000347364","tssdistance":214865,"impact":"MODIFIER","gene_id":"ENSG00000125863"},{"gene_id":"ENSG00000285723","appris":"P1","tssdistance":214865,"transcript_id":"ENST00000649912","impact":"MODIFIER","mane_select":"NM_001394148.2","uniparc":["UPI000006FBAA"],"hgvsg":"20:g.10649087G>A","canonical":1,"strand":-1,"distance":214865,"consequence_terms":["upstream_gene_variant"],"trembl":["Q9HB66.115"],"cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171},{"strand":-1,"hgvsg":"20:g.10649087G>A","
canonical":1,"consequence_terms":["downstream_gene_variant"],"distance":549,"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171,"gene_id":"ENSG00000273745","impact":"MODIFIER","transcript_id":"ENST00000615931","tssdistance":608},{"consequence_terms":["downstream_gene_variant"],"distance":261281,"cadd_raw":9.333171,"variant_allele":"A","cadd_phred":40,"strand":1,"canonical":1,"hgvsg":"20:g.10649087G>A","transcript_id":"ENST00000441308","tssdistance":263308,"impact":"MODIFIER","gene_id":"ENSG00000230750"},{"strand":-1,"hgvsg":"20:g.10649087G>A","canonical":1,"cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171,"consequence_terms":["downstream_gene_variant"],"distance":226246,"gene_id":"ENSG00000232900","transcript_id":"ENST00000448859","impact":"MODIFIER","tssdistance":260192},{"appris":"P1","transcript_id":"ENST00000334534","tssdistance":213782,"impact":"MODIFIER","gene_id":"ENSG00000149346","consequence_terms":["downstream_gene_variant"],"distance":21057,"cadd_raw":9.333171,"variant_allele":"A","cadd_phred":40,"swissprot":["Q5VYV7.120"],"uniparc":["UPI00001D8318"],"mane_select":"NM_001009608.3","strand":1,"hgvsg":"20:g.10649087G>A","canonical":1},{"consequence_terms":["downstream_gene_variant"],"distance":341669,"cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171,"swissprot":["P60880.188"],"uniparc":["UPI0000001103"],"mane_select":"NM_130811.4","strand":1,"canonical":1,"hgvsg":"20:g.10649087G>A","appris":"P3","transcript_id":"ENST00000254976","tssdistance":430257,"impact":"MODIFIER","gene_id":"ENSG00000132639","uniprot_isoform":["P60880-1"]},{"gene_id":"ENSG00000227906","transcript_id":"ENST00000421143","impact":"MODIFIER","tssdistance":280368,"strand":-1,"canonical":1,"hgvsg":"20:g.10649087G>A","cadd_raw":9.333171,"variant_allele":"A","cadd_phred":40,"consequence_terms":["upstream_gene_variant"],"distance":280368},{"consequence_terms":["downstream_gene_variant"],"distance":239495,"cadd_phred":40,"variant_allele":"A","cadd_raw":9.333171,"strand":-1,"ca
nonical":1,"hgvsg":"20:g.10649087G>A","transcript_id":"ENST00000406588","tssdistance":240001,"impact":"MODIFIER","gene_id":"ENSG00000217809"},{"strand":1,"hgvsg":"20:g.10649087G>A","canonical":1,"consequence_terms":["upstream_gene_variant"],"distance":23841,"variant_allele":"A","cadd_phred":40,"cadd_raw":9.333171,"gene_id":"ENSG00000270792","impact":"MODIFIER","transcript_id":"ENST00000605292","tssdistance":23841},{"gene_id":"ENSG00000286936","transcript_id":"ENST00000664194","impact":"MODIFIER","tssdistance":330334,"strand":-1,"canonical":1,"hgvsg":"20:g.10649087G>A","cadd_raw":9.333171,"cadd_phred":40,"variant_allele":"A","consequence_terms":["upstream_gene_variant"],"distance":330334},{"transcript_id":"ENST00000254958","impact":"HIGH","tssdistance":24912,"lof_info":"PERCENTILE:0.374350560568772,GERP_DIST:2349.53755103406,BP_DIST:2273,DIST_FROM_LAST_EXON:1816,50_BP_RULE:PASS,ANN_ORF:237.018,MAX_ORF:237.018","codons":"Cag/Tag","cdna_end":1838,"appris":"P1","uniprot_isoform":["P78504-1"],"swissprot":["P78504.228"],"lof":"HC","strand":-1,"canonical":1,"uniparc":["UPI00000498B5"],"cds_end":1369,"protein_start":457,"cds_start":1369,"gene_id":"ENSG00000101384","cadd_raw":9.333171,"variant_allele":"A","cadd_phred":40,"cdna_start":1838,"amino_acids":"Q/*","consequence_terms":["stop_gained"],"protein_end":457,"hgvsg":"20:g.10649087G>A","mane_select":"NM_000214.3"}],"strand":1,"colocated_variants":[{"strand":1,"start":10649087,"seq_region_name":"20","id":"rs1555828721","end":10649087,"allele_string":"G/A"},{"id":"rs863223652","var_synonyms":{"ClinVar":["RCV002383649","VCV001770992"]},"seq_region_name":"20","clin_sig_allele":"A:pathogenic","start":10649087,"strand":1,"clin_sig":["pathogenic"],"phenotype_or_disease":1,"allele_string":"G/A","end":10649087}],"most_severe_consequence":"stop_gained","allele_string":"G/A","end":10649087,"id":"rs863223652","assembly_name":"GRCh38","seq_region_name":"20","start":10649087} diff --git 
a/tests/gentropy/datasource/ensembl/test_vep_variants.py b/tests/gentropy/datasource/ensembl/test_vep_variants.py index 97f255cf0..5757fa2f5 100644 --- a/tests/gentropy/datasource/ensembl/test_vep_variants.py +++ b/tests/gentropy/datasource/ensembl/test_vep_variants.py @@ -7,6 +7,7 @@ import pytest from pyspark.sql import DataFrame from pyspark.sql import functions as f +from pyspark.sql import types as t from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.ensembl.vep_parser import VariantEffectPredictorParser @@ -118,6 +119,21 @@ def test_extract_variant_index_from_vep( assert isinstance( variant_index, VariantIndex ), "VariantIndex object not created." + in_silico_schema = t.ArrayType( + t.StructType( + [ + t.StructField("method", t.StringType(), True), + t.StructField("assessment", t.StringType(), True), + t.StructField("score", t.FloatType(), True), + t.StructField("assessmentFlag", t.StringType(), True), + t.StructField("targetId", t.StringType(), True), + ] + ) + ) + assert ( + variant_index.df.select("inSilicoPredictors").schema.fields[0].dataType + == in_silico_schema + ), "In silico schema is not correct." def test_process(self: TestVEPParser) -> None: """Test process method.""" @@ -144,3 +160,24 @@ def test_variant_count(self: TestVEPParser) -> None: assert ( self.raw_vep_output.count() == self.processed_vep_output.count() ), f"Incorrect number of variants in processed VEP output: expected {self.raw_vep_output.count()}, got {self.processed_vep_output.count()}." + + def test_collection(self: TestVEPParser) -> None: + """Test if the collection of VEP variantIndex runs without failures.""" + assert ( + len(self.processed_vep_output.collect()) + == self.processed_vep_output.count() + ), "Collection performed incorrectly." 
+ + def test_ensembl_transcripts_no_duplicates(self: TestVEPParser) -> None: + """Test if in single row all ensembl target ids (gene ids) do not have duplicates.""" + targets = ( + self.processed_vep_output.limit(1) + .select(f.explode("transcriptConsequences").alias("t")) + .select("t.targetId") + .collect() + ) + + asserted_targets = [t["targetId"] for t in targets] + assert len(asserted_targets) == len( + set(asserted_targets) + ), "Duplicate ensembl transcripts in a single row." From 9f833297a1c374d6cb61fdcbfade51b5efb5203e Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:16:29 +0200 Subject: [PATCH 060/188] fix: remove study_index_path from coloc step (#791) --- src/gentropy/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index c56a9dfb3..3a67e7868 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -36,7 +36,6 @@ class ColocalisationConfig(StepConfig): """Colocalisation step configuration.""" credible_set_path: str = MISSING - study_index_path: str = MISSING coloc_path: str = MISSING colocalisation_method: str = MISSING _target_: str = "gentropy.colocalisation.ColocalisationStep" From a135d26001acf9d5abc4b9b4b0906de956bfae93 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:35:51 +0200 Subject: [PATCH 061/188] fix(safe_array_union): allow for sorting nested structs (#793) * fix: remove study_index_path from coloc step * fix(safe_array_union): sort struct fields in array --------- Co-authored-by: Szymon Szyszkowski --- src/gentropy/common/spark_helpers.py | 88 ++++++++++++++++++- src/gentropy/dataset/variant_index.py | 14 ++- .../datasource/open_targets/variants.py | 1 - 3 files changed, 98 insertions(+), 5 deletions(-) diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 
680975ef6..3fdabfbcc 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -614,14 +614,21 @@ def rename_all_columns(df: DataFrame, prefix: str) -> DataFrame: ) -def safe_array_union(a: Column, b: Column) -> Column: +def safe_array_union( + a: Column, b: Column, fields_order: list[str] | None = None +) -> Column: """Merge the content of two optional columns. - The function assumes the array columns have the same schema. Otherwise, the function will fail. + The function assumes the array columns have the same schema. + If the `fields_order` is passed, the function assumes that it deals with array of structs and sorts the nested + struct fields by the provided `fields_order` before conducting array_merge. + If the `fields_order` is not passed and both columns are > type then function assumes struct fields have the same order, + otherwise the function will raise an AnalysisException. Args: a (Column): One optional array column. b (Column): The other optional array column. + fields_order (list[str] | None): The order of the fields in the struct. Defaults to None. Returns: Column: array column with merged content. @@ -644,12 +651,89 @@ def safe_array_union(a: Column, b: Column) -> Column: | null| +------+ + >>> schema="arr2: array>, arr: array>" + >>> data = [([(1,"a",), (2, "c")],[("a", 1,)]),] + >>> df = spark.createDataFrame(data=data, schema=schema) + >>> df.select(safe_array_union(f.col("arr"), f.col("arr2"), fields_order=["a", "b"]).alias("merged")).show() + +----------------+ + | merged| + +----------------+ + |[{a, 1}, {c, 2}]| + +----------------+ + + >>> schema="arr2: array>, arr: array>" + >>> data = [([(1,"a",), (2, "c")],[("a", 1,)]),] + >>> df = spark.createDataFrame(data=data, schema=schema) + >>> df.select(safe_array_union(f.col("arr"), f.col("arr2")).alias("merged")).show() # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + pyspark.sql.utils.AnalysisException: ... 
""" + if fields_order: + # sort the nested struct fields by the provided order + a = sort_array_struct_by_columns(a, fields_order) + b = sort_array_struct_by_columns(b, fields_order) return f.when(a.isNotNull() & b.isNotNull(), f.array_union(a, b)).otherwise( f.coalesce(a, b) ) + +def sort_array_struct_by_columns(column: Column, fields_order: list[str]) -> Column: + """Sort nested struct fields by provided fields order. + + Args: + column (Column): Column with array of structs. + fields_order (list[str]): List of field names to sort by. + + Returns: + Column: Sorted column. + + Examples: + >>> schema="arr: array>" + >>> data = [([(1,"a",), (2, "c")],)] + >>> fields_order = ["a", "b"] + >>> df = spark.createDataFrame(data=data, schema=schema) + >>> df.select(sort_array_struct_by_columns(f.col("arr"), fields_order).alias("sorted")).show() + +----------------+ + | sorted| + +----------------+ + |[{c, 2}, {a, 1}]| + +----------------+ + + """ + column_name = extract_column_name(column) + fields_order_expr = ", ".join([f"x.{field}" for field in fields_order]) + return f.expr( + f"sort_array(transform({column_name}, x -> struct({fields_order_expr})), False)" + ).alias(column_name) + + +def extract_column_name(column: Column) -> str: + """Extract column name from a column expression. + + Args: + column (Column): Column expression. + + Returns: + str: Column name. + + Raises: + ValueError: If the column name cannot be extracted. 
+ + Examples: + >>> extract_column_name(f.col('col1')) + 'col1' + >>> extract_column_name(f.sort_array(f.col('col1'))) + 'sort_array(col1, true)' + """ + pattern = re.compile("^Column<'(?P.*)'>?") + + _match = pattern.search(str(column)) + if not _match: + raise ValueError(f"Cannot extract column name from {column}") + return _match.group("name") + + def create_empty_column_if_not_exists( col_name: str, col_schema: t.DataType = t.NullType() ) -> Column: diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 1cc1eac1b..2f24cd985 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -6,9 +6,11 @@ from typing import TYPE_CHECKING import pyspark.sql.functions as f +import pyspark.sql.types as t from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( + get_nested_struct_schema, get_record_with_maximum_value, normalise_column, rename_all_columns, @@ -131,6 +133,7 @@ def add_annotation( # Prefix for renaming columns: prefix = "annotation_" + # Generate select expressions that to merge and import columns from annotation: select_expressions = [] @@ -141,10 +144,17 @@ def add_annotation( # If an annotation column can be found in both datasets: if (column in self.df.columns) and (column in annotation_source.df.columns): # Arrays are merged: - if "ArrayType" in field.dataType.__str__(): + if isinstance(field.dataType, t.ArrayType): + fields_order = None + if isinstance(field.dataType.elementType, t.StructType): + # Extract the schema of the array to get the order of the fields: + array_schema = [ + field for field in VariantIndex.get_schema().fields if field.name == column + ][0].dataType + fields_order = get_nested_struct_schema(array_schema).fieldNames() select_expressions.append( safe_array_union( - f.col(column), f.col(f"{prefix}{column}") + f.col(column), f.col(f"{prefix}{column}"), fields_order ).alias(column) ) # Non-array columns are 
coalesced: diff --git a/src/gentropy/datasource/open_targets/variants.py b/src/gentropy/datasource/open_targets/variants.py index 03018438b..5b6822ae6 100644 --- a/src/gentropy/datasource/open_targets/variants.py +++ b/src/gentropy/datasource/open_targets/variants.py @@ -95,7 +95,6 @@ def as_vcf_df( variant_df = variant_df.withColumn( col, create_empty_column_if_not_exists(col) ) - return ( variant_df.filter(f.col("variantId").isNotNull()) .withColumn( From 26483c9866ca44ffc952d49c37075326baa218eb Mon Sep 17 00:00:00 2001 From: Yakov Date: Fri, 27 Sep 2024 23:06:43 +0100 Subject: [PATCH 062/188] fix: fix bag in neglog_pvalue_to_mantissa_and_exponent (#795) --- src/gentropy/common/spark_helpers.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 3fdabfbcc..4d24212a2 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -270,13 +270,13 @@ def neglog_pvalue_to_mantissa_and_exponent(p_value: Column) -> tuple[Column, Col +--------+--------------+--------------+ |negLogPv|pValueMantissa|pValueExponent| +--------+--------------+--------------+ - | 4.56| 3.6307805| -5| - | 2109.23| 1.6982436| -2110| + | 4.56| 2.7542286| -5| + | 2109.23| 5.8884363| -2110| +--------+--------------+--------------+ """ exponent: Column = f.ceil(p_value) - mantissa: Column = f.pow(f.lit(10), (p_value - exponent + f.lit(1))) + mantissa: Column = f.pow(f.lit(10), (exponent - p_value)) return ( mantissa.cast(t.FloatType()).alias("pValueMantissa"), @@ -677,7 +677,6 @@ def safe_array_union( ) - def sort_array_struct_by_columns(column: Column, fields_order: list[str]) -> Column: """Sort nested struct fields by provided fields order. 
From 88f62d44010ebd44e8b2e6957f7694e1d750cd12 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Mon, 30 Sep 2024 12:31:56 +0100 Subject: [PATCH 063/188] fix(schema): recursive validation of arbitrarily deep nested structure (#790) * fix(schema): recursive validation of arbitrariliy deep nested structure * chore: pre-commit auto fixes [...] * fix: docstring issues * fix: failing tests are fixed * test: skipping tests that are failing because bug in vep parser * test(schemas): adding tests for schema comparison functions * test: removing skip as the vep parser logic is fixed * fix: addressing review comments * fix(schemas): removing schema flattening function * chore: pre-commit auto fixes [...] --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/common/schemas.py | 224 +++++++++++--- src/gentropy/dataset/dataset.py | 52 +--- tests/gentropy/common/test_schema_methods.py | 303 +++++++++++++++++++ tests/gentropy/dataset/test_study_index.py | 14 +- tests/gentropy/method/test_clump.py | 4 +- tests/gentropy/test_schemas.py | 10 +- 6 files changed, 510 insertions(+), 97 deletions(-) create mode 100644 tests/gentropy/common/test_schema_methods.py diff --git a/src/gentropy/common/schemas.py b/src/gentropy/common/schemas.py index 1dcd75a22..624e3e0e1 100644 --- a/src/gentropy/common/schemas.py +++ b/src/gentropy/common/schemas.py @@ -1,66 +1,212 @@ """Methods for handling schemas.""" + from __future__ import annotations import importlib.resources as pkg_resources import json -from collections import namedtuple -from typing import Any +from collections import defaultdict -import pyspark.sql.types as t +from pyspark.sql.types import ArrayType, StructType from gentropy.assets import schemas -def parse_spark_schema(schema_json: str) -> t.StructType: +class SchemaValidationError(Exception): + """This exception is raised when a 
schema validation fails.""" + + def __init__( + self: SchemaValidationError, message: str, errors: defaultdict[str, list[str]] + ) -> None: + """Initialize the SchemaValidationError. + + Args: + message (str): The message to be displayed. + errors (defaultdict[str, list[str]]): The collection of observed discrepancies + """ + super().__init__(message) + self.message = message # Explicitly set the message attribute + self.errors = errors + + def __str__(self: SchemaValidationError) -> str: + """Return a string representation of the exception. + + Returns: + str: The string representation of the exception. + """ + stringified_errors = "\n ".join( + [f'{k}: {",".join(v)}' for k, v in self.errors.items()] + ) + return f"{self.message}\nErrors:\n {stringified_errors}" + + +def parse_spark_schema(schema_json: str) -> StructType: """Parse Spark schema from JSON. Args: schema_json (str): JSON filename containing spark schema in the schemas package Returns: - t.StructType: Spark schema + StructType: Spark schema """ core_schema = json.loads( pkg_resources.read_text(schemas, schema_json, encoding="utf-8") ) - return t.StructType.fromJson(core_schema) + return StructType.fromJson(core_schema) -def flatten_schema(schema: t.StructType, prefix: str = "") -> list[Any]: - """It takes a Spark schema and returns a list of all fields in the schema once flattened. +def compare_array_schemas( + observed_schema: ArrayType, + expected_schema: ArrayType, + parent_field_name: str | None = None, + schema_issues: defaultdict[str, list[str]] | None = None, +) -> defaultdict[str, list[str]]: + """Compare two array schemas. + + The comparison is done recursively, so nested structs are also compared. Args: - schema (t.StructType): The schema of the dataframe - prefix (str): The prefix to prepend to the field names. Defaults to "". + observed_schema (ArrayType): The observed schema. + expected_schema (ArrayType): The expected schema. + parent_field_name (str | None): The parent field name. 
Defaults to None. + schema_issues (defaultdict[str, list[str]] | None): The schema issues. Defaults to None. Returns: - list[Any]: A list of all the columns in the dataframe. - - Examples: - >>> from pyspark.sql.types import ArrayType, StringType, StructField, StructType - >>> schema = StructType( - ... [ - ... StructField("studyLocusId", StringType(), False), - ... StructField("locus", ArrayType(StructType([StructField("variantId", StringType(), False)])), False) - ... ] - ... ) - >>> df = spark.createDataFrame([("A", [{"variantId": "varA"}]), ("B", [{"variantId": "varB"}])], schema) - >>> flatten_schema(df.schema) - [Field(name='studyLocusId', dataType=StringType()), Field(name='locus', dataType=ArrayType(StructType([]), True)), Field(name='locus.variantId', dataType=StringType())] + defaultdict[str, list[str]]: The schema issues. """ - Field = namedtuple("Field", ["name", "dataType"]) - fields = [] - for field in schema.fields: - name = f"{prefix}.{field.name}" if prefix else field.name - dtype = field.dataType - if isinstance(dtype, t.StructType): - fields.append(Field(name, t.ArrayType(t.StructType()))) - fields += flatten_schema(dtype, prefix=name) - elif isinstance(dtype, t.ArrayType) and isinstance( - dtype.elementType, t.StructType - ): - fields.append(Field(name, t.ArrayType(t.StructType()))) - fields += flatten_schema(dtype.elementType, prefix=name) - else: - fields.append(Field(name, dtype)) - return fields + # Create default values if not provided: + if schema_issues is None: + schema_issues = defaultdict(list) + + if parent_field_name is None: + parent_field_name = "" + + observed_type = observed_schema.elementType.typeName() + expected_type = expected_schema.elementType.typeName() + + # If element types are not matching, no further tests are needed: + if observed_type != expected_type: + schema_issues["columns_with_non_matching_type"].append( + f'For column "{parent_field_name}[]" found {observed_type} instead of {expected_type}' + ) + + # If element 
type is a struct, resolve nesting: + elif (observed_type == "struct") and (expected_type == "struct"): + schema_issues = compare_struct_schemas( + observed_schema.elementType, + expected_schema.elementType, + f"{parent_field_name}[].", + schema_issues, + ) + + # If element type is an array, resolve nesting: + elif (observed_type == "array") and (expected_type == "array"): + schema_issues = compare_array_schemas( + observed_schema.elementType, + expected_schema.elementType, + parent_field_name, + schema_issues, + ) + + return schema_issues + + +def compare_struct_schemas( + observed_schema: StructType, + expected_schema: StructType, + parent_field_name: str | None = None, + schema_issues: defaultdict[str, list[str]] | None = None, +) -> defaultdict[str, list[str]]: + """Compare two struct schemas. + + The comparison is done recursively, so nested structs are also compared. + + Checking logic: + 1. Checking for duplicated columns in the observed schema. + 2. Checking for missing mandatory columns in the observed schema. + 3. Now we know that all mandatory columns are present, we can iterate over the observed schema and compare the types. + 4. Flagging unexpected columns in the observed schema. + 5. Flagging columns with non-matching types. + 6. If a column is a struct -> call compare_struct_schemas + 7. If a column is an array -> call compare_array_schemas + 8. Return dictionary with issues. + + Args: + observed_schema (StructType): The observed schema. + expected_schema (StructType): The expected schema. + parent_field_name (str | None): The parent field name. Defaults to None. + schema_issues (defaultdict[str, list[str]] | None): The schema issues. Defaults to None. + + Returns: + defaultdict[str, list[str]]: The schema issues. 
+ """ + # Create default values if not provided: + if schema_issues is None: + schema_issues = defaultdict(list) + + if parent_field_name is None: + parent_field_name = "" + + # Flagging duplicated columns if present: + if duplicated_columns := list( + { + f"{parent_field_name}{field.name}" + for field in observed_schema + if list(observed_schema).count(field) > 1 + } + ): + schema_issues["duplicated_columns"] += duplicated_columns + + # Testing mandatory fields: + required_fields = [x.name for x in expected_schema if not x.nullable] + if missing_required_fields := [ + f"{parent_field_name}{req}" + for req in required_fields + if not any(field.name == req for field in observed_schema) + ]: + schema_issues["missing_mandatory_columns"] += missing_required_fields + + # Converting schema to dictionaries for easier comparison: + observed_schema_dict = {field.name: field for field in observed_schema} + expected_schema_dict = {field.name: field for field in expected_schema} + + # Testing optional fields and types: + for field_name, field in observed_schema_dict.items(): + # Testing observed field name, if name is not matched, no further tests are needed: + if field_name not in expected_schema_dict: + schema_issues["unexpected_columns"].append( + f"{parent_field_name}{field_name}" + ) + continue + + # When we made sure the field is in both schemas, extracting field type information: + observed_type = field.dataType + observed_type_name = field.dataType.typeName() + + expected_type = expected_schema_dict[field_name].dataType + expected_type_name = expected_schema_dict[field_name].dataType.typeName() + + # Flagging non-matching types if types don't match, jumping to next field: + if observed_type_name != expected_type_name: + schema_issues["columns_with_non_matching_type"].append( + f'For column "{parent_field_name}{field_name}" found {observed_type_name} instead of {expected_type_name}' + ) + continue + + # If column is a struct, resolve nesting: + if observed_type_name == 
"struct": + schema_issues = compare_struct_schemas( + observed_type, + expected_type, + f"{parent_field_name}{field_name}.", + schema_issues, + ) + # If column is an array, resolve nesting: + elif observed_type_name == "array": + schema_issues = compare_array_schemas( + observed_type, + expected_type, + f"{parent_field_name}{field_name}[]", + schema_issues, + ) + + return schema_issues diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index e56ef2ecc..f49d062d3 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -13,7 +13,7 @@ from pyspark.sql.window import Window from typing_extensions import Self -from gentropy.common.schemas import flatten_schema +from gentropy.common.schemas import SchemaValidationError, compare_struct_schemas if TYPE_CHECKING: from enum import Enum @@ -142,57 +142,15 @@ def validate_schema(self: Dataset) -> None: """Validate DataFrame schema against expected class schema. Raises: - ValueError: DataFrame schema is not valid + SchemaValidationError: If the DataFrame schema does not match the expected schema """ expected_schema = self._schema - expected_fields = flatten_schema(expected_schema) observed_schema = self._df.schema - observed_fields = flatten_schema(observed_schema) # Unexpected fields in dataset - if unexpected_field_names := [ - x.name - for x in observed_fields - if x.name not in [y.name for y in expected_fields] - ]: - raise ValueError( - f"The {unexpected_field_names} fields are not included in DataFrame schema: {expected_fields}" - ) - - # Required fields not in dataset - required_fields = [x.name for x in expected_schema if not x.nullable] - if missing_required_fields := [ - req - for req in required_fields - if not any(field.name == req for field in observed_fields) - ]: - raise ValueError( - f"The {missing_required_fields} fields are required but missing: {required_fields}" - ) - - # Fields with duplicated names - if duplicated_fields := [ - x for x in 
set(observed_fields) if observed_fields.count(x) > 1 - ]: - raise ValueError( - f"The following fields are duplicated in DataFrame schema: {duplicated_fields}" - ) - - # Fields with different datatype - observed_field_types = { - field.name: type(field.dataType) for field in observed_fields - } - expected_field_types = { - field.name: type(field.dataType) for field in expected_fields - } - if fields_with_different_observed_datatype := [ - name - for name, observed_type in observed_field_types.items() - if name in expected_field_types - and observed_type != expected_field_types[name] - ]: - raise ValueError( - f"The following fields present differences in their datatypes: {fields_with_different_observed_datatype}." + if discrepancies := compare_struct_schemas(observed_schema, expected_schema): + raise SchemaValidationError( + f"Schema validation failed for {type(self).__name__}", discrepancies ) def valid_rows(self: Self, invalid_flags: list[str], invalid: bool = False) -> Self: diff --git a/tests/gentropy/common/test_schema_methods.py b/tests/gentropy/common/test_schema_methods.py new file mode 100644 index 000000000..8ed1342b5 --- /dev/null +++ b/tests/gentropy/common/test_schema_methods.py @@ -0,0 +1,303 @@ +"""Tests methods dealing with schema comparison.""" + +from __future__ import annotations + +from collections import defaultdict + +from pyspark.sql.types import ( + ArrayType, + IntegerType, + StringType, + StructField, + StructType, +) + +from gentropy.common.schemas import ( + compare_array_schemas, + compare_struct_schemas, +) + + +class TestSchemaComparisonMethods: + """Class for testing schema comparison methods.""" + + STRUCT_FIELD_STRING = StructField("a", StringType(), True) + STRUCT_FIELD_STRING_MANDATORY = StructField("a", StringType(), False) + STRUCT_FIELD_INTEGER = StructField("b", IntegerType(), True) + STRUCT_FIELD_WRONGTYPE = StructField("a", IntegerType(), True) + + def test_struct_validation_return_type(self: TestSchemaComparisonMethods) -> 
None: + """Test successful validation of StructType.""" + observed = StructType([self.STRUCT_FIELD_STRING, self.STRUCT_FIELD_INTEGER]) + expected = StructType([self.STRUCT_FIELD_STRING, self.STRUCT_FIELD_INTEGER]) + + discrepancy = compare_struct_schemas(observed, expected) + assert isinstance(discrepancy, defaultdict) + + def test_struct_validation_success(self: TestSchemaComparisonMethods) -> None: + """Test successful validation of StructType.""" + observed = StructType([self.STRUCT_FIELD_STRING, self.STRUCT_FIELD_INTEGER]) + expected = StructType([self.STRUCT_FIELD_STRING, self.STRUCT_FIELD_INTEGER]) + + discrepancy = compare_struct_schemas(observed, expected) + assert not discrepancy + + def test_struct_validation_non_matching_type( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of StructType.""" + observed = StructType([self.STRUCT_FIELD_STRING]) + expected = StructType([self.STRUCT_FIELD_WRONGTYPE]) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "columns_with_non_matching_type" in discrepancy + + def test_struct_validation_missing_mandatory( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of StructType.""" + observed = StructType([self.STRUCT_FIELD_INTEGER]) + expected = StructType( + [self.STRUCT_FIELD_STRING_MANDATORY, self.STRUCT_FIELD_INTEGER] + ) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "missing_mandatory_columns" in discrepancy + + # Test that the right column is flagged as missing: + assert ( + self.STRUCT_FIELD_STRING_MANDATORY.name + in discrepancy["missing_mandatory_columns"] + ) + + def test_struct_validation_unexpected_column( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation 
of StructType.""" + observed = StructType( + [self.STRUCT_FIELD_STRING_MANDATORY, self.STRUCT_FIELD_INTEGER] + ) + expected = StructType([self.STRUCT_FIELD_STRING_MANDATORY]) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "unexpected_columns" in discrepancy + + # Test that the right column is flagged as unexpected: + assert self.STRUCT_FIELD_INTEGER.name in discrepancy["unexpected_columns"] + + def test_struct_validation_duplicated_columns( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of StructType.""" + observed = StructType( + [ + self.STRUCT_FIELD_STRING, + self.STRUCT_FIELD_STRING, + self.STRUCT_FIELD_INTEGER, + ] + ) + expected = StructType([self.STRUCT_FIELD_STRING, self.STRUCT_FIELD_INTEGER]) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "duplicated_columns" in discrepancy + + # Test that the right column is flagged as duplicated: + assert self.STRUCT_FIELD_STRING.name in discrepancy["duplicated_columns"] + + def test_struct_validation_success_nested_struct( + self: TestSchemaComparisonMethods, + ) -> None: + """Test successful validation of nested StructType.""" + nested_struct = StructType( + [self.STRUCT_FIELD_STRING, self.STRUCT_FIELD_INTEGER] + ) + + observed = StructType([StructField("c", nested_struct)]) + expected = StructType([StructField("c", nested_struct)]) + + discrepancy = compare_struct_schemas(observed, expected) + assert not discrepancy + + def test_struct_validation_non_matching_type_nested_struct( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of nested StructType.""" + nested_struct = StructType([self.STRUCT_FIELD_STRING]) + + observed = StructType([StructField("c", nested_struct)]) + expected = 
StructType( + [StructField("c", StructType([self.STRUCT_FIELD_WRONGTYPE]))] + ) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "columns_with_non_matching_type" in discrepancy + + def test_array_validation_success(self: TestSchemaComparisonMethods) -> None: + """Test successful validation of ArrayType.""" + observed = ArrayType(StringType()) + expected = ArrayType(StringType()) + + discrepancy = compare_array_schemas(observed, expected) + assert not discrepancy + + def test_array_validation_non_matching_type( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of ArrayType.""" + observed = ArrayType(StringType()) + expected = ArrayType(IntegerType()) + + discrepancy = compare_array_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "columns_with_non_matching_type" in discrepancy + + def test_array_validation_nested_array(self: TestSchemaComparisonMethods) -> None: + """Test successful validation of nested ArrayType.""" + nested_array = ArrayType(StringType()) + + observed = ArrayType(nested_array) + expected = ArrayType(nested_array) + + discrepancy = compare_array_schemas(observed, expected) + assert not discrepancy + + def test_array_validation_non_matching_type_nested_array( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of nested ArrayType.""" + observed = ArrayType(ArrayType(StringType())) + expected = ArrayType(ArrayType(IntegerType())) + + discrepancy = compare_array_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "columns_with_non_matching_type" in discrepancy + + def test_struct_validation_success_nested_with_array( + self: TestSchemaComparisonMethods, + ) -> None: + 
"""Test successful validation of nested StructType with ArrayType.""" + nested_array = StructField("a", ArrayType(StringType()), True) + nested_struct = StructType([self.STRUCT_FIELD_STRING, nested_array]) + + observed = StructType([StructField("c", nested_struct, True)]) + expected = StructType([StructField("c", nested_struct, True)]) + + discrepancy = compare_struct_schemas(observed, expected) + assert not discrepancy + + def test_struct_validation_non_matching_type_nested_with_array( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of nested StructType with ArrayType.""" + nested_array = StructField("a", ArrayType(StringType()), True) + nested_array_wrong_type = StructField("a", ArrayType(IntegerType()), True) + nested_struct = StructType([self.STRUCT_FIELD_STRING, nested_array]) + nested_struct_wrong_type = StructType( + [self.STRUCT_FIELD_STRING, nested_array_wrong_type] + ) + observed = StructType([StructField("c", nested_struct, True)]) + expected = StructType([StructField("c", nested_struct_wrong_type, True)]) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test that the discrepancy is in the field name: + assert "columns_with_non_matching_type" in discrepancy + + def test_struct_validation_failing_with_multiple_reasons( + self: TestSchemaComparisonMethods, + ) -> None: + """Test unsuccessful validation of StructType with multiple issues.""" + observed = StructType( + [ + StructField( + "a", + ArrayType( + ArrayType( + StructType( + [ + StructField("a", IntegerType(), False), + StructField("c", StringType(), True), + StructField("c", StringType(), True), + ] + ), + False, + ), + False, + ), + False, + ), + ] + ) + + expected = StructType( + [ + StructField( + "a", + ArrayType( + ArrayType( + StructType( + [ + StructField("b", IntegerType(), False), + StructField("c", StringType(), True), + StructField("d", StringType(), True), + ] + ), + False, + ), 
+ False, + ), + False, + ), + ] + ) + + discrepancy = compare_struct_schemas(observed, expected) + + # Test there's a discrepancy: + assert discrepancy + + # Test if the returned list of discrepancies is correct: + assert discrepancy == defaultdict( + list, + { + "duplicated_columns": ["a[][].c"], + "missing_mandatory_columns": ["a[][].b"], + "unexpected_columns": ["a[][].a"], + }, + ) diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index 3bdd7a5cb..fee3a2557 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -167,14 +167,14 @@ def _setup(self: TestGeneValidation, spark: SparkSession) -> None: """Setup fixture.""" self.study_index = StudyIndex( _df=spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS).withColumn( - "qualityControls", f.array() + "qualityControls", f.array().cast("array") ), _schema=StudyIndex.get_schema(), ) self.study_index_no_gene = StudyIndex( _df=spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS) - .withColumn("qualityControls", f.array()) + .withColumn("qualityControls", f.array().cast("array")) .drop("geneId"), _schema=StudyIndex.get_schema(), ) @@ -231,7 +231,7 @@ def _setup(self: TestUniquenessValidation, spark: SparkSession) -> None: """Setup fixture.""" self.study_index = StudyIndex( _df=spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS).withColumn( - "qualityControls", f.array() + "qualityControls", f.array().cast("array") ), _schema=StudyIndex.get_schema(), ) @@ -279,7 +279,7 @@ def _setup(self: TestStudyTypeValidation, spark: SparkSession) -> None: """Setup fixture.""" self.study_index = StudyIndex( _df=spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS).withColumn( - "qualityControls", f.array() + "qualityControls", f.array().cast("array") ), _schema=StudyIndex.get_schema(), ) @@ -346,8 +346,10 @@ def _setup(self: TestDiseaseValidation, spark: SparkSession) -> None: 
spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS) .groupBy("studyId", "studyType", "projectId") .agg(f.collect_set("efo").alias("traitFromSourceMappedIds")) - .withColumn("qualityControls", f.array()) - .withColumn("backgroundTraitFromSourceMappedIds", f.array()) + .withColumn("qualityControls", f.array().cast("array")) + .withColumn( + "backgroundTraitFromSourceMappedIds", f.array().cast("array") + ) ) study_df.show() # Mock study index: diff --git a/tests/gentropy/method/test_clump.py b/tests/gentropy/method/test_clump.py index 1e754df3a..757c79305 100644 --- a/tests/gentropy/method/test_clump.py +++ b/tests/gentropy/method/test_clump.py @@ -135,7 +135,9 @@ def test_flagging(self: TestIsLeadLinked) -> None: """Test flagging of lead variants.""" # Create the study locus and clump: sl_flagged = StudyLocus( - _df=self.df.drop("expected_flag").withColumn("qualityControls", f.array()), + _df=self.df.drop("expected_flag").withColumn( + "qualityControls", f.array().cast("array") + ), _schema=StudyLocus.get_schema(), ).clump() diff --git a/tests/gentropy/test_schemas.py b/tests/gentropy/test_schemas.py index 1af72c149..6840e3207 100644 --- a/tests/gentropy/test_schemas.py +++ b/tests/gentropy/test_schemas.py @@ -12,6 +12,8 @@ import pytest from pyspark.sql.types import StructType +from gentropy.common.schemas import SchemaValidationError + if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest @@ -90,7 +92,7 @@ def test_validate_schema_extra_field( mock_dataset_instance: V2G | GeneIndex, ) -> None: """Test that validate_schema raises an error if the observed schema has an extra field.""" - with pytest.raises(ValueError, match="extraField"): + with pytest.raises(SchemaValidationError, match="extraField"): mock_dataset_instance.df = mock_dataset_instance.df.withColumn( "extraField", f.lit("extra") ) @@ -103,7 +105,7 @@ def test_validate_schema_missing_field( mock_dataset_instance: V2G | GeneIndex, ) -> None: """Test that validate_schema raises an error if 
the observed schema is missing a required field, geneId in this case.""" - with pytest.raises(ValueError, match="geneId"): + with pytest.raises(SchemaValidationError, match="geneId"): mock_dataset_instance.df = mock_dataset_instance.df.drop("geneId") @pytest.mark.parametrize( @@ -114,7 +116,7 @@ def test_validate_schema_duplicated_field( mock_dataset_instance: V2G | GeneIndex, ) -> None: """Test that validate_schema raises an error if the observed schema has a duplicated field, geneId in this case.""" - with pytest.raises(ValueError, match="geneId"): + with pytest.raises(SchemaValidationError, match="geneId"): mock_dataset_instance.df = mock_dataset_instance.df.select( "*", f.lit("A").alias("geneId") ) @@ -127,7 +129,7 @@ def test_validate_schema_different_datatype( mock_dataset_instance: V2G | GeneIndex, ) -> None: """Test that validate_schema raises an error if any field in the observed schema has a different type than expected.""" - with pytest.raises(ValueError, match="geneId"): + with pytest.raises(SchemaValidationError, match="geneId"): mock_dataset_instance.df = mock_dataset_instance.df.withColumn( "geneId", f.lit(1) ) From 8b253a5fc1c050e5018be4575c36a2ff49b2a408 Mon Sep 17 00:00:00 2001 From: Yakov Date: Mon, 30 Sep 2024 13:55:57 +0100 Subject: [PATCH 064/188] fix: adding data specific p-value filters (#788) * fix: adding data specific fillters * fix: removing variables * fix: adding options to init --------- Co-authored-by: Daniel Suveges --- src/gentropy/config.py | 7 ++++++ src/gentropy/eqtl_catalogue.py | 23 ++++++++++++------ src/gentropy/finngen_finemapping_ingestion.py | 24 ++++++++++++------- src/gentropy/pics.py | 15 ++++++++---- 4 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 3a67e7868..0a1f9438a 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -121,10 +121,16 @@ class GWASCatalogSumstatsPreprocessConfig(StepConfig): class EqtlCatalogueConfig(StepConfig): 
"""eQTL Catalogue step configuration.""" + session: Any = field( + default_factory=lambda: { + "start_hail": True, + } + ) eqtl_catalogue_paths_imported: str = MISSING eqtl_catalogue_study_index_out: str = MISSING eqtl_catalogue_credible_sets_out: str = MISSING mqtl_quantification_methods_blacklist: list[str] = field(default_factory=lambda: []) + eqtl_lead_pvalue_threshold: float = 1e-3 _target_: str = "gentropy.eqtl_catalogue.EqtlCatalogueStep" @@ -168,6 +174,7 @@ class FinngenFinemappingConfig(StepConfig): _target_: str = ( "gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep" ) + finngen_finemapping_lead_pvalue_threshold: float = 1e-5 @dataclass diff --git a/src/gentropy/eqtl_catalogue.py b/src/gentropy/eqtl_catalogue.py index 7adc5d8a2..3ad61ddea 100644 --- a/src/gentropy/eqtl_catalogue.py +++ b/src/gentropy/eqtl_catalogue.py @@ -3,6 +3,7 @@ from __future__ import annotations from gentropy.common.session import Session +from gentropy.config import EqtlCatalogueConfig from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex @@ -20,6 +21,7 @@ def __init__( eqtl_catalogue_paths_imported: str, eqtl_catalogue_study_index_out: str, eqtl_catalogue_credible_sets_out: str, + eqtl_lead_pvalue_threshold: float = EqtlCatalogueConfig().eqtl_lead_pvalue_threshold, ) -> None: """Run eQTL Catalogue ingestion step. @@ -29,6 +31,7 @@ def __init__( eqtl_catalogue_paths_imported (str): Input eQTL Catalogue fine mapping results path. eqtl_catalogue_study_index_out (str): Output eQTL Catalogue study index path. eqtl_catalogue_credible_sets_out (str): Output eQTL Catalogue credible sets path. + eqtl_lead_pvalue_threshold (float, optional): Lead p-value threshold. Defaults to EqtlCatalogueConfig().eqtl_lead_pvalue_threshold. 
""" # Extract studies_metadata = EqtlCatalogueStudyIndex.read_studies_from_source( @@ -58,13 +61,19 @@ def __init__( processed_susie_df = EqtlCatalogueFinemapping.parse_susie_results( credible_sets_df, lbf_df, studies_metadata ) - credible_sets = EqtlCatalogueFinemapping.from_susie_results(processed_susie_df) - study_index = EqtlCatalogueStudyIndex.from_susie_results(processed_susie_df) - # Load - study_index.df.write.mode(session.write_mode).parquet( - eqtl_catalogue_study_index_out + ( + EqtlCatalogueStudyIndex.from_susie_results(processed_susie_df) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(eqtl_catalogue_study_index_out) ) - credible_sets.df.write.mode(session.write_mode).parquet( - eqtl_catalogue_credible_sets_out + + ( + EqtlCatalogueFinemapping.from_susie_results(processed_susie_df) + # Flagging sub-significnat loci: + .validate_lead_pvalue(pvalue_cutoff=eqtl_lead_pvalue_threshold) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(eqtl_catalogue_credible_sets_out) ) diff --git a/src/gentropy/finngen_finemapping_ingestion.py b/src/gentropy/finngen_finemapping_ingestion.py index 80089cf68..ca5ca1656 100644 --- a/src/gentropy/finngen_finemapping_ingestion.py +++ b/src/gentropy/finngen_finemapping_ingestion.py @@ -20,6 +20,7 @@ def __init__( finngen_finemapping_out: str, finngen_susie_finemapping_snp_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_cs_summary_files, + finngen_finemapping_lead_pvalue_threshold: float = FinngenFinemappingConfig().finngen_finemapping_lead_pvalue_threshold, ) -> None: """Run FinnGen finemapping ingestion step. @@ -28,16 +29,21 @@ def __init__( finngen_finemapping_out (str): Output path for the finemapping results in StudyLocus format. finngen_susie_finemapping_snp_files(str): Path to the FinnGen SuSIE finemapping results. 
finngen_susie_finemapping_cs_summary_files (str): FinnGen SuSIE summaries for CS filters(LBF>2). + finngen_finemapping_lead_pvalue_threshold (float): Lead p-value threshold. """ # Read finemapping outputs from the input paths. - finngen_finemapping_df = FinnGenFinemapping.from_finngen_susie_finemapping( - spark=session.spark, - finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, - finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, - ) - - # Write the output. - finngen_finemapping_df.df.write.mode(session.write_mode).parquet( - finngen_finemapping_out + ( + FinnGenFinemapping.from_finngen_susie_finemapping( + spark=session.spark, + finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, + finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, + ) + # Flagging sub-significnat loci: + .validate_lead_pvalue( + pvalue_cutoff=finngen_finemapping_lead_pvalue_threshold + ) + # Writing the output: + .df.write.mode(session.write_mode) + .parquet(finngen_finemapping_out) ) diff --git a/src/gentropy/pics.py b/src/gentropy/pics.py index e80a37eb6..f96f54997 100644 --- a/src/gentropy/pics.py +++ b/src/gentropy/pics.py @@ -3,6 +3,7 @@ from __future__ import annotations from gentropy.common.session import Session +from gentropy.config import WindowBasedClumpingStepConfig from gentropy.dataset.study_locus import CredibleInterval, StudyLocus from gentropy.method.pics import PICS @@ -28,8 +29,14 @@ def __init__( session, study_locus_ld_annotated_in ) # PICS - picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set( - credible_interval=CredibleInterval.IS99 + ( + PICS.finemap(study_locus_ld_annotated) + .filter_credible_set(credible_interval=CredibleInterval.IS99) + # Flagging sub-significnat loci: + .validate_lead_pvalue( + pvalue_cutoff=WindowBasedClumpingStepConfig().gwas_significance + ) + # Writing the output: + .df.write.mode(session.write_mode) + 
.parquet(picsed_study_locus_out) ) - # Write - picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out) From 5c58e58cd23b4d9566503ae7ebca32f7f8d26c4f Mon Sep 17 00:00:00 2001 From: Vivien Ho <56025826+vivienho@users.noreply.github.com> Date: Mon, 30 Sep 2024 14:29:13 +0100 Subject: [PATCH 065/188] feat: change `StudyLocusId` hashing method to md5 (and change `StudyLocusId` to string type) (#783) * feat: change studyLocusId to string in schema * feat: change studyLocusId of example data to string in tests * feat: change hashing method to md5 * test: remove test_assign_study_locus_id__null_variant_id as validation will have removed null ids * fix: change studyLocusId to string in remaining files * fix: ensure inputs to assign_study_locus_id are columns and not strings * fix: change studyLocusId to string in remaining files * chore: update assign_study_locus_id docstring with updated output * chore: update assign_study_locus_id docstring with updated output (again) * fix: change studyLocusId to string in recently merged files * feat: move hashing logic to generate_identifier function in Dataset class --------- Co-authored-by: Daniel Suveges --- .../assets/schemas/colocalisation.json | 4 +- src/gentropy/assets/schemas/l2g_feature.json | 2 +- .../assets/schemas/l2g_gold_standard.json | 2 +- .../assets/schemas/l2g_predictions.json | 2 +- src/gentropy/assets/schemas/study_locus.json | 2 +- .../assets/schemas/study_locus_overlap.json | 4 +- src/gentropy/dataset/dataset.py | 15 +++ src/gentropy/dataset/study_locus.py | 34 ++--- .../datasource/eqtl_catalogue/finemapping.py | 2 +- .../datasource/finngen/finemapping.py | 2 +- .../datasource/gwas_catalog/associations.py | 6 +- .../open_targets/l2g_gold_standard.py | 2 +- src/gentropy/l2g.py | 17 ++- src/gentropy/method/locus_breaker_clumping.py | 4 +- src/gentropy/method/pics.py | 2 +- src/gentropy/method/window_based_clumping.py | 2 +- src/gentropy/susie_finemapper.py | 4 +- tests/gentropy/conftest.py | 6 +- 
tests/gentropy/dataset/test_colocalisation.py | 6 +- tests/gentropy/dataset/test_l2g.py | 46 +++---- .../dataset/test_l2g_feature_matrix.py | 9 +- tests/gentropy/dataset/test_study_locus.py | 125 ++++++++---------- .../dataset/test_study_locus_overlap.py | 10 +- .../dataset/test_study_locus_overlaps.py | 22 +-- .../test_gwas_catalog_associations.py | 4 +- .../datasource/open_targets/test_variants.py | 2 +- tests/gentropy/method/test_clump.py | 2 +- .../method/test_colocalisation_method.py | 22 +-- 28 files changed, 177 insertions(+), 183 deletions(-) diff --git a/src/gentropy/assets/schemas/colocalisation.json b/src/gentropy/assets/schemas/colocalisation.json index 6e1163cfe..7d05c849a 100644 --- a/src/gentropy/assets/schemas/colocalisation.json +++ b/src/gentropy/assets/schemas/colocalisation.json @@ -4,13 +4,13 @@ { "name": "leftStudyLocusId", "nullable": false, - "type": "long", + "type": "string", "metadata": {} }, { "name": "rightStudyLocusId", "nullable": false, - "type": "long", + "type": "string", "metadata": {} }, { diff --git a/src/gentropy/assets/schemas/l2g_feature.json b/src/gentropy/assets/schemas/l2g_feature.json index 3139a57e4..314b4dde0 100644 --- a/src/gentropy/assets/schemas/l2g_feature.json +++ b/src/gentropy/assets/schemas/l2g_feature.json @@ -3,7 +3,7 @@ "fields": [ { "name": "studyLocusId", - "type": "long", + "type": "string", "nullable": false, "metadata": {} }, diff --git a/src/gentropy/assets/schemas/l2g_gold_standard.json b/src/gentropy/assets/schemas/l2g_gold_standard.json index cf19d6b52..6af921d61 100644 --- a/src/gentropy/assets/schemas/l2g_gold_standard.json +++ b/src/gentropy/assets/schemas/l2g_gold_standard.json @@ -3,7 +3,7 @@ "fields": [ { "name": "studyLocusId", - "type": "long", + "type": "string", "nullable": false, "metadata": {} }, diff --git a/src/gentropy/assets/schemas/l2g_predictions.json b/src/gentropy/assets/schemas/l2g_predictions.json index 16b274207..238ff4087 100644 --- 
a/src/gentropy/assets/schemas/l2g_predictions.json +++ b/src/gentropy/assets/schemas/l2g_predictions.json @@ -3,7 +3,7 @@ "fields": [ { "name": "studyLocusId", - "type": "long", + "type": "string", "nullable": false, "metadata": {} }, diff --git a/src/gentropy/assets/schemas/study_locus.json b/src/gentropy/assets/schemas/study_locus.json index a8d15aba6..52a19f941 100644 --- a/src/gentropy/assets/schemas/study_locus.json +++ b/src/gentropy/assets/schemas/study_locus.json @@ -4,7 +4,7 @@ "metadata": {}, "name": "studyLocusId", "nullable": false, - "type": "long" + "type": "string" }, { "metadata": {}, diff --git a/src/gentropy/assets/schemas/study_locus_overlap.json b/src/gentropy/assets/schemas/study_locus_overlap.json index 22ba7705e..0c4319827 100644 --- a/src/gentropy/assets/schemas/study_locus_overlap.json +++ b/src/gentropy/assets/schemas/study_locus_overlap.json @@ -4,13 +4,13 @@ "metadata": {}, "name": "leftStudyLocusId", "nullable": false, - "type": "long" + "type": "string" }, { "metadata": {}, "name": "rightStudyLocusId", "nullable": false, - "type": "long" + "type": "string" }, { "metadata": {}, diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index f49d062d3..d033e129d 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -310,3 +310,18 @@ def flag_duplicates(test_column: Column) -> Column: ) > 1 ) + + @staticmethod + def generate_identifier(uniqueness_defining_columns: list[str]) -> Column: + """Hashes the provided columns to generate a unique identifier. 
+ + Args: + uniqueness_defining_columns (list[str]): list of columns defining uniqueness + + Returns: + Column: column with a unique identifier + """ + hashable_columns = [f.when(f.col(column).cast("string").isNull(), f.lit("None")) + .otherwise(f.col(column).cast("string")) + for column in uniqueness_defining_columns] + return f.md5(f.concat(*hashable_columns)) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index e3706eaf0..a4d35e7d5 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -447,38 +447,28 @@ def _align_overlapping_tags( ) @staticmethod - def assign_study_locus_id( - study_id_col: Column, - variant_id_col: Column, - finemapping_col: Column = None, - ) -> Column: - """Hashes a column with a variant ID and a study ID to extract a consistent studyLocusId. + def assign_study_locus_id(uniqueness_defining_columns: list[str]) -> Column: + """Hashes the provided columns to extract a consistent studyLocusId. 
Args: - study_id_col (Column): column name with a study ID - variant_id_col (Column): column name with a variant ID - finemapping_col (Column, optional): column with fine mapping methodology + uniqueness_defining_columns (list[str]): list of columns defining uniqueness Returns: Column: column with a study locus ID Examples: >>> df = spark.createDataFrame([("GCST000001", "1_1000_A_C", "SuSiE-inf"), ("GCST000002", "1_1000_A_C", "pics")]).toDF("studyId", "variantId", "finemappingMethod") - >>> df.withColumn("study_locus_id", StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId"), f.col("finemappingMethod"))).show() - +----------+----------+-----------------+-------------------+ - | studyId| variantId|finemappingMethod| study_locus_id| - +----------+----------+-----------------+-------------------+ - |GCST000001|1_1000_A_C| SuSiE-inf|3801266831619496075| - |GCST000002|1_1000_A_C| pics|1581844826999194430| - +----------+----------+-----------------+-------------------+ + >>> df.withColumn("study_locus_id", StudyLocus.assign_study_locus_id(["studyId", "variantId", "finemappingMethod"])).show(truncate=False) + +----------+----------+-----------------+--------------------------------+ + |studyId |variantId |finemappingMethod|study_locus_id | + +----------+----------+-----------------+--------------------------------+ + |GCST000001|1_1000_A_C|SuSiE-inf |109804fe1e20c94231a31bafd71b566e| + |GCST000002|1_1000_A_C|pics |de310be4558e0482c9cc359c97d37773| + +----------+----------+-----------------+--------------------------------+ """ - if finemapping_col is None: - finemapping_col = f.lit(None).cast(StringType()) - variant_id_col = f.coalesce(variant_id_col, f.rand().cast("string")) - return f.xxhash64(study_id_col, variant_id_col, finemapping_col).alias( - "studyLocusId" - ) + return Dataset.generate_identifier(uniqueness_defining_columns).alias("studyLocusId") + @classmethod def calculate_credible_set_log10bf(cls: type[StudyLocus], logbfs: Column) -> Column: 
diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index 11ec5bef1..0808b7016 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -260,7 +260,7 @@ def from_susie_results( .select( *study_locus_cols, StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId"), f.col("finemappingMethod") + ["studyId", "variantId", "finemappingMethod"] ), StudyLocus.calculate_credible_set_log10bf( f.col("locus.logBF") diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 092a79372..3c83ba8ff 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -471,7 +471,7 @@ def from_finngen_susie_finemapping( ).withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId"), f.col("finemappingMethod") + ["studyId", "variantId", "finemappingMethod"] ), ) diff --git a/src/gentropy/datasource/gwas_catalog/associations.py b/src/gentropy/datasource/gwas_catalog/associations.py index dd9aa3fe2..b34944b11 100644 --- a/src/gentropy/datasource/gwas_catalog/associations.py +++ b/src/gentropy/datasource/gwas_catalog/associations.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING import pyspark.sql.functions as f -from pyspark.sql.types import DoubleType, FloatType, IntegerType, LongType +from pyspark.sql.types import DoubleType, FloatType, IntegerType, StringType from pyspark.sql.window import Window from gentropy.assets import data @@ -1109,7 +1109,7 @@ def from_source( """ return StudyLocusGWASCatalog( _df=gwas_associations.withColumn( - "studyLocusId", f.monotonically_increasing_id().cast(LongType()) + "studyLocusId", f.monotonically_increasing_id().cast(StringType()) ) .transform( # Map/harmonise variants to variant annotation dataset: @@ -1188,7 +1188,7 @@ def update_study_id( 
.drop("subStudyDescription", "updatedStudyId") ).withColumn( "studyLocusId", - StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId")), + StudyLocus.assign_study_locus_id(["studyId", "variantId"]), ) return self diff --git a/src/gentropy/datasource/open_targets/l2g_gold_standard.py b/src/gentropy/datasource/open_targets/l2g_gold_standard.py index 2cfcd62f8..26d5a0253 100644 --- a/src/gentropy/datasource/open_targets/l2g_gold_standard.py +++ b/src/gentropy/datasource/open_targets/l2g_gold_standard.py @@ -52,7 +52,7 @@ def parse_positive_curation( ) .withColumn( "studyLocusId", - StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId")), + StudyLocus.assign_study_locus_id(["studyId", "variantId"]), ) .groupBy("studyLocusId", "studyId", "variantId", "geneId") .agg(f.collect_set("source").alias("sources")) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 6f80d826e..ff8c6c8ff 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -207,17 +207,22 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr study_locus_overlap = StudyLocus( _df=self.credible_set.df.join( f.broadcast( - self.gs_curation.select( - StudyLocus.assign_study_locus_id( - f.col("association_info.otg_id"), # studyId - f.concat_ws( # variantId + self.gs_curation + .withColumn( + "variantId", + f.concat_ws( "_", f.col("sentinel_variant.locus_GRCh38.chromosome"), f.col("sentinel_variant.locus_GRCh38.position"), f.col("sentinel_variant.alleles.reference"), f.col("sentinel_variant.alleles.alternative"), - ), - ).alias("studyLocusId"), + ) + ) + .select( + StudyLocus.assign_study_locus_id( + ["association_info.otg_id", # studyId + "variantId"] + ), ) ), "studyLocusId", diff --git a/src/gentropy/method/locus_breaker_clumping.py b/src/gentropy/method/locus_breaker_clumping.py index 0ca7ae29b..fd7661a22 100644 --- a/src/gentropy/method/locus_breaker_clumping.py +++ b/src/gentropy/method/locus_breaker_clumping.py @@ -112,8 +112,8 @@ def 
locus_breaker( .cast(t.ArrayType(t.StringType())) .alias("qualityControls"), StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId") - ).alias("studyLocusId"), + ["studyId", "variantId"] + ), ) ), _schema=StudyLocus.get_schema(), diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 2de06f512..5fd084efd 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -257,7 +257,7 @@ def finemap( .withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( - "studyId", "variantId", "finemappingMethod" + ["studyId", "variantId", "finemappingMethod"] ), ) .drop("neglog_pvalue") diff --git a/src/gentropy/method/window_based_clumping.py b/src/gentropy/method/window_based_clumping.py index 9ef747abf..3ab15d42f 100644 --- a/src/gentropy/method/window_based_clumping.py +++ b/src/gentropy/method/window_based_clumping.py @@ -247,7 +247,7 @@ def clump( .withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId") + ["studyId", "variantId"] ), ) # Initialize QC column as array of strings: diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 587ea7963..26c73e20f 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -95,7 +95,7 @@ def __init__( .df.withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( - "studyId", "variantId", "finemappingMethod" + ["studyId", "variantId", "finemappingMethod"] ), ) .collect()[0] @@ -247,7 +247,7 @@ def susie_inf_to_studylocus( .withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( - f.col("studyId"), f.col("variantId"), f.col("finemappingMethod") + ["studyId", "variantId", "finemappingMethod"] ), ) .select( diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 4045833f9..21f05dcf3 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -617,10 +617,10 @@ def mock_l2g_feature_matrix(spark: SparkSession) -> L2GFeatureMatrix: 
return L2GFeatureMatrix( _df=spark.createDataFrame( [ - (1, "gene1", 100.0, None), - (2, "gene2", 1000.0, 0.0), + ("1", "gene1", 100.0, None), + ("2", "gene2", 1000.0, 0.0), ], - "studyLocusId LONG, geneId STRING, distanceTssMean FLOAT, distanceTssMinimum FLOAT", + "studyLocusId STRING, geneId STRING, distanceTssMean FLOAT, distanceTssMinimum FLOAT", ), with_gold_standard=False, ) diff --git a/tests/gentropy/dataset/test_colocalisation.py b/tests/gentropy/dataset/test_colocalisation.py index 8f2766fb4..c15653787 100644 --- a/tests/gentropy/dataset/test_colocalisation.py +++ b/tests/gentropy/dataset/test_colocalisation.py @@ -72,12 +72,12 @@ def _setup(self: TestAppendStudyMetadata, spark: SparkSession) -> None: _df=spark.createDataFrame( [ ( - 1, + "1", "var1", "gwas1", ), ( - 2, + "2", "var2", "eqtl1", ), @@ -100,7 +100,7 @@ def _setup(self: TestAppendStudyMetadata, spark: SparkSession) -> None: ) self.sample_colocalisation = Colocalisation( _df=spark.createDataFrame( - [(1, 2, "eqtl", "X", "COLOC", 1, 0.9)], + [("1", "2", "eqtl", "X", "COLOC", 1, 0.9)], [ "leftStudyLocusId", "rightStudyLocusId", diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py index 2523b97dd..125352f8e 100644 --- a/tests/gentropy/dataset/test_l2g.py +++ b/tests/gentropy/dataset/test_l2g.py @@ -43,44 +43,44 @@ def test_filter_unique_associations(spark: SparkSession) -> None: """Test filter_unique_associations.""" mock_l2g_gs_df = spark.createDataFrame( [ - (1, "variant1", "study1", "gene1", "positive"), + ("1", "variant1", "study1", "gene1", "positive"), ( - 2, + "2", "variant2", "study1", "gene1", "negative", ), # in the same locus as sl1 and pointing to same gene, has to be dropped ( - 3, + "3", "variant3", "study1", "gene1", "positive", ), # in diff locus as sl1 and pointing to same gene, has to be kept ( - 4, + "4", "variant4", "study1", "gene2", "positive", ), # in same locus as sl1 and pointing to diff gene, has to be kept ], - "studyLocusId LONG, 
variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", + "studyLocusId STRING, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", ) mock_sl_overlap_df = spark.createDataFrame( - [(1, 2, "eqtl", "variant2"), (1, 4, "eqtl", "variant4")], - "leftStudyLocusId LONG, rightStudyLocusId LONG, rightStudyType STRING, tagVariantId STRING", + [("1", "2", "eqtl", "variant2"), ("1", "4", "eqtl", "variant4")], + "leftStudyLocusId STRING, rightStudyLocusId STRING, rightStudyType STRING, tagVariantId STRING", ) expected_df = spark.createDataFrame( [ - (1, "variant1", "study1", "gene1", "positive"), - (3, "variant3", "study1", "gene1", "positive"), - (4, "variant4", "study1", "gene2", "positive"), + ("1", "variant1", "study1", "gene1", "positive"), + ("3", "variant3", "study1", "gene1", "positive"), + ("4", "variant4", "study1", "gene2", "positive"), ], - "studyLocusId LONG, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", + "studyLocusId STRING, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", ) mock_l2g_gs = L2GGoldStandard( @@ -99,30 +99,30 @@ def test_remove_false_negatives(spark: SparkSession) -> None: """Test `remove_false_negatives`.""" mock_l2g_gs_df = spark.createDataFrame( [ - (1, "variant1", "study1", "gene1", "positive"), + ("1", "variant1", "study1", "gene1", "positive"), ( - 2, + "2", "variant2", "study1", "gene2", "negative", ), # gene2 is a partner of gene1, has to be dropped ( - 3, + "3", "variant3", "study1", "gene3", "negative", ), # gene 3 is not a partner of gene1, has to be kept ( - 4, + "4", "variant4", "study1", "gene4", "positive", ), # gene 4 is a partner of gene1, has to be kept because it's positive ], - "studyLocusId LONG, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", + "studyLocusId STRING, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", ) mock_interactions_df = spark.createDataFrame( @@ -136,11 +136,11 @@ def 
test_remove_false_negatives(spark: SparkSession) -> None: expected_df = spark.createDataFrame( [ - (1, "variant1", "study1", "gene1", "positive"), - (3, "variant3", "study1", "gene3", "negative"), - (4, "variant4", "study1", "gene4", "positive"), + ("1", "variant1", "study1", "gene1", "positive"), + ("3", "variant3", "study1", "gene3", "negative"), + ("4", "variant4", "study1", "gene4", "positive"), ], - "studyLocusId LONG, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", + "studyLocusId STRING, variantId STRING, studyId STRING, geneId STRING, goldStandardSet STRING", ) mock_l2g_gs = L2GGoldStandard( @@ -161,10 +161,10 @@ def test_l2g_feature_constructor_with_schema_mismatch( fm = L2GFeatureMatrix( _df=spark.createDataFrame( [ - (1, "gene1", 100.0), - (2, "gene2", 1000.0), + ("1", "gene1", 100.0), + ("2", "gene2", 1000.0), ], - "studyLocusId LONG, geneId STRING, distanceTssMean DOUBLE", + "studyLocusId STRING, geneId STRING, distanceTssMean DOUBLE", ), with_gold_standard=False, ) diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index 09460ee85..b74b6330a 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -8,7 +8,6 @@ from pyspark.sql.types import ( ArrayType, DoubleType, - LongType, StringType, StructField, StructType, @@ -81,7 +80,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: _df=spark.createDataFrame( [ ( - 1, + "1", "var1", "gwas1", [ @@ -90,7 +89,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ], ), ( - 2, + "2", "var2", "eqtl1", [ @@ -100,7 +99,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ], schema=StructType( [ - StructField("studyLocusId", LongType(), True), + StructField("studyLocusId", StringType(), True), StructField("variantId", StringType(), True), StructField("studyId", StringType(), True), StructField( @@ 
-136,7 +135,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ) self.sample_colocalisation = Colocalisation( _df=spark.createDataFrame( - [(1, 2, "eqtl", "X", "COLOC", 1, 0.9)], + [("1", "2", "eqtl", "X", "COLOC", 1, 0.9)], [ "leftStudyLocusId", "rightStudyLocusId", diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 29cbffad2..3240cdb02 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -12,7 +12,6 @@ ArrayType, BooleanType, DoubleType, - LongType, StringType, StructField, StructType, @@ -41,8 +40,8 @@ True, [ { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "commonTag", @@ -52,8 +51,8 @@ }, }, { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "nonCommonTag", @@ -78,7 +77,7 @@ def test_find_overlaps_semantic( # 2 associations with a common variant in the locus [ { - "studyLocusId": 1, + "studyLocusId": "1", "variantId": "lead1", "studyId": "study1", "studyType": "gwas", @@ -88,7 +87,7 @@ def test_find_overlaps_semantic( "chromosome": "1", }, { - "studyLocusId": 2, + "studyLocusId": "2", "variantId": "lead2", "studyId": "study2", "studyType": "eqtl", @@ -109,7 +108,7 @@ def test_find_overlaps_semantic( # 2 associations with no common variants in the locus [ { - "studyLocusId": 1, + "studyLocusId": "1", "variantId": "lead1", "studyId": "study1", "studyType": "gwas", @@ -119,7 +118,7 @@ def test_find_overlaps_semantic( "chromosome": "1", }, { - "studyLocusId": 2, + "studyLocusId": "2", "variantId": "lead2", "studyId": "study2", "studyType": "eqtl", @@ -164,14 +163,14 @@ def test_filter_by_study_type( [ { # from gwas - "studyLocusId": 1, + "studyLocusId": "1", "variantId": "lead1", "studyId": 
"study1", "studyType": "gwas", }, { # from eqtl - "studyLocusId": 2, + "studyLocusId": "2", "variantId": "lead2", "studyId": "study2", "studyType": "eqtl", @@ -203,20 +202,6 @@ def test_filter_credible_set(mock_study_locus: StudyLocus) -> None: ) -def test_assign_study_locus_id__null_variant_id(spark: SparkSession) -> None: - """Test assign study locus id when variant id is null for the same study.""" - df = spark.createDataFrame( - [("GCST000001", None), ("GCST000001", None)], - schema="studyId: string, variantId: string", - ).withColumn( - "studyLocusId", - StudyLocus.assign_study_locus_id(f.col("studyId"), f.col("variantId")), - ) - assert ( - df.select("studyLocusId").distinct().count() == 2 - ), "studyLocusId is not unique when variantId is null" - - @pytest.mark.parametrize( ("observed", "expected"), [ @@ -224,7 +209,7 @@ def test_assign_study_locus_id__null_variant_id(spark: SparkSession) -> None: # Locus is not null, should return union between variants in locus and lead variant [ ( - 1, + "1", "traitA", "22_varA", [ @@ -247,7 +232,7 @@ def test_assign_study_locus_id__null_variant_id(spark: SparkSession) -> None: ( # locus is null, should return lead variant [ - (1, "traitA", "22_varA", None), + ("1", "traitA", "22_varA", None), ], [ ( @@ -265,7 +250,7 @@ def test_unique_variants_in_locus( # assert isinstance(mock_study_locus.test_unique_variants_in_locus(), DataFrame) schema = StructType( [ - StructField("studyLocusId", LongType(), True), + StructField("studyLocusId", StringType(), True), StructField("studyId", StringType(), True), StructField("variantId", StringType(), True), StructField( @@ -308,7 +293,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Observed ( - 1, + "1", "traitA", "leadB", [{"variantId": "tagVariantA", "posteriorProbability": 1.0}], @@ -317,7 +302,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Expected ( - 1, + "1", "traitA", "leadB", [ @@ -336,7 +321,7 @@ def test_clump(mock_study_locus: StudyLocus) -> 
None: [ # Observed ( - 1, + "1", "traitA", "leadA", [ @@ -353,7 +338,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Expected ( - 1, + "1", "traitA", "leadA", [ @@ -408,7 +393,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Observed ( - 1, + "1", "traitA", "leadB", None, @@ -417,7 +402,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Expected ( - 1, + "1", "traitA", "leadB", None, @@ -429,7 +414,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Observed ( - 1, + "1", "traitA", "leadB", [], @@ -438,7 +423,7 @@ def test_clump(mock_study_locus: StudyLocus) -> None: [ # Expected ( - 1, + "1", "traitA", "leadB", None, @@ -453,7 +438,7 @@ def test_annotate_credible_sets( """Test annotate_credible_sets.""" schema = StructType( [ - StructField("studyLocusId", LongType(), True), + StructField("studyLocusId", StringType(), True), StructField("studyId", StringType(), True), StructField("variantId", StringType(), True), StructField( @@ -556,12 +541,12 @@ class TestStudyLocusVariantValidation: STUDYLOCUS_DATA = [ # First studylocus passes qc: - (1, "v1", "s1", "v1"), - (1, "v1", "s1", "v2"), - (1, "v1", "s1", "v3"), + ("1", "v1", "s1", "v1"), + ("1", "v1", "s1", "v2"), + ("1", "v1", "s1", "v3"), # Second studylocus passes qc: - (2, "v1", "s1", "v1"), - (2, "v1", "s1", "v5"), + ("2", "v1", "s1", "v1"), + ("2", "v1", "s1", "v5"), ] STUDYLOCUS_HEADER = ["studyLocusId", "variantId", "studyId", "tagVariantId"] @@ -578,7 +563,7 @@ def _setup(self: TestStudyLocusVariantValidation, spark: SparkSession) -> None: self.credible_set = StudyLocus( _df=( spark.createDataFrame(self.STUDYLOCUS_DATA, self.STUDYLOCUS_HEADER) - .withColumn("studyLocusId", f.col("studyLocusId").cast(t.LongType())) + .withColumn("studyLocusId", f.col("studyLocusId").cast(t.StringType())) .withColumn("qualityControls", f.array()) .groupBy("studyLocusId", "variantId", "studyId") .agg( @@ -619,7 +604,7 @@ def test_validation_correctness(self: 
TestStudyLocusVariantValidation) -> None: # Check that the right one is flagged: assert ( validated.filter( - (f.size("qualityControls") > 0) & (f.col("studyLocusId") == 2) + (f.size("qualityControls") > 0) & (f.col("studyLocusId") == "2") ).count() == 1 ) @@ -630,17 +615,17 @@ class TestStudyLocusValidation: STUDY_LOCUS_DATA = [ # Won't be flagged: - (1, "v1", "s1", 1.0, -8, []), + ("1", "v1", "s1", 1.0, -8, []), # Already flagged, needs to be tested if the flag reamins unique: - (2, "v2", "s2", 5.0, -4, [StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value]), + ("2", "v2", "s2", 5.0, -4, [StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value]), # To be flagged: - (3, "v3", "s3", 1.0, -4, []), - (4, "v4", "s4", 5.0, -3, []), + ("3", "v3", "s3", 1.0, -4, []), + ("4", "v4", "s4", 5.0, -3, []), ] STUDY_LOCUS_SCHEMA = t.StructType( [ - t.StructField("studyLocusId", t.LongType(), False), + t.StructField("studyLocusId", t.StringType(), False), t.StructField("variantId", t.StringType(), False), t.StructField("studyId", t.StringType(), False), t.StructField("pValueMantissa", t.FloatType(), False), @@ -779,7 +764,7 @@ def _setup(self: TestStudyLocusWindowClumping, spark: SparkSession) -> None: ).withColumns( { "studyLocusId": f.monotonically_increasing_id().cast( - t.LongType() + t.StringType() ), "pValueMantissa": f.lit(1).cast(t.FloatType()), "variantId": f.concat( @@ -839,23 +824,23 @@ class TestStudyLocusRedundancyFlagging: """Collection of tests related to flagging redundant credible sets.""" STUDY_LOCUS_DATA = [ - (1, "v1", "s1", "pics", []), - (2, "v2", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - (3, "v3", "s1", "pics", []), - (3, "v3", "s1", "pics", []), - (1, "v1", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - (1, "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - (1, "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - (1, "v1", "s3", "SuSie", []), - (1, "v1", "s3", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - (1, "v1", 
"s4", "pics", []), - (1, "v1", "s4", "SuSie", []), - (1, "v1", "s4", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s1", "pics", []), + ("2", "v2", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("3", "v3", "s1", "pics", []), + ("3", "v3", "s1", "pics", []), + ("1", "v1", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s3", "SuSie", []), + ("1", "v1", "s3", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s4", "pics", []), + ("1", "v1", "s4", "SuSie", []), + ("1", "v1", "s4", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), ] STUDY_LOCUS_SCHEMA = t.StructType( [ - t.StructField("studyLocusId", t.LongType(), False), + t.StructField("studyLocusId", t.StringType(), False), t.StructField("variantId", t.StringType(), False), t.StructField("studyId", t.StringType(), False), t.StructField("finemappingMethod", t.StringType(), False), @@ -912,7 +897,7 @@ class TestStudyLocusSuSiERedundancyFlagging: STUDY_LOCUS_DATA: Any = [ # to be flagged due to v4 ( - 1, + "1", "v1", "s1", "X", @@ -928,7 +913,7 @@ class TestStudyLocusSuSiERedundancyFlagging: ), # to be flagged due to v4 ( - 2, + "2", "v2", "s1", "X", @@ -943,7 +928,7 @@ class TestStudyLocusSuSiERedundancyFlagging: ), # NOT to be flagged (outside regions) ( - 3, + "3", "v3", "s1", "X", @@ -958,7 +943,7 @@ class TestStudyLocusSuSiERedundancyFlagging: ), # NOT to be flagged (SuSie-Inf credible set) ( - 4, + "4", "v4", "s1", "X", @@ -970,7 +955,7 @@ class TestStudyLocusSuSiERedundancyFlagging: ), # NOT to be flagged (Unresolved LD) ( - 5, + "5", "v5", "s1", "X", @@ -984,7 +969,7 @@ class TestStudyLocusSuSiERedundancyFlagging: ), # NOT to be flagged (different study) ( - 6, + "6", "v6", "s2", "X", @@ -1001,7 +986,7 @@ class TestStudyLocusSuSiERedundancyFlagging: STUDY_LOCUS_SCHEMA = t.StructType( [ - 
t.StructField("studyLocusId", t.LongType(), False), + t.StructField("studyLocusId", t.StringType(), False), t.StructField("variantId", t.StringType(), False), t.StructField("studyId", t.StringType(), False), t.StructField("chromosome", t.StringType(), False), diff --git a/tests/gentropy/dataset/test_study_locus_overlap.py b/tests/gentropy/dataset/test_study_locus_overlap.py index 7e591df30..5dcba19c9 100644 --- a/tests/gentropy/dataset/test_study_locus_overlap.py +++ b/tests/gentropy/dataset/test_study_locus_overlap.py @@ -19,19 +19,19 @@ def test_convert_to_square_matrix(spark: SparkSession) -> None: mock_sl_overlap = StudyLocusOverlap( _df=spark.createDataFrame( [ - (1, 2, "eqtl", "variant2"), + ("1", "2", "eqtl", "variant2"), ], - "leftStudyLocusId LONG, rightStudyLocusId LONG, rightStudyType STRING, tagVariantId STRING", + "leftStudyLocusId STRING, rightStudyLocusId STRING, rightStudyType STRING, tagVariantId STRING", ), _schema=StudyLocusOverlap.get_schema(), ) expected_df = spark.createDataFrame( [ - (1, 2, "eqtl", "variant2"), - (2, 1, "eqtl", "variant2"), + ("1", "2", "eqtl", "variant2"), + ("2", "1", "eqtl", "variant2"), ], - "leftStudyLocusId LONG, rightStudyLocusId LONG, rightStudyType STRING, tagVariantId STRING", + "leftStudyLocusId STRING, rightStudyLocusId STRING, rightStudyType STRING, tagVariantId STRING", ) observed_df = mock_sl_overlap._convert_to_square_matrix().df diff --git a/tests/gentropy/dataset/test_study_locus_overlaps.py b/tests/gentropy/dataset/test_study_locus_overlaps.py index 745f07ed2..58dc95039 100644 --- a/tests/gentropy/dataset/test_study_locus_overlaps.py +++ b/tests/gentropy/dataset/test_study_locus_overlaps.py @@ -34,21 +34,21 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> # observed - input DataFrame representing gwas and nongwas data to find overlapping signals [ { - "studyLocusId": 1, + "studyLocusId": "1", "studyId": "A", "studyType": "gwas", "chromosome": "1", "tagVariantId": "A", }, { 
- "studyLocusId": 2, + "studyLocusId": "2", "studyId": "B", "studyType": "eqtl", "chromosome": "1", "tagVariantId": "A", }, { - "studyLocusId": 3, + "studyLocusId": "3", "studyId": "C", "studyType": "gwas", "chromosome": "1", @@ -59,14 +59,14 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> False, # expected - output DataFrame with overlapping signals [ - {"leftStudyLocusId": 1, "rightStudyLocusId": 2, "chromosome": "1"}, + {"leftStudyLocusId": "1", "rightStudyLocusId": "2", "chromosome": "1"}, ], ), ( # observed - input DataFrame representing intra-study data to find overlapping signals in the same study [ { - "studyLocusId": 1, + "studyLocusId": "1", "studyId": "A", "studyType": "gwas", "chromosome": "1", @@ -74,7 +74,7 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> "tagVariantId": "A", }, { - "studyLocusId": 2, + "studyLocusId": "2", "studyId": "A", "studyType": "gwas", "chromosome": "1", @@ -82,7 +82,7 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> "tagVariantId": "A", }, { - "studyLocusId": 3, + "studyLocusId": "3", "studyId": "B", "studyType": "gwas", "chromosome": "1", @@ -93,7 +93,7 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> # intrastudy - bool of whether or not to use inter-study or intra-study logic True, # expected - output DataFrame with overlapping signals - [{"leftStudyLocusId": 2, "rightStudyLocusId": 1, "chromosome": "1"}], + [{"leftStudyLocusId": "2", "rightStudyLocusId": "1", "chromosome": "1"}], ), ], ) @@ -106,7 +106,7 @@ def test_overlapping_peaks( """Test overlapping signals between GWAS-GWAS and GWAS-Molecular trait to make sure that mQTLs are always on the right.""" mock_schema = t.StructType( [ - t.StructField("studyLocusId", t.LongType()), + t.StructField("studyLocusId", t.StringType()), t.StructField("studyId", t.StringType()), t.StructField("studyType", t.StringType()), 
t.StructField("chromosome", t.StringType()), @@ -116,8 +116,8 @@ def test_overlapping_peaks( ) expected_schema = t.StructType( [ - t.StructField("leftStudyLocusId", t.LongType()), - t.StructField("rightStudyLocusId", t.LongType()), + t.StructField("leftStudyLocusId", t.StringType()), + t.StructField("rightStudyLocusId", t.StringType()), t.StructField("chromosome", t.StringType()), ] ) diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py index 130097f25..fe9608bf0 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py @@ -4,7 +4,7 @@ from pyspark.sql import DataFrame from pyspark.sql import functions as f -from pyspark.sql.types import LongType +from pyspark.sql.types import StringType from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.gwas_catalog.associations import ( @@ -71,7 +71,7 @@ def test_map_variants_to_variant_index( assert isinstance( GWASCatalogCuratedAssociationsParser._map_variants_to_gnomad_variants( sample_gwas_catalog_associations.withColumn( - "studyLocusId", f.monotonically_increasing_id().cast(LongType()) + "studyLocusId", f.monotonically_increasing_id().cast(StringType()) ), mock_variant_index, ), diff --git a/tests/gentropy/datasource/open_targets/test_variants.py b/tests/gentropy/datasource/open_targets/test_variants.py index 247a9d81e..6aa22e628 100644 --- a/tests/gentropy/datasource/open_targets/test_variants.py +++ b/tests/gentropy/datasource/open_targets/test_variants.py @@ -25,7 +25,7 @@ def test_as_vcf_df_credible_set( df_credible_set_df = spark.createDataFrame( [ { - "studyLocusId": 1, + "studyLocusId": "1", "variantId": "1_2_C_G", "studyId": "study1", "locus": [ diff --git a/tests/gentropy/method/test_clump.py b/tests/gentropy/method/test_clump.py index 757c79305..ed07608db 100644 --- 
a/tests/gentropy/method/test_clump.py +++ b/tests/gentropy/method/test_clump.py @@ -88,7 +88,7 @@ class TestIsLeadLinked: SCHEMA = t.StructType( [ t.StructField("studyId", t.StringType(), True), - t.StructField("studyLocusId", t.LongType(), True), + t.StructField("studyLocusId", t.StringType(), True), t.StructField("chromosome", t.StringType(), True), t.StructField("variantId", t.StringType(), True), t.StructField("pValueMantissa", t.FloatType(), True), diff --git a/tests/gentropy/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py index e292784c1..1d788eb1f 100644 --- a/tests/gentropy/method/test_colocalisation_method.py +++ b/tests/gentropy/method/test_colocalisation_method.py @@ -7,7 +7,7 @@ import pytest from pandas.testing import assert_frame_equal from pyspark.sql import SparkSession -from pyspark.sql.types import DoubleType, LongType, StringType, StructField, StructType +from pyspark.sql.types import DoubleType, StringType, StructField, StructType from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.study_locus_overlap import StudyLocusOverlap @@ -27,8 +27,8 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: # observed overlap [ { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp", @@ -51,16 +51,16 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: # observed overlap [ { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp1", "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, }, { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp2", @@ -120,8 +120,8 @@ def test_coloc_no_logbf( 
spark.createDataFrame( [ { - "leftStudyLocusId": 1, - "rightStudyLocusId": 2, + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp", @@ -133,8 +133,8 @@ def test_coloc_no_logbf( ], schema=StructType( [ - StructField("leftStudyLocusId", LongType(), False), - StructField("rightStudyLocusId", LongType(), False), + StructField("leftStudyLocusId", StringType(), False), + StructField("rightStudyLocusId", StringType(), False), StructField("rightStudyType", StringType(), False), StructField("chromosome", StringType(), False), StructField("tagVariantId", StringType(), False), From b7ccfae18d30982022e31bde194b5068f8c53d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Tue, 1 Oct 2024 09:49:32 +0100 Subject: [PATCH 066/188] feat: drop `v2g` and reimplement distance features (#771) * refactor(L2GFeatureMatrix): remove schema validation * refactor(FeatureFactory): reshape feature generation WIP * chore: pre-commit auto fixes [...] 
* chore: set l2gfeature properties with decorator * chore(l2gfeature): make credible_set and input_dependency instance attributes * chore(l2gfeature): make credible_set and input_dependency instance attributes * chore(featurefactory): distanceTssMeanFeature working * refactor(l2g): improve step dependency management * feat: implement * chore: fix mypy issues * feat: l2gfeaturematrix.from_features_list working * chore: comment out obsolete refs * chore(L2GFeatureMatrix): change `mode` attribute to `with_gold_standard` * refactor(l2g): move feature matrix writing to training module * feat(L2GFeatureMatrix): accept L2GGoldStandard or StudyLocus as inputs * feat: implement methods to build a feature matrix based on a studylocus/L2GGoldStandard instance * feat: coloc logic prototype * feat(l2g): filter non gwas credible sets at the start of the step * feat: rewrite colocalisation feature factory * test: add `test_colocalisation_feature_type` * test(colocalisation): add test_extract_maximum_coloc_probability_per_region_and_gene * feat(L2GFeatureInputLoader): support multiple deps by passing loader as kwarg * test: add integration tests `test_build_feature_matrix` * feat(variant_index): add `get_distance_to_gene` and deprecate `get_distance_from_tss` * feat(variant_index): deprecate `get_most_severe_transcript_consequence` * feat(variant_index): add `get_loftee` and deprecate `get_plof_v2g` * chore: reduce v2g assesments to intervals * feat(feature_factory): add distance to footprint features * test: refactor `test_feature_factory_return_type` * feat(feature_factory): add all distance neighbourhood features * chore: delete v2g * feat(feature_factory): add all colocalisation neighbourhood features * chore: final v2g deletion * chore: drop config yamls * refactor: move feature classes to datasets module * docs: update feature docs * fix: import * test: add semantic `TestCommonColocalisationFeatureLogic` * test: add semantic `TestCommonDistanceFeatureLogic` * refactor: 
separate features into diff modules * fix: documentation references * feat: implement distance to sentinel and adapt definitions * docs: update distance class names * fix: add all variant index mandatory fields in tests * fix(schema_validator): remove extra `[]` from parent prefix * fix: convert studylocusid to string in tests * revert: revert 72ea515fb3ea9cb07c448072be2449f4ced0dab3 (it was ok) * fix: adapt test --- docs/howto/command_line/run_step_in_cli.md | 1 - docs/python_api/datasets/l2g_feature.md | 29 - .../datasets/l2g_features/_l2g_feature.md | 11 + .../datasets/l2g_features/colocalisation.md | 27 + .../datasets/l2g_features/distance.md | 19 + docs/python_api/datasets/variant_to_gene.md | 9 - docs/python_api/methods/l2g/_l2g.md | 7 +- docs/python_api/steps/variant_to_gene_step.md | 5 - notebooks/Release_QC_metrics.ipynb | 85 +- src/gentropy/assets/schemas/v2g.json | 77 -- src/gentropy/common/spark_helpers.py | 6 +- src/gentropy/config.py | 47 +- src/gentropy/dataset/colocalisation.py | 1 + src/gentropy/dataset/intervals.py | 34 +- src/gentropy/dataset/l2g_feature.py | 506 ----------- src/gentropy/dataset/l2g_features/__init__.py | 3 + .../dataset/l2g_features/colocalisation.py | 791 ++++++++++++++++++ src/gentropy/dataset/l2g_features/distance.py | 422 ++++++++++ .../dataset/l2g_features/l2g_feature.py | 65 ++ src/gentropy/dataset/l2g_gold_standard.py | 10 +- src/gentropy/dataset/v2g.py | 51 -- src/gentropy/dataset/variant_index.py | 202 ++--- src/gentropy/datasource/ensembl/vep_parser.py | 6 +- .../open_targets/l2g_gold_standard.py | 28 +- src/gentropy/l2g.py | 51 +- src/gentropy/method/l2g/feature_factory.py | 40 +- src/gentropy/variant_to_gene.py | 119 --- tests/gentropy/conftest.py | 37 +- tests/gentropy/dataset/test_intervals.py | 18 - tests/gentropy/dataset/test_l2g.py | 2 +- tests/gentropy/dataset/test_l2g_feature.py | 441 +++++++++- tests/gentropy/dataset/test_v2g.py | 23 - tests/gentropy/dataset/test_variant_index.py | 62 +- 
.../open_targets/test_l2g_gold_standard.py | 74 +- tests/gentropy/test_schemas.py | 30 +- 35 files changed, 2093 insertions(+), 1246 deletions(-) delete mode 100644 docs/python_api/datasets/l2g_feature.md create mode 100644 docs/python_api/datasets/l2g_features/_l2g_feature.md create mode 100644 docs/python_api/datasets/l2g_features/colocalisation.md create mode 100644 docs/python_api/datasets/l2g_features/distance.md delete mode 100644 docs/python_api/datasets/variant_to_gene.md delete mode 100644 docs/python_api/steps/variant_to_gene_step.md delete mode 100644 src/gentropy/assets/schemas/v2g.json delete mode 100644 src/gentropy/dataset/l2g_feature.py create mode 100644 src/gentropy/dataset/l2g_features/__init__.py create mode 100644 src/gentropy/dataset/l2g_features/colocalisation.py create mode 100644 src/gentropy/dataset/l2g_features/distance.py create mode 100644 src/gentropy/dataset/l2g_features/l2g_feature.py delete mode 100644 src/gentropy/dataset/v2g.py delete mode 100644 src/gentropy/variant_to_gene.py delete mode 100644 tests/gentropy/dataset/test_intervals.py delete mode 100644 tests/gentropy/dataset/test_v2g.py diff --git a/docs/howto/command_line/run_step_in_cli.md b/docs/howto/command_line/run_step_in_cli.md index ac7d55ff9..b935d84fb 100644 --- a/docs/howto/command_line/run_step_in_cli.md +++ b/docs/howto/command_line/run_step_in_cli.md @@ -24,7 +24,6 @@ Available options: ukbiobank variant_annotation variant_index - variant_to_gene Set the environment variable HYDRA_FULL_ERROR=1 for a complete stack trace. 
``` diff --git a/docs/python_api/datasets/l2g_feature.md b/docs/python_api/datasets/l2g_feature.md deleted file mode 100644 index bdab67e7c..000000000 --- a/docs/python_api/datasets/l2g_feature.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -title: L2G Feature ---- - -## Abstract Class - -::: gentropy.dataset.l2g_feature.L2GFeature - -## Feature Classes - -### Derived from colocalisation - -::: gentropy.dataset.l2g_feature.EQtlColocClppMaximumFeature -::: gentropy.dataset.l2g_feature.PQtlColocClppMaximumFeature -::: gentropy.dataset.l2g_feature.SQtlColocClppMaximumFeature -::: gentropy.dataset.l2g_feature.TuQtlColocClppMaximumFeature -::: gentropy.dataset.l2g_feature.EQtlColocH4MaximumFeature -::: gentropy.dataset.l2g_feature.PQtlColocH4MaximumFeature -::: gentropy.dataset.l2g_feature.SQtlColocH4MaximumFeature -::: gentropy.dataset.l2g_feature.TuQtlColocH4MaximumFeature - -### Derived from distance - -::: gentropy.dataset.l2g_feature.DistanceTssMinimumFeature -::: gentropy.dataset.l2g_feature.DistanceTssMeanFeature - -## Schema - ---8<-- "assets/schemas/l2g_feature.md" diff --git a/docs/python_api/datasets/l2g_features/_l2g_feature.md b/docs/python_api/datasets/l2g_features/_l2g_feature.md new file mode 100644 index 000000000..b2f6f8187 --- /dev/null +++ b/docs/python_api/datasets/l2g_features/_l2g_feature.md @@ -0,0 +1,11 @@ +--- +title: L2G Feature +--- + +## Abstract Class + +::: gentropy.dataset.l2g_features.l2g_feature.L2GFeature + +## Schema + +--8<-- "assets/schemas/l2g_feature.md" diff --git a/docs/python_api/datasets/l2g_features/colocalisation.md b/docs/python_api/datasets/l2g_features/colocalisation.md new file mode 100644 index 000000000..a3928c4ab --- /dev/null +++ b/docs/python_api/datasets/l2g_features/colocalisation.md @@ -0,0 +1,27 @@ +--- +title: From colocalisation +--- + +## List of features + +::: gentropy.dataset.l2g_features.colocalisation.EQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.PQtlColocClppMaximumFeature +::: 
gentropy.dataset.l2g_features.colocalisation.SQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocClppMaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.EQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.PQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.SQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocH4MaximumFeature +::: gentropy.dataset.l2g_features.colocalisation.EQtlColocClppMaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.PQtlColocClppMaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.SQtlColocClppMaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocClppMaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.EQtlColocH4MaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.PQtlColocH4MaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.SQtlColocH4MaximumNeighbourhoodFeature +::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocH4MaximumNeighbourhoodFeature + +## Common logic + +::: gentropy.dataset.l2g_features.colocalisation.common_colocalisation_feature_logic +::: gentropy.dataset.l2g_features.colocalisation.common_neighbourhood_colocalisation_feature_logic diff --git a/docs/python_api/datasets/l2g_features/distance.md b/docs/python_api/datasets/l2g_features/distance.md new file mode 100644 index 000000000..e426b2952 --- /dev/null +++ b/docs/python_api/datasets/l2g_features/distance.md @@ -0,0 +1,19 @@ +--- +title: From distance +--- + +## List of features + +::: gentropy.dataset.l2g_features.distance.DistanceSentinelTssFeature +::: gentropy.dataset.l2g_features.distance.DistanceSentinelTssNeighbourhoodFeature +::: gentropy.dataset.l2g_features.distance.DistanceTssMeanFeature +::: gentropy.dataset.l2g_features.distance.DistanceTssMeanNeighbourhoodFeature +::: 
gentropy.dataset.l2g_features.distance.DistanceSentinelFootprintFeature +::: gentropy.dataset.l2g_features.distance.DistanceSentinelFootprintNeighbourhoodFeature +::: gentropy.dataset.l2g_features.distance.DistanceFootprintMeanFeature +::: gentropy.dataset.l2g_features.distance.DistanceFootprintMeanNeighbourhoodFeature + +## Common logic + +::: gentropy.dataset.l2g_features.distance.common_distance_feature_logic +::: gentropy.dataset.l2g_features.distance.common_neighbourhood_distance_feature_logic diff --git a/docs/python_api/datasets/variant_to_gene.md b/docs/python_api/datasets/variant_to_gene.md deleted file mode 100644 index 2af67df92..000000000 --- a/docs/python_api/datasets/variant_to_gene.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: Variant-to-gene ---- - -::: gentropy.dataset.v2g.V2G - -## Schema - ---8<-- "assets/schemas/v2g.md" diff --git a/docs/python_api/methods/l2g/_l2g.md b/docs/python_api/methods/l2g/_l2g.md index fca3ba79d..bbd7dad66 100644 --- a/docs/python_api/methods/l2g/_l2g.md +++ b/docs/python_api/methods/l2g/_l2g.md @@ -9,13 +9,10 @@ The **“locus-to-gene” (L2G)** model derives features to prioritize likely ca - **Chromatin Interaction:** (e.g., promoter-capture Hi-C) - **Variant Pathogenicity:** (from VEP) -The L2G model is distinct from the variant-to-gene (V2G) pipeline in that it: - -- Uses a machine-learning model to learn the weights of each evidence source based on a gold standard of previously identified causal genes. -- Relies upon fine-mapping and colocalization data. - Some of the predictive features weight variant-to-gene (or genomic region-to-gene) evidence based on the posterior probability that the variant is causal, determined through fine-mapping of the GWAS association. +For a more detailed description of how each feature is computed, see [the L2G Feature documentation](../../datasets/l2g_features/_l2g_feature.md). 
+ Details of the L2G model are provided in our Nature Genetics publication (ref - [Nature Genetics Publication](https://www.nature.com/articles/s41588-021-00945-5)): - **Title:** An open approach to systematically prioritize causal variants and genes at all published human GWAS trait-associated loci. diff --git a/docs/python_api/steps/variant_to_gene_step.md b/docs/python_api/steps/variant_to_gene_step.md deleted file mode 100644 index db1c1fd20..000000000 --- a/docs/python_api/steps/variant_to_gene_step.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -title: variant_to_gene ---- - -::: gentropy.variant_to_gene.V2GStep diff --git a/notebooks/Release_QC_metrics.ipynb b/notebooks/Release_QC_metrics.ipynb index aa0924711..4eb27015b 100644 --- a/notebooks/Release_QC_metrics.ipynb +++ b/notebooks/Release_QC_metrics.ipynb @@ -13,21 +13,17 @@ "1. Import necessary modules and set up the release path and version.\n", "2. Load and analyze the variant index data:\n", " - Count the number of unique variants.\n", - "3. Load and analyze the variant-to-gene (v2g) data:\n", - " - Count the number of unique variants and total variant-to-gene assignments.\n", - " - Count the number of v2g assignments where the score is > 0.8.\n", - " - Plot a histogram/density plot for the \"score\" column.\n", - "4. Load and analyze the study index data for different data sources (FinnGen, GWASCat, eQTLcat):\n", + "3. Load and analyze the study index data for different data sources (FinnGen, GWASCat, eQTLcat):\n", " - Count the number of unique studies for each data source.\n", - "5. Analyze the credible sets for each datasource (Finngen, gwascat, eqtlcat):\n", + "4. Analyze the credible sets for each datasource (Finngen, gwascat, eqtlcat):\n", " - Analyze the credible sets:\n", " - Count the number of unique credible sets and unique study IDs.\n", " - Plot a scatter plot of the credible set size vs. 
the top posterior probability.\n", " - Count the number of credible sets with a top SNP posterior probability > 0.9..\n", - "6. Analyze colocalization data:\n", + "5. Analyze colocalization data:\n", " - Count the total number of colocalizations and the number with clpp > 0.8.\n", " - Calculate the average number of overlaps per credible set.\n", - "7. Analyze locus-to-gene (L2G) predictions:\n", + "6. Analyze locus-to-gene (L2G) predictions:\n", " - Load the locus-to-gene predictions data.\n", " - How many Studylocus contains a \"good\" l2g prediction? (l2g_score > 0.5)\n", " - How does l2g perform based on different datasource inputs? (impossible to tell)\n", @@ -126,79 +122,6 @@ "#variant_index.filter(variant_index[\"alleleFrequencies.populationName\"] > 0.05).show(10, False)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "#### 3. Load and analyze the variant-to-gene (v2g) data:\n", - " - Count the number of unique variants and total variant-to-gene assignments.\n", - " - Count the number of v2g assignments where the score is > 0.8." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unique variants in v2g release: 5090991 , total variant to gene assignments: 105771851 , number of v2g assignments where score > 0.8: 23176515 ( 4.552 %)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Summary of v2g_score: Mean: 0.5909395615801637 L.quart: 0.29 Median: 0.62 U.quart: 0.94\n" - ] - } - ], - "source": [ - "#v2g_path='gs://genetics_etl_python_playground/releases/24.03/variant_to_gene'\n", - "v2g_path=f\"{release_path}/{release_ver}/variant_to_gene\"\n", - "v2g=session.spark.read.parquet(v2g_path, recursiveFileLookup=True)\n", - "\n", - "#How many variants?\n", - "sample_size_quartiles = v2g.stat.approxQuantile(\"score\", [0.25, 0.5, 0.75], 0.01)\n", - "#v2g.select().toPandas().plot.hist()\n", - "#v2g.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " - Plot a histogram/density plot for the \"score\" column." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "#The histogram/density plot for “score”\n", - "# Out of mem error:\n", - "#v2g.select(f.col(\"score\")).toPandas().plot.hist(bins=10, alpha=0.5, label=\"v2g scores\")" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/src/gentropy/assets/schemas/v2g.json b/src/gentropy/assets/schemas/v2g.json deleted file mode 100644 index afbe401dd..000000000 --- a/src/gentropy/assets/schemas/v2g.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "type": "struct", - "fields": [ - { - "name": "geneId", - "type": "string", - "nullable": false, - "metadata": {} - }, - { - "name": "variantId", - "type": "string", - "nullable": false, - "metadata": {} - }, - { - "name": "distance", - "type": "long", - "nullable": true, - "metadata": {} - }, - { - "name": "chromosome", - "type": "string", - "nullable": false, - "metadata": {} - }, - { - "name": "datatypeId", - "type": "string", - "nullable": false, - "metadata": {} - }, - { - "name": "datasourceId", - "type": "string", - "nullable": false, - "metadata": {} - }, - { - "name": "score", - "type": "double", - "nullable": true, - "metadata": {} - }, - { - "name": "resourceScore", - "type": "double", - "nullable": true, - "metadata": {} - }, - { - "name": "pmid", - "type": "string", - "nullable": true, - "metadata": {} - }, - { - "name": "biofeature", - "type": "string", - "nullable": true, - "metadata": {} - }, - { - "name": "variantFunctionalConsequenceId", - "type": "string", - "nullable": true, - "metadata": {} - }, - { - "name": "isHighQualityPlof", - "type": "boolean", - "nullable": true, - "metadata": {} - } - ] -} diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 4d24212a2..8f60956e7 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -6,7 +6,7 @@ import sys from functools import reduce, wraps from itertools import chain -from 
typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, TypeVar import pyspark.sql.functions as f import pyspark.sql.types as t @@ -447,14 +447,14 @@ def order_array_of_structs_by_two_fields( ) -def map_column_by_dictionary(col: Column, mapping_dict: Dict[str, str]) -> Column: +def map_column_by_dictionary(col: Column, mapping_dict: dict[str, str]) -> Column: """Map column values to dictionary values by key. Missing consequence label will be converted to None, unmapped consequences will be mapped as None. Args: col (Column): Column containing labels to map. - mapping_dict (Dict[str, str]): Dictionary with mapping key/value pairs. + mapping_dict (dict[str, str]): Dictionary with mapping key/value pairs. Returns: Column: Column with mapped values. diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 0a1f9438a..6f94cc9ed 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -2,7 +2,7 @@ import os from dataclasses import dataclass, field -from typing import Any, Dict, List +from typing import Any, List from hail import __file__ as hail_location from hydra.core.config_store import ConfigStore @@ -235,7 +235,7 @@ class LocusToGeneConfig(StepConfig): run_mode: str = MISSING predictions_path: str = MISSING credible_set_path: str = MISSING - variant_gene_path: str = MISSING + variant_index_path: str = MISSING colocalisation_path: str = MISSING study_index_path: str = MISSING model_path: str | None = None @@ -254,6 +254,16 @@ class LocusToGeneConfig(StepConfig): "pQtlColocH4Maximum", "sQtlColocH4Maximum", "tuQtlColocH4Maximum", + # distance to gene footprint + "distanceSentinelFootprint", + "distanceSentinelFootprintNeighbourhood", + "distanceFootprintMean", + "distanceFootprintMeanNeighbourhood", + # distance to gene tss + "distanceTssMean", + "distanceTssMeanNeighbourhood", + "distanceSentinelTss", + "distanceSentinelTssNeighbourhood", ] ) 
hyperparameters: dict[str, Any] = field( @@ -357,38 +367,6 @@ class ConvertToVcfStepConfig(StepConfig): _target_: str = "gentropy.variant_index.ConvertToVcfStep" -@dataclass -class VariantToGeneConfig(StepConfig): - """V2G step configuration.""" - - variant_index_path: str = MISSING - gene_index_path: str = MISSING - vep_consequences_path: str = MISSING - liftover_chain_file_path: str = MISSING - liftover_max_length_difference: int = 100 - max_distance: int = 500_000 - approved_biotypes: List[str] = field( - default_factory=lambda: [ - "protein_coding", - "3prime_overlapping_ncRNA", - "antisense", - "bidirectional_promoter_lncRNA", - "IG_C_gene", - "IG_D_gene", - "IG_J_gene", - "IG_V_gene", - "lincRNA", - "macro_lncRNA", - "non_coding", - "sense_intronic", - "sense_overlapping", - ] - ) - interval_sources: Dict[str, str] = field(default_factory=dict) - v2g_path: str = MISSING - _target_: str = "gentropy.variant_to_gene.V2GStep" - - @dataclass class LocusBreakerClumpingConfig(StepConfig): """Locus breaker clumping step configuration.""" @@ -565,7 +543,6 @@ def register_config() -> None: cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess", node=UkbPppEurConfig) cs.store(group="step", name="variant_index", node=VariantIndexConfig) cs.store(group="step", name="variant_to_vcf", node=ConvertToVcfStepConfig) - cs.store(group="step", name="variant_to_gene", node=VariantToGeneConfig) cs.store( group="step", name="window_based_clumping", node=WindowBasedClumpingStepConfig ) diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index 9e9035488..c85209462 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -18,6 +18,7 @@ from pyspark.sql.types import StructType from gentropy.dataset.study_index import StudyIndex + from gentropy.dataset.study_locus import StudyLocus from functools import reduce diff --git a/src/gentropy/dataset/intervals.py b/src/gentropy/dataset/intervals.py index 
c3b9136c9..37158810b 100644 --- a/src/gentropy/dataset/intervals.py +++ b/src/gentropy/dataset/intervals.py @@ -1,22 +1,19 @@ """Interval dataset.""" + from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING -import pyspark.sql.functions as f - from gentropy.common.Liftover import LiftOverSpark from gentropy.common.schemas import parse_spark_schema from gentropy.dataset.dataset import Dataset from gentropy.dataset.gene_index import GeneIndex -from gentropy.dataset.v2g import V2G if TYPE_CHECKING: from pyspark.sql import SparkSession from pyspark.sql.types import StructType - from gentropy.dataset.variant_index import VariantIndex @dataclass @@ -74,32 +71,3 @@ def from_source( source_class = source_to_class[source_name] data = source_class.read(spark, source_path) # type: ignore return source_class.parse(data, gene_index, lift) # type: ignore - - def v2g(self: Intervals, variant_index: VariantIndex) -> V2G: - """Convert intervals into V2G by intersecting with a variant index. 
- - Args: - variant_index (VariantIndex): Variant index dataset - - Returns: - V2G: Variant-to-gene evidence dataset - """ - return V2G( - _df=( - self.df.alias("interval") - .join( - variant_index.df.selectExpr( - "chromosome as vi_chromosome", "variantId", "position" - ).alias("vi"), - on=[ - f.col("vi.vi_chromosome") == f.col("interval.chromosome"), - f.col("vi.position").between( - f.col("interval.start"), f.col("interval.end") - ), - ], - how="inner", - ) - .drop("start", "end", "vi_chromosome", "position") - ), - _schema=V2G.get_schema(), - ) diff --git a/src/gentropy/dataset/l2g_feature.py b/src/gentropy/dataset/l2g_feature.py deleted file mode 100644 index 319570cfd..000000000 --- a/src/gentropy/dataset/l2g_feature.py +++ /dev/null @@ -1,506 +0,0 @@ -"""L2G Feature Dataset with a collection of methods that extract features from the gentropy datasets to be fed in L2G.""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any - -import pyspark.sql.functions as f - -from gentropy.common.schemas import parse_spark_schema -from gentropy.common.spark_helpers import convert_from_wide_to_long -from gentropy.dataset.colocalisation import Colocalisation -from gentropy.dataset.dataset import Dataset -from gentropy.dataset.l2g_gold_standard import L2GGoldStandard -from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.v2g import V2G - -if TYPE_CHECKING: - from pyspark.sql import DataFrame - from pyspark.sql.types import StructType - - -@dataclass -class L2GFeature(Dataset, ABC): - """Locus-to-gene feature dataset.""" - - def __post_init__( - self: L2GFeature, - feature_dependency_type: Any = None, - credible_set: StudyLocus | None = None, - ) -> None: - """Initializes a L2GFeature dataset. Any child class of L2GFeature must implement the `compute` method. 
- - Args: - feature_dependency_type (Any): The dependency that the L2GFeature dataset depends on. Defaults to None. - credible_set (StudyLocus | None): The credible set that the L2GFeature dataset is based on. Defaults to None. - """ - super().__post_init__() - self.feature_dependency_type = feature_dependency_type - self.credible_set = credible_set - - @classmethod - def get_schema(cls: type[L2GFeature]) -> StructType: - """Provides the schema for the L2GFeature dataset. - - Returns: - StructType: Schema for the L2GFeature dataset - """ - return parse_spark_schema("l2g_feature.json") - - @classmethod - @abstractmethod - def compute( - cls: type[L2GFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: Any, - ) -> L2GFeature: - """Computes the L2GFeature dataset. - - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (Any): The dependency that the L2GFeature class needs to compute the feature - Returns: - L2GFeature: a L2GFeature dataset - - Raises: - NotImplementedError: This method must be implemented in the child classes - """ - raise NotImplementedError("Must be implemented in the child classes") - - -def _common_colocalisation_feature_logic( - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - colocalisation_method: str, - colocalisation_metric: str, - feature_name: str, - qtl_type: str, - *, - colocalisation: Colocalisation, - study_index: StudyIndex, - study_locus: StudyLocus, -) -> DataFrame: - """Wrapper to call the logic that creates a type of colocalisation features. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - colocalisation_method (str): The colocalisation method to filter the data by - colocalisation_metric (str): The colocalisation metric to use - feature_name (str): The name of the feature to create - qtl_type (str): The type of QTL to filter the data by - colocalisation (Colocalisation): Dataset with the colocalisation results - study_index (StudyIndex): Study index to fetch study type and gene - study_locus (StudyLocus): Study locus to traverse between colocalisation and study index - - Returns: - DataFrame: Feature annotation in long format with the columns: studyLocusId, geneId, featureName, featureValue - """ - joining_cols = ( - ["studyLocusId", "geneId"] - if isinstance(study_loci_to_annotate, L2GGoldStandard) - else ["studyLocusId"] - ) - return convert_from_wide_to_long( - study_loci_to_annotate.df.join( - colocalisation.extract_maximum_coloc_probability_per_region_and_gene( - study_locus, - study_index, - filter_by_colocalisation_method=colocalisation_method, - filter_by_qtl=qtl_type, - ), - on=joining_cols, - ) - .selectExpr( - "studyLocusId", - "geneId", - f"{colocalisation_metric} as {feature_name}", - ) - .distinct(), - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ) - - -class EQtlColocClppMaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all eQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "eQtlColocClppMaximum" - - @classmethod - def compute( - cls: type[EQtlColocClppMaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> EQtlColocClppMaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dictionary with the dependencies required. They are passed as keyword arguments. - - Returns: - EQtlColocClppMaximumFeature: Feature dataset - """ - colocalisation_method = "ECaviar" - colocalisation_metric = "clpp" - qtl_type = "eqtl" - - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class PQtlColocClppMaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "pQtlColocClppMaximum" - - @classmethod - def compute( - cls: type[PQtlColocClppMaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> PQtlColocClppMaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - PQtlColocClppMaximumFeature: Feature dataset - """ - colocalisation_method = "ECaviar" - colocalisation_metric = "clpp" - qtl_type = "pqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class SQtlColocClppMaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "sQtlColocClppMaximum" - - @classmethod - def compute( - cls: type[SQtlColocClppMaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> SQtlColocClppMaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - SQtlColocClppMaximumFeature: Feature dataset - """ - colocalisation_method = "ECaviar" - colocalisation_metric = "clpp" - qtl_type = "sqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class TuQtlColocClppMaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all tuQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "tuQtlColocClppMaximum" - - @classmethod - def compute( - cls: type[TuQtlColocClppMaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> TuQtlColocClppMaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - TuQtlColocClppMaximumFeature: Feature dataset - """ - colocalisation_method = "ECaviar" - colocalisation_metric = "clpp" - qtl_type = "tuqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class EQtlColocH4MaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all eQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "eQtlColocH4Maximum" - - @classmethod - def compute( - cls: type[EQtlColocH4MaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> EQtlColocH4MaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - EQtlColocH4MaximumFeature: Feature dataset - """ - colocalisation_method = "Coloc" - colocalisation_metric = "h4" - qtl_type = "eqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class PQtlColocH4MaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "pQtlColocH4Maximum" - - @classmethod - def compute( - cls: type[PQtlColocH4MaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> PQtlColocH4MaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - PQtlColocH4MaximumFeature: Feature dataset - """ - colocalisation_method = "Coloc" - colocalisation_metric = "h4" - qtl_type = "pqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class SQtlColocH4MaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "sQtlColocH4Maximum" - - @classmethod - def compute( - cls: type[SQtlColocH4MaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> SQtlColocH4MaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - SQtlColocH4MaximumFeature: Feature dataset - """ - colocalisation_method = "Coloc" - colocalisation_metric = "h4" - qtl_type = "sqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class TuQtlColocH4MaximumFeature(L2GFeature): - """Max H4 for each (study, locus, gene) aggregating over all tuQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "tuQtlColocH4Maximum" - - @classmethod - def compute( - cls: type[TuQtlColocH4MaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> TuQtlColocH4MaximumFeature: - """Computes the feature. - - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - TuQtlColocH4MaximumFeature: Feature dataset - """ - colocalisation_method = "Coloc" - colocalisation_metric = "h4" - qtl_type = "tuqtl" - return cls( - _df=_common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - _schema=cls.get_schema(), - ) - - -class DistanceTssMinimumFeature(L2GFeature): - """Minimum distance of all tagging variants to gene TSS.""" - - @classmethod - def compute( - cls: type[DistanceTssMinimumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: V2G, - ) -> L2GFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (V2G): Dataset that contains the distance information - - Returns: - L2GFeature: Feature dataset - - Raises: - NotImplementedError: Not implemented - """ - raise NotImplementedError - - -class DistanceTssMeanFeature(L2GFeature): - """Average distance of all tagging variants to gene TSS. - - NOTE: to be rewritten taking variant index as input - """ - - fill_na_value = 500_000 - feature_dependency_type = V2G - - @classmethod - def compute( - cls: type[DistanceTssMeanFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: V2G, - ) -> DistanceTssMeanFeature: - """Computes the feature. - - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (V2G): Dataset that contains the distance information - - Returns: - DistanceTssMeanFeature: Feature dataset - """ - agg_expr = f.mean("weightedScore").alias("distanceTssMean") - # Everything but expresion is common logic - v2g = feature_dependency.df.filter(f.col("datasourceId") == "canonical_tss") - wide_df = ( - study_loci_to_annotate.df.withColumn( - "variantInLocus", f.explode_outer("locus") - ) - .select( - "studyLocusId", - f.col("variantInLocus.variantId").alias("variantInLocusId"), - f.col("variantInLocus.posteriorProbability").alias( - "variantInLocusPosteriorProbability" - ), - ) - .join( - v2g.selectExpr("variantId as variantInLocusId", "geneId", "score"), - on="variantInLocusId", - how="inner", - ) - .withColumn( - "weightedScore", - f.col("score") * f.col("variantInLocusPosteriorProbability"), - ) - .groupBy("studyLocusId", "geneId") - .agg(agg_expr) - ) - return cls( - _df=convert_from_wide_to_long( - wide_df, - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ), - _schema=cls.get_schema(), - ) 
diff --git a/src/gentropy/dataset/l2g_features/__init__.py b/src/gentropy/dataset/l2g_features/__init__.py new file mode 100644 index 000000000..ce15cedfe --- /dev/null +++ b/src/gentropy/dataset/l2g_features/__init__.py @@ -0,0 +1,3 @@ +"""Feature factories for L2G.""" + +from __future__ import annotations diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py new file mode 100644 index 000000000..c44573b72 --- /dev/null +++ b/src/gentropy/dataset/l2g_features/colocalisation.py @@ -0,0 +1,791 @@ +"""Collection of methods that extract features from the colocalisation datasets.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pyspark.sql.functions as f +from pyspark.sql import Window + +from gentropy.common.spark_helpers import convert_from_wide_to_long +from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import StudyLocus + +if TYPE_CHECKING: + from pyspark.sql import DataFrame + + +def common_colocalisation_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + colocalisation_method: str, + colocalisation_metric: str, + feature_name: str, + qtl_type: str, + *, + colocalisation: Colocalisation, + study_index: StudyIndex, + study_locus: StudyLocus, +) -> DataFrame: + """Wrapper to call the logic that creates a type of colocalisation features. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + colocalisation_method (str): The colocalisation method to filter the data by + colocalisation_metric (str): The colocalisation metric to use + feature_name (str): The name of the feature to create + qtl_type (str): The type of QTL to filter the data by + colocalisation (Colocalisation): Dataset with the colocalisation results + study_index (StudyIndex): Study index to fetch study type and gene + study_locus (StudyLocus): Study locus to traverse between colocalisation and study index + + Returns: + DataFrame: Feature annotation in long format with the columns: studyLocusId, geneId, featureName, featureValue + """ + joining_cols = ( + ["studyLocusId", "geneId"] + if isinstance(study_loci_to_annotate, L2GGoldStandard) + else ["studyLocusId"] + ) + return ( + study_loci_to_annotate.df.join( + colocalisation.extract_maximum_coloc_probability_per_region_and_gene( + study_locus, + study_index, + filter_by_colocalisation_method=colocalisation_method, + filter_by_qtl=qtl_type, + ), + on=joining_cols, + ) + .selectExpr( + "studyLocusId", + "geneId", + f"{colocalisation_metric} as {feature_name}", + ) + .distinct() + ) + + +def common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + colocalisation_method: str, + colocalisation_metric: str, + feature_name: str, + qtl_type: str, + *, + colocalisation: Colocalisation, + study_index: StudyIndex, + study_locus: StudyLocus, +) -> DataFrame: + """Wrapper to call the logic that creates a type of colocalisation features. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + colocalisation_method (str): The colocalisation method to filter the data by + colocalisation_metric (str): The colocalisation metric to use + feature_name (str): The name of the feature to create + qtl_type (str): The type of QTL to filter the data by + colocalisation (Colocalisation): Dataset with the colocalisation results + study_index (StudyIndex): Study index to fetch study type and gene + study_locus (StudyLocus): Study locus to traverse between colocalisation and study index + + Returns: + DataFrame: Feature annotation in long format with the columns: studyLocusId, geneId, featureName, featureValue + """ + # First maximum colocalisation score for each studylocus, gene + local_feature_name = feature_name.replace("Neighbourhood", "") + local_max = common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + local_feature_name, + qtl_type, + colocalisation=colocalisation, + study_index=study_index, + study_locus=study_locus, + ) + return ( + # Then compute maximum score in the vicinity (feature will be the same for any gene associated with a studyLocus) + local_max.withColumn( + "regional_maximum", + f.max(local_feature_name).over(Window.partitionBy("studyLocusId")), + ) + .withColumn(feature_name, f.col("regional_maximum") - f.col(local_feature_name)) + .drop("regional_maximum", local_feature_name) + ) + + +class EQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all eQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "eQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[EQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> EQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dictionary with the dependencies required. They are passed as keyword arguments. + + Returns: + EQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "eqtl" + + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class EQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): + """Max CLPP for each (study, locus) aggregating over all eQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "eQtlColocClppMaximumNeighbourhood" + + @classmethod + def compute( + cls: type[EQtlColocClppMaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> EQtlColocClppMaximumNeighbourhoodFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dictionary with the dependencies required. They are passed as keyword arguments. 
+ + Returns: + EQtlColocClppMaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "eqtl" + + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class PQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "pQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[PQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> PQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + PQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "pqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class PQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "pQtlColocClppMaximumNeighbourhood" + + @classmethod + def compute( + cls: type[PQtlColocClppMaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> PQtlColocClppMaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + PQtlColocClppMaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "pqtl" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class SQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "sQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[SQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> SQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + SQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "sqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class SQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "sQtlColocClppMaximumNeighbourhood" + + @classmethod + def compute( + cls: type[SQtlColocClppMaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> SQtlColocClppMaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + SQtlColocClppMaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "sqtl" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class TuQtlColocClppMaximumFeature(L2GFeature): + """Max CLPP for each (study, locus, gene) aggregating over all tuQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "tuQtlColocClppMaximum" + + @classmethod + def compute( + cls: type[TuQtlColocClppMaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> TuQtlColocClppMaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + TuQtlColocClppMaximumFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "tuqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class TuQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): + """Max CLPP for each (study, locus) aggregating over all tuQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "tuQtlColocClppMaximumNeighbourhood" + + @classmethod + def compute( + cls: type[TuQtlColocClppMaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> TuQtlColocClppMaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + TuQtlColocClppMaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "ECaviar" + colocalisation_metric = "clpp" + qtl_type = "tuqtl" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class EQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all eQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "eQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[EQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> EQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + EQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "eqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class EQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): + """Max H4 for each (study, locus) aggregating over all eQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "eQtlColocH4MaximumNeighbourhood" + + @classmethod + def compute( + cls: type[EQtlColocH4MaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> EQtlColocH4MaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + EQtlColocH4MaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "eqtl" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class PQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all pQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "pQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[PQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> PQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + PQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "pqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class PQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): + """Max H4 for each (study, locus) aggregating over all pQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "pQtlColocH4MaximumNeighbourhood" + + @classmethod + def compute( + cls: type[PQtlColocH4MaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> PQtlColocH4MaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + PQtlColocH4MaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "pqtl" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class SQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all sQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "sQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[SQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> SQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + SQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "sqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class SQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): + """Max H4 for each (study, locus) aggregating over all sQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "sQtlColocH4MaximumNeighbourhood" + + @classmethod + def compute( + cls: type[SQtlColocH4MaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> SQtlColocH4MaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + SQtlColocH4MaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "sqtl" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class TuQtlColocH4MaximumFeature(L2GFeature): + """Max H4 for each (study, locus, gene) aggregating over all tuQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "tuQtlColocH4Maximum" + + @classmethod + def compute( + cls: type[TuQtlColocH4MaximumFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> TuQtlColocH4MaximumFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + TuQtlColocH4MaximumFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "tuqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class TuQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): + """Max H4 for each (study, locus) aggregating over all tuQTLs.""" + + feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_name = "tuQtlColocH4MaximumNeighbourhood" + + @classmethod + def compute( + cls: type[TuQtlColocH4MaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> TuQtlColocH4MaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset with the colocalisation results + + Returns: + TuQtlColocH4MaximumNeighbourhoodFeature: Feature dataset + """ + colocalisation_method = "Coloc" + colocalisation_metric = "h4" + qtl_type = "tuqtl" + return cls( + _df=convert_from_wide_to_long( + common_colocalisation_feature_logic( + study_loci_to_annotate, + colocalisation_method, + colocalisation_metric, + cls.feature_name, + qtl_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py new file mode 100644 index 000000000..ea030108c --- /dev/null +++ b/src/gentropy/dataset/l2g_features/distance.py @@ -0,0 +1,422 @@ +"""Collection of methods that extract distance features from the variant index dataset.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pyspark.sql.functions as f +from pyspark.sql import Window + +from gentropy.common.spark_helpers import convert_from_wide_to_long +from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.variant_index import VariantIndex + +if TYPE_CHECKING: + from pyspark.sql import DataFrame + + +def common_distance_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + *, + variant_index: VariantIndex, + feature_name: str, + distance_type: str, + genomic_window: int = 500_000, +) -> DataFrame: + """Calculate the distance feature that correlates a variant in a credible set with a gene. 
+ + The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait when we look at the average distance score for all variants in the credible set. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + variant_index (VariantIndex): The dataset containing distance to gene information + feature_name (str): The name of the feature + distance_type (str): The type of distance to gene + genomic_window (int): The maximum window size to consider + + Returns: + DataFrame: Feature dataset + """ + distances_dataset = variant_index.get_distance_to_gene(distance_type=distance_type) + if "Mean" in feature_name: + # Weighting by the SNP contribution is only applied when we are averaging all distances + distance_score_expr = ( + f.lit(genomic_window) - f.col(distance_type) + f.lit(1) + ) * f.col("posteriorProbability") + agg_expr = f.mean(f.col("distance_score")) + df = study_loci_to_annotate.df.withColumn( + "variantInLocus", f.explode_outer("locus") + ).select( + "studyLocusId", + f.col("variantInLocus.variantId").alias("variantId"), + f.col("variantInLocus.posteriorProbability").alias("posteriorProbability"), + ) + elif "Sentinel" in feature_name: + # For minimum distances we calculate the unweighted distance between the sentinel (lead) and the gene. 
This + distance_score_expr = f.lit(genomic_window) - f.col(distance_type) + f.lit(1) + agg_expr = f.first(f.col("distance_score")) + df = study_loci_to_annotate.df.select("studyLocusId", "variantId") + return ( + df.join( + distances_dataset.withColumnRenamed("targetId", "geneId"), + on="variantId", + how="inner", + ) + .withColumn("distance_score", f.log10(distance_score_expr)) + .groupBy("studyLocusId", "geneId") + .agg(agg_expr.alias(feature_name)) + ) + + +def common_neighbourhood_distance_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + *, + variant_index: VariantIndex, + feature_name: str, + distance_type: str, + genomic_window: int = 500_000, +) -> DataFrame: + """Calculate the distance feature that correlates any variant in a credible set with any gene nearby the locus. The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + variant_index (VariantIndex): The dataset containing distance to gene information + feature_name (str): The name of the feature + distance_type (str): The type of distance to gene + genomic_window (int): The maximum window size to consider + + Returns: + DataFrame: Feature dataset + """ + local_feature_name = feature_name.replace("Neighbourhood", "") + # First compute mean distances to a gene + local_metric = common_distance_feature_logic( + study_loci_to_annotate, + feature_name=local_feature_name, + distance_type=distance_type, + variant_index=variant_index, + genomic_window=genomic_window, + ) + return ( + # Then compute mean distance in the vicinity (feature will be the same for any gene associated with a studyLocus) + local_metric.withColumn( + "regional_metric", + f.mean(f.col(local_feature_name)).over(Window.partitionBy("studyLocusId")), + ) + .withColumn(feature_name, f.col(local_feature_name) - 
f.col("regional_metric")) + .drop("regional_metric", local_feature_name) + ) + + +class DistanceTssMeanFeature(L2GFeature): + """Average distance of all tagging variants to gene TSS.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceTssMean" + + @classmethod + def compute( + cls: type[DistanceTssMeanFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceTssMeanFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceTssMeanFeature: Feature dataset + """ + distance_type = "distanceFromTss" + return cls( + _df=convert_from_wide_to_long( + common_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceTssMeanNeighbourhoodFeature(L2GFeature): + """Minimum mean distance to TSS for all genes in the vicinity of a studyLocus.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceTssMeanNeighbourhood" + + @classmethod + def compute( + cls: type[DistanceTssMeanNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceTssMeanNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceTssMeanNeighbourhoodFeature: Feature dataset + """ + distance_type = "distanceFromTss" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceSentinelTssFeature(L2GFeature): + """Distance of the sentinel variant to gene TSS. This is not weighted by the causal probability.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceSentinelTss" + + @classmethod + def compute( + cls: type[DistanceSentinelTssFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceSentinelTssFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceSentinelTssFeature: Feature dataset + """ + distance_type = "distanceFromTss" + return cls( + _df=convert_from_wide_to_long( + common_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceSentinelTssNeighbourhoodFeature(L2GFeature): + """Distance between the sentinel variant and a gene TSS as a relation of the distnace with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceSentinelTssNeighbourhood" + + @classmethod + def compute( + cls: type[DistanceSentinelTssNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceSentinelTssNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceSentinelTssNeighbourhoodFeature: Feature dataset + """ + distance_type = "distanceFromTss" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceFootprintMeanFeature(L2GFeature): + """Average distance of all tagging variants to the footprint of a gene.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceFootprintMean" + + @classmethod + def compute( + cls: type[DistanceFootprintMeanFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceFootprintMeanFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceFootprintMeanFeature: Feature dataset + """ + distance_type = "distanceFromFootprint" + return cls( + _df=convert_from_wide_to_long( + common_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceFootprintMeanNeighbourhoodFeature(L2GFeature): + """Minimum mean distance to footprint for all genes in the vicinity of a studyLocus.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceFootprintMeanNeighbourhood" + + @classmethod + def compute( + cls: type[DistanceFootprintMeanNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceFootprintMeanNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceFootprintMeanNeighbourhoodFeature: Feature dataset + """ + distance_type = "distanceFromFootprint" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceSentinelFootprintFeature(L2GFeature): + """Distance between the sentinel variant and the footprint of a gene.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "distanceSentinelFootprintMinimum" + + @classmethod + def compute( + cls: type[DistanceSentinelFootprintFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceSentinelFootprintFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceSentinelFootprintFeature: Feature dataset + """ + distance_type = "distanceFromFootprint" + return cls( + _df=convert_from_wide_to_long( + common_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class DistanceSentinelFootprintNeighbourhoodFeature(L2GFeature): + """Distance between the sentinel variant and a gene footprint as a relation of the distnace with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability.""" + + fill_na_value = 500_000 + feature_dependency_type = VariantIndex + feature_name = "DistanceSentinelFootprintNeighbourhoodFeature" + + @classmethod + def compute( + cls: type[DistanceSentinelFootprintNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> DistanceSentinelFootprintNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + DistanceSentinelFootprintNeighbourhoodFeature: Feature dataset + """ + distance_type = "distanceFromFootprint" + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_distance_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + distance_type=distance_type, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) diff --git a/src/gentropy/dataset/l2g_features/l2g_feature.py b/src/gentropy/dataset/l2g_features/l2g_feature.py new file mode 100644 index 000000000..7073ca758 --- /dev/null +++ b/src/gentropy/dataset/l2g_features/l2g_feature.py @@ -0,0 +1,65 @@ +"""L2G Feature Dataset with a collection of methods that extract features from the gentropy datasets to be fed in L2G.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from gentropy.common.schemas import parse_spark_schema +from gentropy.dataset.dataset import Dataset + +if TYPE_CHECKING: + from pyspark.sql.types import StructType + + from gentropy.dataset.l2g_gold_standard import L2GGoldStandard + from gentropy.dataset.study_locus import StudyLocus + + +@dataclass +class L2GFeature(Dataset, ABC): + """Locus-to-gene feature dataset that serves as template to generate each of the features that inform about locus to gene assignments.""" + + def __post_init__( + self: L2GFeature, + feature_dependency_type: Any = None, + credible_set: StudyLocus | None = None, + ) -> None: + """Initializes a L2GFeature dataset. Any child class of L2GFeature must implement the `compute` method. 
+ + Args: + feature_dependency_type (Any): The dependency that the L2GFeature dataset depends on. Defaults to None. + credible_set (StudyLocus | None): The credible set that the L2GFeature dataset is based on. Defaults to None. + """ + super().__post_init__() + self.feature_dependency_type = feature_dependency_type + self.credible_set = credible_set + + @classmethod + def get_schema(cls: type[L2GFeature]) -> StructType: + """Provides the schema for the L2GFeature dataset. + + Returns: + StructType: Schema for the L2GFeature dataset + """ + return parse_spark_schema("l2g_feature.json") + + @classmethod + @abstractmethod + def compute( + cls: type[L2GFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: Any, + ) -> L2GFeature: + """Computes the L2GFeature dataset. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (Any): The dependency that the L2GFeature class needs to compute the feature + Returns: + L2GFeature: a L2GFeature dataset + + Raises: + NotImplementedError: This method must be implemented in the child classes + """ + raise NotImplementedError("Must be implemented in the child classes") diff --git a/src/gentropy/dataset/l2g_gold_standard.py b/src/gentropy/dataset/l2g_gold_standard.py index 89f4c5f5d..064f6cc0e 100644 --- a/src/gentropy/dataset/l2g_gold_standard.py +++ b/src/gentropy/dataset/l2g_gold_standard.py @@ -18,7 +18,7 @@ from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.study_locus_overlap import StudyLocusOverlap - from gentropy.dataset.v2g import V2G + from gentropy.dataset.variant_index import VariantIndex @dataclass @@ -33,16 +33,16 @@ class L2GGoldStandard(Dataset): def from_otg_curation( cls: type[L2GGoldStandard], gold_standard_curation: DataFrame, - v2g: V2G, study_locus_overlap: StudyLocusOverlap, + variant_index: VariantIndex, interactions: DataFrame, ) -> 
L2GGoldStandard: """Initialise L2GGoldStandard from source dataset. Args: gold_standard_curation (DataFrame): Gold standard curation dataframe, extracted from - v2g (V2G): Variant to gene dataset to bring distance between a variant and a gene's TSS study_locus_overlap (StudyLocusOverlap): Study locus overlap dataset to remove duplicated loci + variant_index (VariantIndex): Dataset to bring distance between a variant and a gene's footprint interactions (DataFrame): Gene-gene interactions dataset to remove negative cases where the gene interacts with a positive gene Returns: @@ -55,7 +55,9 @@ def from_otg_curation( interactions_df = cls.process_gene_interactions(interactions) return ( - OpenTargetsL2GGoldStandard.as_l2g_gold_standard(gold_standard_curation, v2g) + OpenTargetsL2GGoldStandard.as_l2g_gold_standard( + gold_standard_curation, variant_index + ) # .filter_unique_associations(study_locus_overlap) .remove_false_negatives(interactions_df) ) diff --git a/src/gentropy/dataset/v2g.py b/src/gentropy/dataset/v2g.py deleted file mode 100644 index 04bad2113..000000000 --- a/src/gentropy/dataset/v2g.py +++ /dev/null @@ -1,51 +0,0 @@ -"""V2G dataset.""" -from __future__ import annotations - -from dataclasses import dataclass -from typing import TYPE_CHECKING - -import pyspark.sql.functions as f - -from gentropy.common.schemas import parse_spark_schema -from gentropy.dataset.dataset import Dataset - -if TYPE_CHECKING: - from pyspark.sql.types import StructType - - from gentropy.dataset.gene_index import GeneIndex - - -@dataclass -class V2G(Dataset): - """Variant-to-gene (V2G) evidence dataset. - - A variant-to-gene (V2G) evidence is understood as any piece of evidence that supports the association of a variant with a likely causal gene. The evidence can sometimes be context-specific and refer to specific `biofeatures` (e.g. cell types) - """ - - @classmethod - def get_schema(cls: type[V2G]) -> StructType: - """Provides the schema for the V2G dataset. 
- - Returns: - StructType: Schema for the V2G dataset - """ - return parse_spark_schema("v2g.json") - - def filter_by_genes(self: V2G, genes: GeneIndex) -> V2G: - """Filter V2G dataset by genes. - - Args: - genes (GeneIndex): Gene index dataset to filter by - - Returns: - V2G: V2G dataset filtered by genes - """ - self.df = self._df.join(genes.df.select("geneId"), on="geneId", how="inner") - return self - - def extract_distance_tss_minimum(self: V2G) -> None: - """Extract minimum distance to TSS.""" - self.df = self._df.filter(f.col("distance")).withColumn( - "distanceTssMinimum", - f.expr("min(distTss) OVER (PARTITION BY studyLocusId)"), - ) diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 2f24cd985..4d53d741a 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -12,18 +12,15 @@ from gentropy.common.spark_helpers import ( get_nested_struct_schema, get_record_with_maximum_value, - normalise_column, rename_all_columns, safe_array_union, ) from gentropy.dataset.dataset import Dataset -from gentropy.dataset.v2g import V2G if TYPE_CHECKING: from pyspark.sql import Column, DataFrame from pyspark.sql.types import StructType - from gentropy.dataset.gene_index import GeneIndex @dataclass @@ -231,165 +228,106 @@ def filter_by_variant(self: VariantIndex, df: DataFrame) -> VariantIndex: _schema=self.schema, ) - def get_transcript_consequence_df( - self: VariantIndex, gene_index: GeneIndex | None = None - ) -> DataFrame: - """Dataframe of exploded transcript consequences. - - Optionally the trancript consequences can be reduced to the universe of a gene index. - - Args: - gene_index (GeneIndex | None): A gene index. Defaults to None. 
- - Returns: - DataFrame: A dataframe exploded by transcript consequences with the columns variantId, chromosome, transcriptConsequence - """ - # exploding the array removes records without VEP annotation - transript_consequences = self.df.withColumn( - "transcriptConsequence", f.explode("transcriptConsequences") - ).select( - "variantId", - "chromosome", - "position", - "transcriptConsequence", - f.col("transcriptConsequence.targetId").alias("geneId"), - ) - if gene_index: - transript_consequences = transript_consequences.join( - f.broadcast(gene_index.df), - on=["chromosome", "geneId"], - ) - return transript_consequences - - def get_distance_to_tss( + def get_distance_to_gene( self: VariantIndex, - gene_index: GeneIndex, + *, + distance_type: str = "distanceFromTss", max_distance: int = 500_000, - ) -> V2G: - """Extracts variant to gene assignments for variants falling within a window of a gene's TSS. + ) -> DataFrame: + """Extracts variant to gene assignments for variants falling within a window of a gene's TSS or footprint. Args: - gene_index (GeneIndex): A gene index to filter by. - max_distance (int): The maximum distance from the TSS to consider. Defaults to 500_000. + distance_type (str): The type of distance to use. Can be "distanceFromTss" or "distanceFromFootprint". Defaults to "distanceFromTss". + max_distance (int): The maximum distance to consider. Defaults to 500_000, the default window size for VEP. 
Returns: - V2G: variant to gene assignments with their distance to the TSS - """ - return V2G( - _df=( - self.df.alias("variant") - .join( - f.broadcast(gene_index.locations_lut()).alias("gene"), - on=[ - f.col("variant.chromosome") == f.col("gene.chromosome"), - f.abs(f.col("variant.position") - f.col("gene.tss")) - <= max_distance, - ], - how="inner", - ) - .withColumn( - "distance", f.abs(f.col("variant.position") - f.col("gene.tss")) - ) - .withColumn( - "inverse_distance", - max_distance - f.col("distance"), - ) - .transform(lambda df: normalise_column(df, "inverse_distance", "score")) - .select( - "variantId", - f.col("variant.chromosome").alias("chromosome"), - "distance", - "geneId", - "score", - f.lit("distance").alias("datatypeId"), - f.lit("canonical_tss").alias("datasourceId"), - ) - ), - _schema=V2G.get_schema(), - ) + DataFrame: A dataframe with the distance between a variant and a gene's TSS or footprint. - def get_plof_v2g(self: VariantIndex, gene_index: GeneIndex) -> V2G: - """Creates a dataset with variant to gene assignments with a flag indicating if the variant is predicted to be a loss-of-function variant by the LOFTEE algorithm. + Raises: + ValueError: Invalid distance type. + """ + if distance_type not in {"distanceFromTss", "distanceFromFootprint"}: + raise ValueError( + f"Invalid distance_type: {distance_type}. Must be 'distanceFromTss' or 'distanceFromFootprint'." + ) + df = self.df.select( + "variantId", f.explode("transcriptConsequences").alias("tc") + ).select("variantId", "tc.targetId", f"tc.{distance_type}") + if max_distance == 500_000: + return df + elif max_distance < 500_000: + return df.filter(f"{distance_type} <= {max_distance}") + else: + raise ValueError( + f"max_distance must be less than 500_000. Got {max_distance}." + ) - Optionally the trancript consequences can be reduced to the universe of a gene index. 
+ def get_loftee(self: VariantIndex) -> DataFrame: + """Returns a dataframe with a flag indicating whether a variant is predicted to cause loss of function in a gene. The source of this information is the LOFTEE algorithm (https://github.com/konradjk/loftee). - Args: - gene_index (GeneIndex): A gene index to filter by. + !!! note, "This will return a filtered dataframe with only variants that have been annotated by LOFTEE." Returns: - V2G: variant to gene assignments from the LOFTEE algorithm + DataFrame: variant to gene assignments from the LOFTEE algorithm """ - return V2G( - _df=( - self.get_transcript_consequence_df(gene_index) - .filter(f.col("transcriptConsequence.lofteePrediction").isNotNull()) - .withColumn( - "isHighQualityPlof", - f.when( - f.col("transcriptConsequence.lofteePrediction") == "HC", True - ).when( - f.col("transcriptConsequence.lofteePrediction") == "LC", False - ), - ) - .withColumn( - "score", - f.when(f.col("isHighQualityPlof"), 1.0).when( - ~f.col("isHighQualityPlof"), 0 - ), - ) - .select( - "variantId", - "chromosome", - "geneId", - "isHighQualityPlof", - f.col("score"), - f.lit("vep").alias("datatypeId"), - f.lit("loftee").alias("datasourceId"), - ) - ), - _schema=V2G.get_schema(), + return ( + self.df.select("variantId", f.explode("transcriptConsequences").alias("tc")) + .filter(f.col("tc.lofteePrediction").isNotNull()) + .withColumn( + "isHighQualityPlof", + f.when(f.col("tc.lofteePrediction") == "HC", True).when( + f.col("tc.lofteePrediction") == "LC", False + ), + ) + .select( + "variantId", + f.col("tc.targetId"), + f.col("tc.lofteePrediction"), + "isHighQualityPlof", + ) ) - def get_most_severe_transcript_consequence( + def get_most_severe_gene_consequence( self: VariantIndex, + *, vep_consequences: DataFrame, - gene_index: GeneIndex, - ) -> V2G: - """Creates a dataset with variant to gene assignments based on VEP's predicted consequence of the transcript. 
- - Optionally the trancript consequences can be reduced to the universe of a gene index. + ) -> DataFrame: + """Returns a dataframe with the most severe consequence for a variant/gene pair. Args: vep_consequences (DataFrame): A dataframe of VEP consequences - gene_index (GeneIndex): A gene index to filter by. Defaults to None. Returns: - V2G: High and medium severity variant to gene assignments + DataFrame: A dataframe with the most severe consequence (plus a severity score) for a variant/gene pair """ - return V2G( - _df=self.get_transcript_consequence_df(gene_index) + return ( + self.df.select("variantId", f.explode("transcriptConsequences").alias("tc")) .select( "variantId", - "chromosome", - f.col("transcriptConsequence.targetId").alias("geneId"), - f.explode( - "transcriptConsequence.variantFunctionalConsequenceIds" - ).alias("variantFunctionalConsequenceId"), - f.lit("vep").alias("datatypeId"), - f.lit("variantConsequence").alias("datasourceId"), + f.col("tc.targetId"), + f.explode(f.col("tc.variantFunctionalConsequenceIds")).alias( + "variantFunctionalConsequenceId" + ), ) .join( - f.broadcast(vep_consequences), + # TODO: make this table a project config + f.broadcast( + vep_consequences.selectExpr( + "variantFunctionalConsequenceId", "score as severityScore" + ) + ), on="variantFunctionalConsequenceId", how="inner", ) - .drop("label") - .filter(f.col("score") != 0) - # A variant can have multiple predicted consequences on a transcript, the most severe one is selected + .filter(f.col("severityScore").isNull()) .transform( + # A variant can have multiple predicted consequences on a transcript, the most severe one is selected lambda df: get_record_with_maximum_value( - df, ["variantId", "geneId"], "score" + df, ["variantId", "targetId"], "severityScore" ) - ), - _schema=V2G.get_schema(), + ) + .withColumnRenamed( + "variantFunctionalConsequenceId", + "mostSevereVariantFunctionalConsequenceId", + ) ) diff --git a/src/gentropy/datasource/ensembl/vep_parser.py 
b/src/gentropy/datasource/ensembl/vep_parser.py index d84b58407..e3e36140d 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -3,7 +3,7 @@ from __future__ import annotations import importlib.resources as pkg_resources -from typing import TYPE_CHECKING, List +from typing import TYPE_CHECKING import pandas as pd from pyspark.sql import SparkSession @@ -529,14 +529,14 @@ def _collect_uniprot_accessions(trembl: Column, swissprot: Column) -> Column: ) @staticmethod - def _parse_variant_location_id(vep_input_field: Column) -> List[Column]: + def _parse_variant_location_id(vep_input_field: Column) -> list[Column]: r"""Parse variant identifier, chromosome, position, reference allele and alternate allele from VEP input field. Args: vep_input_field (Column): Column containing variant vcf string used as VEP input. Returns: - List[Column]: List of columns containing chromosome, position, reference allele and alternate allele. + list[Column]: List of columns containing chromosome, position, reference allele and alternate allele. 
""" variant_fields = f.split(vep_input_field, r"\t") return [ diff --git a/src/gentropy/datasource/open_targets/l2g_gold_standard.py b/src/gentropy/datasource/open_targets/l2g_gold_standard.py index 26d5a0253..21edcc201 100644 --- a/src/gentropy/datasource/open_targets/l2g_gold_standard.py +++ b/src/gentropy/datasource/open_targets/l2g_gold_standard.py @@ -9,7 +9,7 @@ from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.v2g import V2G +from gentropy.dataset.variant_index import VariantIndex class OpenTargetsL2GGoldStandard: @@ -60,7 +60,9 @@ def parse_positive_curation( @classmethod def expand_gold_standard_with_negatives( - cls: Type[OpenTargetsL2GGoldStandard], positive_set: DataFrame, v2g: V2G + cls: Type[OpenTargetsL2GGoldStandard], + positive_set: DataFrame, + variant_index: VariantIndex, ) -> DataFrame: """Create full set of positive and negative evidence of locus to gene associations. @@ -68,7 +70,7 @@ def expand_gold_standard_with_negatives( Args: positive_set (DataFrame): Positive set from curation - v2g (V2G): Variant to gene dataset to bring distance between a variant and a gene's TSS + variant_index (VariantIndex): Variant index to get distance to gene Returns: DataFrame: Full set of positive and negative evidence of locus to gene associations @@ -76,9 +78,13 @@ def expand_gold_standard_with_negatives( return ( positive_set.withColumnRenamed("geneId", "curated_geneId") .join( - v2g.df.selectExpr( - "variantId", "geneId as non_curated_geneId", "distance" - ).filter(f.col("distance") <= cls.LOCUS_TO_GENE_WINDOW), + variant_index.get_distance_to_gene() + .selectExpr( + "variantId", + "targetId as non_curated_geneId", + "distanceFromTss", + ) + .filter(f.col("distanceFromTss") <= cls.LOCUS_TO_GENE_WINDOW), on="variantId", how="left", ) @@ -86,7 +92,7 @@ def expand_gold_standard_with_negatives( "goldStandardSet", f.when( (f.col("curated_geneId") == 
f.col("non_curated_geneId")) - # to keep the positives that are outside the v2g dataset + # to keep the positives that are not part of the variant index | (f.col("non_curated_geneId").isNull()), f.lit(L2GGoldStandard.GS_POSITIVE_LABEL), ).otherwise(L2GGoldStandard.GS_NEGATIVE_LABEL), @@ -98,27 +104,27 @@ def expand_gold_standard_with_negatives( f.col("curated_geneId"), ).otherwise(f.col("non_curated_geneId")), ) - .drop("distance", "curated_geneId", "non_curated_geneId") + .drop("distanceFromTss", "curated_geneId", "non_curated_geneId") ) @classmethod def as_l2g_gold_standard( cls: type[OpenTargetsL2GGoldStandard], gold_standard_curation: DataFrame, - v2g: V2G, + variant_index: VariantIndex, ) -> L2GGoldStandard: """Initialise L2GGoldStandard from source dataset. Args: gold_standard_curation (DataFrame): Gold standard curation dataframe, extracted from https://github.com/opentargets/genetics-gold-standards - v2g (V2G): Variant to gene dataset to bring distance between a variant and a gene's TSS + variant_index (VariantIndex): Dataset to bring distance between a variant and a gene's footprint Returns: L2GGoldStandard: L2G Gold Standard dataset. False negatives have not yet been removed. 
""" return L2GGoldStandard( _df=cls.parse_positive_curation(gold_standard_curation).transform( - cls.expand_gold_standard_with_negatives, v2g + cls.expand_gold_standard_with_negatives, variant_index ), _schema=L2GGoldStandard.get_schema(), ) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index ff8c6c8ff..9b9b7aa90 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -17,7 +17,7 @@ from gentropy.dataset.l2g_prediction import L2GPrediction from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus -from gentropy.dataset.v2g import V2G +from gentropy.dataset.variant_index import VariantIndex from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader from gentropy.method.l2g.model import LocusToGeneModel from gentropy.method.l2g.trainer import LocusToGeneTrainer @@ -38,7 +38,7 @@ def __init__( model_path: str | None = None, credible_set_path: str, gold_standard_curation_path: str | None = None, - variant_gene_path: str | None = None, + variant_index_path: str | None = None, colocalisation_path: str | None = None, study_index_path: str | None = None, gene_interactions_path: str | None = None, @@ -59,7 +59,7 @@ def __init__( model_path (str | None): Path to the model. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). 
credible_set_path (str): Path to the credible set dataset necessary to build the feature matrix gold_standard_curation_path (str | None): Path to the gold standard curation file - variant_gene_path (str | None): Path to the variant-gene dataset + variant_index_path (str | None): Path to the variant index dataset colocalisation_path (str | None): Path to the colocalisation dataset study_index_path (str | None): Path to the study index dataset gene_interactions_path (str | None): Path to the gene interactions dataset @@ -96,8 +96,10 @@ def __init__( if study_index_path else None ) - self.v2g = ( - V2G.from_parquet(session, variant_gene_path) if variant_gene_path else None + self.variant_index = ( + VariantIndex.from_parquet(session, variant_index_path) + if variant_index_path + else None ) self.coloc = ( Colocalisation.from_parquet( @@ -107,7 +109,7 @@ def __init__( else None ) self.features_input_loader = L2GFeatureInputLoader( - v2g=self.v2g, + variant_index=self.variant_index, coloc=self.coloc, studies=self.studies, study_locus=self.credible_set, @@ -134,7 +136,7 @@ def run_predict(self) -> None: Raises: ValueError: If not all dependencies in prediction mode are set """ - if self.studies and self.v2g and self.coloc: + if self.studies and self.coloc: predictions = L2GPrediction.from_credible_set( self.session, self.credible_set, @@ -157,9 +159,9 @@ def run_train(self) -> None: if ( self.gs_curation and self.interactions - and self.v2g and self.wandb_run_name and self.model_path + and self.variant_index ): wandb_key = access_gcp_secret("wandb-key", "open-targets-genetics-dev") # Process gold standard and L2G features @@ -203,25 +205,30 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr Raises: ValueError: If write_feature_matrix is set to True but a path is not provided or if dependencies to build features are not set. 
""" - if self.gs_curation and self.interactions and self.v2g: + if ( + self.gs_curation + and self.interactions + and self.studies + and self.variant_index + ): study_locus_overlap = StudyLocus( _df=self.credible_set.df.join( f.broadcast( - self.gs_curation - .withColumn( + self.gs_curation.withColumn( "variantId", f.concat_ws( - "_", - f.col("sentinel_variant.locus_GRCh38.chromosome"), - f.col("sentinel_variant.locus_GRCh38.position"), - f.col("sentinel_variant.alleles.reference"), - f.col("sentinel_variant.alleles.alternative"), - ) - ) - .select( + "_", + f.col("sentinel_variant.locus_GRCh38.chromosome"), + f.col("sentinel_variant.locus_GRCh38.position"), + f.col("sentinel_variant.alleles.reference"), + f.col("sentinel_variant.alleles.alternative"), + ), + ).select( StudyLocus.assign_study_locus_id( - ["association_info.otg_id", # studyId - "variantId"] + [ + "association_info.otg_id", # studyId + "variantId", + ] ), ) ), @@ -233,7 +240,7 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr gold_standards = L2GGoldStandard.from_otg_curation( gold_standard_curation=self.gs_curation, - v2g=self.v2g, + variant_index=self.variant_index, study_locus_overlap=study_locus_overlap, interactions=self.interactions, ) diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index c0f0ef9b4..41084277f 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -4,17 +4,35 @@ from typing import Any, Iterator, Mapping -from gentropy.dataset.l2g_feature import ( +from gentropy.dataset.l2g_features.colocalisation import ( EQtlColocClppMaximumFeature, + EQtlColocClppMaximumNeighbourhoodFeature, EQtlColocH4MaximumFeature, - L2GFeature, + EQtlColocH4MaximumNeighbourhoodFeature, PQtlColocClppMaximumFeature, + PQtlColocClppMaximumNeighbourhoodFeature, PQtlColocH4MaximumFeature, + PQtlColocH4MaximumNeighbourhoodFeature, SQtlColocClppMaximumFeature, + 
SQtlColocClppMaximumNeighbourhoodFeature, SQtlColocH4MaximumFeature, + SQtlColocH4MaximumNeighbourhoodFeature, TuQtlColocClppMaximumFeature, + TuQtlColocClppMaximumNeighbourhoodFeature, TuQtlColocH4MaximumFeature, + TuQtlColocH4MaximumNeighbourhoodFeature, ) +from gentropy.dataset.l2g_features.distance import ( + DistanceFootprintMeanFeature, + DistanceFootprintMeanNeighbourhoodFeature, + DistanceSentinelFootprintFeature, + DistanceSentinelFootprintNeighbourhoodFeature, + DistanceSentinelTssFeature, + DistanceSentinelTssNeighbourhoodFeature, + DistanceTssMeanFeature, + DistanceTssMeanNeighbourhoodFeature, +) +from gentropy.dataset.l2g_features.l2g_feature import L2GFeature from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_locus import StudyLocus @@ -75,16 +93,30 @@ class FeatureFactory: """Factory class for creating features.""" feature_mapper: Mapping[str, type[L2GFeature]] = { - # "distanceTssMinimum": DistanceTssMinimumFeature, - # "distanceTssMean": DistanceTssMeanFeature, + "distanceSentinelTss": DistanceSentinelTssFeature, + "distanceSentinelTssNeighbourhood": DistanceSentinelTssNeighbourhoodFeature, + "distanceSentinelFootprint": DistanceSentinelFootprintFeature, + "distanceSentinelFootprintNeighbourhood": DistanceSentinelFootprintNeighbourhoodFeature, + "distanceTssMean": DistanceTssMeanFeature, + "distanceTssMeanNeighbourhood": DistanceTssMeanNeighbourhoodFeature, + "distanceFootprintMean": DistanceFootprintMeanFeature, + "distanceFootprintMeanNeighbourhood": DistanceFootprintMeanNeighbourhoodFeature, "eQtlColocClppMaximum": EQtlColocClppMaximumFeature, + "eQtlColocClppMaximumNeighbourhood": EQtlColocClppMaximumNeighbourhoodFeature, "pQtlColocClppMaximum": PQtlColocClppMaximumFeature, + "pQtlColocClppMaximumNeighbourhood": PQtlColocClppMaximumNeighbourhoodFeature, "sQtlColocClppMaximum": SQtlColocClppMaximumFeature, + "sQtlColocClppMaximumNeighbourhood": SQtlColocClppMaximumNeighbourhoodFeature, 
"tuQtlColocClppMaximum": TuQtlColocClppMaximumFeature, + "tuQtlColocClppMaximumNeighbourhood": TuQtlColocClppMaximumNeighbourhoodFeature, "eQtlColocH4Maximum": EQtlColocH4MaximumFeature, + "eQtlColocH4MaximumNeighbourhood": EQtlColocH4MaximumNeighbourhoodFeature, "pQtlColocH4Maximum": PQtlColocH4MaximumFeature, + "pQtlColocH4MaximumNeighbourhood": PQtlColocH4MaximumNeighbourhoodFeature, "sQtlColocH4Maximum": SQtlColocH4MaximumFeature, + "sQtlColocH4MaximumNeighbourhood": SQtlColocH4MaximumNeighbourhoodFeature, "tuQtlColocH4Maximum": TuQtlColocH4MaximumFeature, + "tuQtlColocH4MaximumNeighbourhood": TuQtlColocH4MaximumNeighbourhoodFeature, } def __init__( diff --git a/src/gentropy/variant_to_gene.py b/src/gentropy/variant_to_gene.py deleted file mode 100644 index cf21053d7..000000000 --- a/src/gentropy/variant_to_gene.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Step to generate variant annotation dataset.""" - -from __future__ import annotations - -from functools import reduce - -from pyspark.sql import functions as f - -from gentropy.common.Liftover import LiftOverSpark -from gentropy.common.session import Session -from gentropy.dataset.gene_index import GeneIndex -from gentropy.dataset.intervals import Intervals -from gentropy.dataset.v2g import V2G -from gentropy.dataset.variant_index import VariantIndex - - -class V2GStep: - """Variant-to-gene (V2G) step. - - This step aims to generate a dataset that contains multiple pieces of evidence supporting the functional association of specific variants with genes. Some of the evidence types include: - - 1. Chromatin interaction experiments, e.g. Promoter Capture Hi-C (PCHi-C). - 2. In silico functional predictions, e.g. Variant Effect Predictor (VEP) from Ensembl. - 3. Distance between the variant and each gene's canonical transcription start site (TSS). - - Attributes: - session (Session): Session object. - variant_index_path (str): Input variant index path. - gene_index_path (str): Input gene index path. 
- vep_consequences_path (str): Input VEP consequences path. - liftover_chain_file_path (str): Path to GRCh37 to GRCh38 chain file. - liftover_max_length_difference: Maximum length difference for liftover. - max_distance (int): Maximum distance to consider. - approved_biotypes (list[str]): List of approved biotypes. - intervals (dict): Dictionary of interval sources. - v2g_path (str): Output V2G path. - """ - - def __init__( - self, - session: Session, - variant_index_path: str, - gene_index_path: str, - vep_consequences_path: str, - liftover_chain_file_path: str, - approved_biotypes: list[str], - interval_sources: dict[str, str], - v2g_path: str, - max_distance: int = 500_000, - liftover_max_length_difference: int = 100, - ) -> None: - """Run Variant-to-gene (V2G) step. - - Args: - session (Session): Session object. - variant_index_path (str): Input variant index path. - gene_index_path (str): Input gene index path. - vep_consequences_path (str): Input VEP consequences path. - liftover_chain_file_path (str): Path to GRCh37 to GRCh38 chain file. - approved_biotypes (list[str]): List of approved biotypes. - interval_sources (dict[str, str]): Dictionary of interval sources. - v2g_path (str): Output V2G path. - max_distance (int): Maximum distance to consider. - liftover_max_length_difference (int): Maximum length difference for liftover. 
- """ - # Read - gene_index = GeneIndex.from_parquet(session, gene_index_path) - vi = VariantIndex.from_parquet(session, variant_index_path).persist() - # Reading VEP consequence to score table and cast the score to the right type: - vep_consequences = session.spark.read.csv( - vep_consequences_path, sep="\t", header=True - ).withColumn("score", f.col("score").cast("double")) - - # Transform - lift = LiftOverSpark( - # lift over variants to hg38 - liftover_chain_file_path, - liftover_max_length_difference, - ) - gene_index_filtered = gene_index.filter_by_biotypes( - # Filter gene index by approved biotypes to define V2G gene universe - list(approved_biotypes) - ) - - intervals = Intervals( - _df=reduce( - lambda x, y: x.unionByName(y, allowMissingColumns=True), - # create interval instances by parsing each source - [ - Intervals.from_source( - session.spark, source_name, source_path, gene_index, lift - ).df - for source_name, source_path in interval_sources.items() - ], - ), - _schema=Intervals.get_schema(), - ) - v2g_datasets = [ - vi.get_distance_to_tss(gene_index_filtered, max_distance), - vi.get_most_severe_transcript_consequence( - vep_consequences, gene_index_filtered - ), - vi.get_plof_v2g(gene_index_filtered), - intervals.v2g(vi), - ] - v2g = V2G( - _df=reduce( - lambda x, y: x.unionByName(y, allowMissingColumns=True), - [dataset.df for dataset in v2g_datasets], - ).repartition("chromosome"), - _schema=V2G.get_schema(), - ) - - # Load - ( - v2g.df.write.partitionBy("chromosome") - .mode(session.write_mode) - .parquet(v2g_path) - ) diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 21f05dcf3..a70c1a87d 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -25,7 +25,6 @@ from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.summary_statistics import SummaryStatistics -from gentropy.dataset.v2g import V2G from 
gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex @@ -252,29 +251,17 @@ def mock_intervals(spark: SparkSession) -> Intervals: @pytest.fixture() -def mock_v2g(spark: SparkSession) -> V2G: - """Mock v2g dataset.""" - v2g_schema = V2G.get_schema() - - data_spec = ( - dg.DataGenerator( - spark, - rows=400, - partitions=4, - randomSeedMethod="hash_fieldname", - ) - .withSchema(v2g_schema) - .withColumnSpec("distance", percentNulls=0.1) - .withColumnSpec("resourceScore", percentNulls=0.1) - .withColumnSpec("score", percentNulls=0.1) - .withColumnSpec("pmid", percentNulls=0.1) - .withColumnSpec("biofeature", percentNulls=0.1) - .withColumnSpec("variantFunctionalConsequenceId", percentNulls=0.1) - .withColumnSpec("isHighQualityPlof", percentNulls=0.1) +def mock_variant_consequence_to_score(spark: SparkSession) -> DataFrame: + """Slice of the VEP consequence to score table.""" + return spark.createDataFrame( + [ + ("SO_0001893", "transcript_ablation", 1.0), + ("SO_0001822", "inframe_deletion", 0.66), + ("SO_0001567", "stop_retained_variant", 0.33), + ], + ["variantFunctionalConsequenceId", "label", "score"], ) - return V2G(_df=data_spec.build(), _schema=v2g_schema) - @pytest.fixture() def mock_variant_index(spark: SparkSession) -> VariantIndex: @@ -386,9 +373,9 @@ def mock_summary_statistics_data(spark: SparkSession) -> DataFrame: # Allowing missingness: .withColumnSpec("standardError", percentNulls=0.1) # Making sure p-values are below 1: - ).build() + ) - return data_spec + return data_spec.build() @pytest.fixture() @@ -620,7 +607,7 @@ def mock_l2g_feature_matrix(spark: SparkSession) -> L2GFeatureMatrix: ("1", "gene1", 100.0, None), ("2", "gene2", 1000.0, 0.0), ], - "studyLocusId STRING, geneId STRING, distanceTssMean FLOAT, distanceTssMinimum FLOAT", + "studyLocusId STRING, geneId STRING, distanceTssMean 
FLOAT, distanceSentinelTssMinimum FLOAT", ), with_gold_standard=False, ) diff --git a/tests/gentropy/dataset/test_intervals.py b/tests/gentropy/dataset/test_intervals.py deleted file mode 100644 index 26d79acd1..000000000 --- a/tests/gentropy/dataset/test_intervals.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Tests on LD index.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from gentropy.dataset.v2g import V2G - -if TYPE_CHECKING: - from gentropy.dataset.intervals import Intervals - from gentropy.dataset.variant_index import VariantIndex - - -def test_interval_v2g_creation( - mock_intervals: Intervals, mock_variant_index: VariantIndex -) -> None: - """Test creation of V2G from intervals.""" - assert isinstance(mock_intervals.v2g(mock_variant_index), V2G) diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py index 125352f8e..f73b6f7c2 100644 --- a/tests/gentropy/dataset/test_l2g.py +++ b/tests/gentropy/dataset/test_l2g.py @@ -177,7 +177,7 @@ def test_calculate_feature_missingness_rate( spark: SparkSession, mock_l2g_feature_matrix: L2GFeatureMatrix ) -> None: """Test L2GFeatureMatrix.calculate_feature_missingness_rate.""" - expected_missingness = {"distanceTssMean": 0.0, "distanceTssMinimum": 1.0} + expected_missingness = {"distanceTssMean": 0.0, "distanceSentinelTssMinimum": 1.0} observed_missingness = mock_l2g_feature_matrix.calculate_feature_missingness_rate() assert isinstance(observed_missingness, dict) assert mock_l2g_feature_matrix.features_list is not None and len( diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 82df2dd4f..18d8a4066 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -1,28 +1,62 @@ -"""Test L2G feature generation.""" +"""Test locus-to-gene feature generation.""" from __future__ import annotations from typing import TYPE_CHECKING, Any +import pyspark.sql.functions as f 
import pytest +from pyspark.sql.types import ( + ArrayType, + BooleanType, + IntegerType, + LongType, + StringType, + StructField, + StructType, +) -from gentropy.dataset.l2g_feature import ( +from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.l2g_features.colocalisation import ( EQtlColocClppMaximumFeature, + EQtlColocClppMaximumNeighbourhoodFeature, EQtlColocH4MaximumFeature, - L2GFeature, + EQtlColocH4MaximumNeighbourhoodFeature, PQtlColocClppMaximumFeature, + PQtlColocClppMaximumNeighbourhoodFeature, PQtlColocH4MaximumFeature, + PQtlColocH4MaximumNeighbourhoodFeature, SQtlColocClppMaximumFeature, + SQtlColocClppMaximumNeighbourhoodFeature, SQtlColocH4MaximumFeature, + SQtlColocH4MaximumNeighbourhoodFeature, TuQtlColocClppMaximumFeature, + TuQtlColocClppMaximumNeighbourhoodFeature, TuQtlColocH4MaximumFeature, + TuQtlColocH4MaximumNeighbourhoodFeature, + common_colocalisation_feature_logic, + common_neighbourhood_colocalisation_feature_logic, +) +from gentropy.dataset.l2g_features.distance import ( + DistanceFootprintMeanFeature, + DistanceFootprintMeanNeighbourhoodFeature, + DistanceSentinelFootprintFeature, + DistanceSentinelFootprintNeighbourhoodFeature, + DistanceSentinelTssFeature, + DistanceSentinelTssNeighbourhoodFeature, + DistanceTssMeanFeature, + DistanceTssMeanNeighbourhoodFeature, + common_distance_feature_logic, + common_neighbourhood_distance_feature_logic, ) +from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.variant_index import VariantIndex from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader if TYPE_CHECKING: - from gentropy.dataset.colocalisation import Colocalisation - from gentropy.dataset.study_index import StudyIndex - from gentropy.dataset.study_locus import StudyLocus + from pyspark.sql import SparkSession @pytest.mark.parametrize( @@ -36,6 +70,22 @@ 
PQtlColocClppMaximumFeature, SQtlColocClppMaximumFeature, TuQtlColocClppMaximumFeature, + EQtlColocClppMaximumNeighbourhoodFeature, + PQtlColocClppMaximumNeighbourhoodFeature, + SQtlColocClppMaximumNeighbourhoodFeature, + TuQtlColocClppMaximumNeighbourhoodFeature, + EQtlColocH4MaximumNeighbourhoodFeature, + PQtlColocH4MaximumNeighbourhoodFeature, + SQtlColocH4MaximumNeighbourhoodFeature, + TuQtlColocH4MaximumNeighbourhoodFeature, + DistanceTssMeanFeature, + DistanceTssMeanNeighbourhoodFeature, + DistanceFootprintMeanFeature, + DistanceFootprintMeanNeighbourhoodFeature, + DistanceSentinelTssFeature, + DistanceSentinelTssNeighbourhoodFeature, + DistanceSentinelFootprintFeature, + DistanceSentinelFootprintNeighbourhoodFeature, ], ) def test_feature_factory_return_type( @@ -43,11 +93,13 @@ def test_feature_factory_return_type( mock_study_locus: StudyLocus, mock_colocalisation: Colocalisation, mock_study_index: StudyIndex, + mock_variant_index: VariantIndex, ) -> None: """Test that every feature factory returns a L2GFeature dataset.""" loader = L2GFeatureInputLoader( colocalisation=mock_colocalisation, study_index=mock_study_index, + variant_index=mock_variant_index, study_locus=mock_study_locus, ) feature_dataset = feature_class.compute( @@ -57,3 +109,380 @@ def test_feature_factory_return_type( ), ) assert isinstance(feature_dataset, L2GFeature) + + +class TestCommonColocalisationFeatureLogic: + """Test the common logic of the colocalisation features.""" + + def test__common_colocalisation_feature_logic( + self: TestCommonColocalisationFeatureLogic, + spark: SparkSession, + ) -> None: + """Test the common logic of the colocalisation features. + + The test data associates studyLocusId1 with gene1 based on the colocalisation with studyLocusId2 and studyLocusId3. + The H4 value of number 2 is higher, therefore the feature value should be based on that. 
+ """ + feature_name = "eQtlColocH4Maximum" + observed_df = common_colocalisation_feature_logic( + self.sample_study_loci_to_annotate, + self.colocalisation_method, + self.colocalisation_metric, + feature_name, + self.qtl_type, + colocalisation=self.sample_colocalisation, + study_index=self.sample_studies, + study_locus=self.sample_study_locus, + ) + expected_df = spark.createDataFrame( + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "eQtlColocH4Maximum": 0.81, + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "eQtlColocH4Maximum": 0.9, + }, + ], + ).select("studyLocusId", "geneId", "eQtlColocH4Maximum") + assert ( + observed_df.collect() == expected_df.collect() + ), "The feature values are not as expected." + + def test__common_neighbourhood_colocalisation_feature_logic( + self: TestCommonColocalisationFeatureLogic, spark: SparkSession + ) -> None: + """Test the common logic of the neighbourhood colocalisation features.""" + feature_name = "eQtlColocH4MaximumNeighbourhood" + observed_df = common_neighbourhood_colocalisation_feature_logic( + self.sample_study_loci_to_annotate, + self.colocalisation_method, + self.colocalisation_metric, + feature_name, + self.qtl_type, + colocalisation=self.sample_colocalisation, + study_index=self.sample_studies, + study_locus=self.sample_study_locus, + ) + expected_df = spark.createDataFrame( + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "eQtlColocH4MaximumNeighbourhood": 0.08999999999999997, + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "eQtlColocH4MaximumNeighbourhood": 0.0, + }, + ], + ).select("studyLocusId", "geneId", "eQtlColocH4MaximumNeighbourhood") + assert ( + observed_df.collect() == expected_df.collect() + ), "The expected and observed dataframes do not match." 
+ + @pytest.fixture(autouse=True) + def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> None: + """Set up the test variables.""" + self.colocalisation_method = "Coloc" + self.colocalisation_metric = "h4" + self.qtl_type = "eqtl" + + self.sample_study_loci_to_annotate = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "lead1", + "studyId": "study1", # this is a GWAS + "chromosome": "1", + }, + ] + ), + _schema=StudyLocus.get_schema(), + ) + self.sample_colocalisation = Colocalisation( + _df=spark.createDataFrame( + [ + { + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", + "chromosome": "1", + "colocalisationMethod": "COLOC", + "numberColocalisingVariants": 1, + "h4": 0.81, + "rightStudyType": "eqtl", + }, + { + "leftStudyLocusId": "1", + "rightStudyLocusId": "3", # qtl linked to the same gene as studyLocusId 2 with a lower score + "chromosome": "1", + "colocalisationMethod": "COLOC", + "numberColocalisingVariants": 1, + "h4": 0.50, + "rightStudyType": "eqtl", + }, + { + "leftStudyLocusId": "1", + "rightStudyLocusId": "4", # qtl linked to a diff gene and with the highest score + "chromosome": "1", + "colocalisationMethod": "COLOC", + "numberColocalisingVariants": 1, + "h4": 0.90, + "rightStudyType": "eqtl", + }, + ], + schema=Colocalisation.get_schema(), + ), + _schema=Colocalisation.get_schema(), + ) + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "lead1", + "studyId": "study1", # this is a GWAS + "chromosome": "1", + }, + { + "studyLocusId": "2", + "variantId": "lead1", + "studyId": "study2", # this is a QTL (same gee) + "chromosome": "1", + }, + { + "studyLocusId": "3", + "variantId": "lead1", + "studyId": "study3", # this is another QTL (same gene) + "chromosome": "1", + }, + { + "studyLocusId": "4", + "variantId": "lead1", + "studyId": "study4", # this is another QTL (diff gene) + "chromosome": "1", + }, + ] + ), + 
_schema=StudyLocus.get_schema(), + ) + self.sample_studies = StudyIndex( + _df=spark.createDataFrame( + [ + { + "studyId": "study1", + "studyType": "gwas", + "geneId": None, + "traitFromSource": "trait1", + "projectId": "project1", + }, + { + "studyId": "study2", + "studyType": "eqtl", + "geneId": "gene1", + "traitFromSource": "trait2", + "projectId": "project2", + }, + { + "studyId": "study3", + "studyType": "eqtl", + "geneId": "gene1", + "traitFromSource": "trait3", + "projectId": "project3", + }, + { + "studyId": "study4", + "studyType": "eqtl", + "geneId": "gene2", + "traitFromSource": "trait4", + "projectId": "project4", + }, + ] + ), + _schema=StudyIndex.get_schema(), + ) + + +class TestCommonDistanceFeatureLogic: + """Test the CommonDistanceFeatureLogic methods.""" + + @pytest.mark.parametrize( + ("feature_name", "expected_data"), + [ + ( + "distanceSentinelTss", + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "distanceSentinelTss": 0.0, + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "distanceSentinelTss": 0.95, + }, + ], + ), + ( + "distanceTssMean", + [ + {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.09}, + {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.65}, + ], + ), + ], + ) + def test_common_distance_feature_logic( + self: TestCommonDistanceFeatureLogic, + spark: SparkSession, + feature_name: str, + expected_data: dict[str, Any], + ) -> None: + """Test the logic of the function that extracts features from distance. + + 2 tests: + - distanceSentinelTss: distance of the sentinel is 10, the max distance is 10. In log scale, the score is 0. + - distanceTssMean: avg distance of any variant in the credible set, weighted by its posterior. 
+ """ + observed_df = ( + common_distance_feature_logic( + self.sample_study_locus, + variant_index=self.sample_variant_index, + feature_name=feature_name, + distance_type=self.distance_type, + genomic_window=10, + ) + .withColumn(feature_name, f.round(f.col(feature_name), 2)) + .orderBy(feature_name) + ) + expected_df = ( + spark.createDataFrame(expected_data) + .select("studyLocusId", "geneId", feature_name) + .orderBy(feature_name) + ) + assert ( + observed_df.collect() == expected_df.collect() + ), f"Expected and observed dataframes are not equal for feature {feature_name}." + + def test_common_neighbourhood_colocalisation_feature_logic( + self: TestCommonDistanceFeatureLogic, + spark: SparkSession, + ) -> None: + """Test the logic of the function that extracts the distance between the sentinel of a credible set and the nearby genes.""" + feature_name = "distanceSentinelTssNeighbourhood" + observed_df = ( + common_neighbourhood_distance_feature_logic( + self.sample_study_locus, + variant_index=self.sample_variant_index, + feature_name=feature_name, + distance_type=self.distance_type, + genomic_window=10, + ) + .withColumn(feature_name, f.round(f.col(feature_name), 2)) + .orderBy(f.col(feature_name).asc()) + ) + expected_df = spark.createDataFrame( + (["1", "gene1", -0.48], ["1", "gene2", 0.48]), + ["studyLocusId", "geneId", feature_name], + ).orderBy(feature_name) + assert ( + observed_df.collect() == expected_df.collect() + ), "Output doesn't meet the expectation." 
+ + @pytest.fixture(autouse=True) + def _setup(self: TestCommonDistanceFeatureLogic, spark: SparkSession) -> None: + """Set up testing fixtures.""" + self.distance_type = "distanceFromTss" + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "lead1", + "studyId": "study1", + "locus": [ + { + "variantId": "lead1", + "posteriorProbability": 0.5, + }, + { + "variantId": "tag1", + "posteriorProbability": 0.5, + }, + ], + "chromosome": "1", + }, + ], + StudyLocus.get_schema(), + ), + _schema=StudyLocus.get_schema(), + ) + self.variant_index_schema = StructType( + [ + StructField("variantId", StringType(), True), + StructField("chromosome", StringType(), True), + StructField("position", IntegerType(), True), + StructField("referenceAllele", StringType(), True), + StructField("alternateAllele", StringType(), True), + StructField( + "transcriptConsequences", + ArrayType( + StructType( + [ + StructField("distanceFromTss", LongType(), True), + StructField("targetId", StringType(), True), + StructField("isEnsemblCanonical", BooleanType(), True), + ] + ) + ), + True, + ), + ] + ) + self.sample_variant_index = VariantIndex( + _df=spark.createDataFrame( + [ + ( + "lead1", + "chrom", + 1, + "A", + "T", + [ + { + "distanceFromTss": 10, + "targetId": "gene1", + "isEnsemblCanonical": True, + }, + { + "distanceFromTss": 2, + "targetId": "gene2", + "isEnsemblCanonical": True, + }, + ], + ), + ( + "tag1", + "chrom", + 1, + "A", + "T", + [ + { + "distanceFromTss": 5, + "targetId": "gene1", + "isEnsemblCanonical": True, + }, + ], + ), + ], + self.variant_index_schema, + ), + _schema=VariantIndex.get_schema(), + ) diff --git a/tests/gentropy/dataset/test_v2g.py b/tests/gentropy/dataset/test_v2g.py deleted file mode 100644 index 24a917508..000000000 --- a/tests/gentropy/dataset/test_v2g.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Tests V2G dataset.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from 
gentropy.dataset.v2g import V2G - -if TYPE_CHECKING: - from gentropy.dataset.gene_index import GeneIndex - - -def test_v2g_creation(mock_v2g: V2G) -> None: - """Test v2g creation with mock data.""" - assert isinstance(mock_v2g, V2G) - - -def test_v2g_filter_by_genes(mock_v2g: V2G, mock_gene_index: GeneIndex) -> None: - """Test v2g filter by genes.""" - assert isinstance( - mock_v2g.filter_by_genes(mock_gene_index), - V2G, - ) diff --git a/tests/gentropy/dataset/test_variant_index.py b/tests/gentropy/dataset/test_variant_index.py index 12afba89f..29a6ef035 100644 --- a/tests/gentropy/dataset/test_variant_index.py +++ b/tests/gentropy/dataset/test_variant_index.py @@ -8,12 +8,10 @@ from pyspark.sql import functions as f from pyspark.sql import types as t -from gentropy.dataset.gene_index import GeneIndex -from gentropy.dataset.v2g import V2G from gentropy.dataset.variant_index import VariantIndex if TYPE_CHECKING: - from pyspark.sql import SparkSession + from pyspark.sql import DataFrame, SparkSession def test_variant_index_creation(mock_variant_index: VariantIndex) -> None: @@ -21,20 +19,6 @@ def test_variant_index_creation(mock_variant_index: VariantIndex) -> None: assert isinstance(mock_variant_index, VariantIndex) -def test_get_plof_v2g( - mock_variant_index: VariantIndex, mock_gene_index: GeneIndex -) -> None: - """Test get_plof_v2g with mock variant annotation.""" - assert isinstance(mock_variant_index.get_plof_v2g(mock_gene_index), V2G) - - -def test_get_distance_to_tss( - mock_variant_index: VariantIndex, mock_gene_index: GeneIndex -) -> None: - """Test get_distance_to_tss with mock variant annotation.""" - assert isinstance(mock_variant_index.get_distance_to_tss(mock_gene_index), V2G) - - class TestVariantIndex: """Collection of tests around the functionality and shape of the variant index.""" @@ -147,3 +131,47 @@ def test_rsid_column_updated(self: TestVariantIndex) -> None: .count() == 2 ) + + @pytest.mark.parametrize( + "distance_type", ["distanceFromTss", 
"distanceFromFootprint"] + ) + def test_get_distance_to_gene( + self: TestVariantIndex, mock_variant_index: VariantIndex, distance_type: str + ) -> None: + """Assert that the function returns a df with the requested columns.""" + expected_cols = ["variantId", "targetId", distance_type] + observed = mock_variant_index.get_distance_to_gene(distance_type=distance_type) + for col in expected_cols: + assert col in observed.columns, f"Column {col} not in {observed.columns}" + + def test_get_most_severe_gene_consequence( + self: TestVariantIndex, + mock_variant_index: VariantIndex, + mock_variant_consequence_to_score: DataFrame, + ) -> None: + """Assert that the function returns a df with the requested columns.""" + expected_cols = [ + "variantId", + "targetId", + "mostSevereVariantFunctionalConsequenceId", + "severityScore", + ] + observed = mock_variant_index.get_most_severe_gene_consequence( + vep_consequences=mock_variant_consequence_to_score + ) + for col in expected_cols: + assert col in observed.columns, f"Column {col} not in {observed.columns}" + + def test_get_loftee( + self: TestVariantIndex, mock_variant_index: VariantIndex + ) -> None: + """Assert that the function returns a df with the requested columns.""" + expected_cols = [ + "variantId", + "targetId", + "lofteePrediction", + "isHighQualityPlof", + ] + observed = mock_variant_index.get_loftee() + for col in expected_cols: + assert col in observed.columns, f"Column {col} not in {observed.columns}" diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py index 78f97d48f..aa36359ca 100644 --- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py +++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py @@ -6,11 +6,20 @@ import pytest from pyspark.sql import DataFrame +from pyspark.sql.types import ( + ArrayType, + BooleanType, + IntegerType, + LongType, + StringType, + StructField, + StructType, 
+) from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.v2g import V2G +from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.open_targets.l2g_gold_standard import ( OpenTargetsL2GGoldStandard, ) @@ -25,13 +34,13 @@ def test_open_targets_as_l2g_gold_standard( sample_l2g_gold_standard: DataFrame, - mock_v2g: V2G, + mock_variant_index: VariantIndex, ) -> None: """Test L2G gold standard from OTG curation.""" assert isinstance( OpenTargetsL2GGoldStandard.as_l2g_gold_standard( sample_l2g_gold_standard, - mock_v2g, + mock_variant_index, ), L2GGoldStandard, ) @@ -81,19 +90,52 @@ def _setup(self: TestExpandGoldStandardWithNegatives, spark: SparkSession) -> No ["variantId", "geneId", "studyId"], ) - sample_v2g_df = spark.createDataFrame( - [ - ("variant1", "gene1", 5, "X", "X", "X"), - ("variant1", "gene3", 10, "X", "X", "X"), - ], + sample_variant_index_df = spark.createDataFrame( [ - "variantId", - "geneId", - "distance", - "chromosome", - "datatypeId", - "datasourceId", + ( + "variant1", + "chrom", + 1, + "A", + "T", + [ + { + "distanceFromTss": 5, + "targetId": "gene1", + "isEnsemblCanonical": True, + }, + { + "distanceFromTss": 10, + "targetId": "gene3", + "isEnsemblCanonical": True, + }, + ], + ), ], + StructType( + [ + StructField("variantId", StringType(), True), + StructField("chromosome", StringType(), True), + StructField("position", IntegerType(), True), + StructField("referenceAllele", StringType(), True), + StructField("alternateAllele", StringType(), True), + StructField( + "transcriptConsequences", + ArrayType( + StructType( + [ + StructField("distanceFromTss", LongType(), True), + StructField("targetId", StringType(), True), + StructField( + "isEnsemblCanonical", BooleanType(), True + ), + ] + ) + ), + True, + ), + ] + ), ) self.expected_expanded_gs = spark.createDataFrame( @@ -107,7 
+149,9 @@ def _setup(self: TestExpandGoldStandardWithNegatives, spark: SparkSession) -> No self.observed_df = ( OpenTargetsL2GGoldStandard.expand_gold_standard_with_negatives( self.sample_positive_set, - V2G(_df=sample_v2g_df, _schema=V2G.get_schema()), + VariantIndex( + _df=sample_variant_index_df, _schema=VariantIndex.get_schema() + ), ) ) diff --git a/tests/gentropy/test_schemas.py b/tests/gentropy/test_schemas.py index 6840e3207..1b06076d0 100644 --- a/tests/gentropy/test_schemas.py +++ b/tests/gentropy/test_schemas.py @@ -18,7 +18,7 @@ from _pytest.fixtures import FixtureRequest from gentropy.dataset.gene_index import GeneIndex - from gentropy.dataset.v2g import V2G + from gentropy.dataset.l2g_prediction import L2GPrediction SCHEMA_DIR = "src/gentropy/assets/schemas" @@ -75,21 +75,23 @@ def test_schema_columns_camelcase(schema_json: str) -> None: class TestValidateSchema: - """Test validate_schema method using V2G (unnested) and GeneIndex (nested) as a testing dataset.""" + """Test validate_schema method using L2GPrediction (unnested) and GeneIndex (nested) as a testing dataset.""" @pytest.fixture() def mock_dataset_instance( self: TestValidateSchema, request: FixtureRequest - ) -> V2G | GeneIndex: + ) -> L2GPrediction | GeneIndex: """Meta fixture to return the value of any requested fixture.""" return request.getfixturevalue(request.param) @pytest.mark.parametrize( - "mock_dataset_instance", ["mock_v2g", "mock_gene_index"], indirect=True + "mock_dataset_instance", + ["mock_l2g_predictions", "mock_gene_index"], + indirect=True, ) def test_validate_schema_extra_field( self: TestValidateSchema, - mock_dataset_instance: V2G | GeneIndex, + mock_dataset_instance: L2GPrediction | GeneIndex, ) -> None: """Test that validate_schema raises an error if the observed schema has an extra field.""" with pytest.raises(SchemaValidationError, match="extraField"): @@ -98,22 +100,26 @@ def test_validate_schema_extra_field( ) @pytest.mark.parametrize( - "mock_dataset_instance", 
["mock_v2g", "mock_gene_index"], indirect=True + "mock_dataset_instance", + ["mock_l2g_predictions", "mock_gene_index"], + indirect=True, ) def test_validate_schema_missing_field( self: TestValidateSchema, - mock_dataset_instance: V2G | GeneIndex, + mock_dataset_instance: L2GPrediction | GeneIndex, ) -> None: """Test that validate_schema raises an error if the observed schema is missing a required field, geneId in this case.""" with pytest.raises(SchemaValidationError, match="geneId"): mock_dataset_instance.df = mock_dataset_instance.df.drop("geneId") @pytest.mark.parametrize( - "mock_dataset_instance", ["mock_v2g", "mock_gene_index"], indirect=True + "mock_dataset_instance", + ["mock_l2g_predictions", "mock_gene_index"], + indirect=True, ) def test_validate_schema_duplicated_field( self: TestValidateSchema, - mock_dataset_instance: V2G | GeneIndex, + mock_dataset_instance: L2GPrediction | GeneIndex, ) -> None: """Test that validate_schema raises an error if the observed schema has a duplicated field, geneId in this case.""" with pytest.raises(SchemaValidationError, match="geneId"): @@ -122,11 +128,13 @@ def test_validate_schema_duplicated_field( ) @pytest.mark.parametrize( - "mock_dataset_instance", ["mock_v2g", "mock_gene_index"], indirect=True + "mock_dataset_instance", + ["mock_l2g_predictions", "mock_gene_index"], + indirect=True, ) def test_validate_schema_different_datatype( self: TestValidateSchema, - mock_dataset_instance: V2G | GeneIndex, + mock_dataset_instance: L2GPrediction | GeneIndex, ) -> None: """Test that validate_schema raises an error if any field in the observed schema has a different type than expected.""" with pytest.raises(SchemaValidationError, match="geneId"): From c3b8c2c8b4457047e3504f5d56817005a67be807 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 1 Oct 2024 10:02:36 +0100 Subject: [PATCH 067/188] feat: out sample LD qc reason (#798) Co-authored-by: Daniel Suveges --- src/gentropy/dataset/study_locus.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index a4d35e7d5..4616052aa 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -86,6 +86,7 @@ class StudyLocusQualityCheck(Enum): ) TOP_HIT = "Study locus from curated top hit" EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set" + OUT_OF_SAMPLE_LD = "Study locus finemapped without in-sample LD reference" class CredibleInterval(Enum): From a5588ae6fa27b28809a63e5dce63d3164e94a9a9 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Tue, 1 Oct 2024 13:09:04 +0200 Subject: [PATCH 068/188] chore: drop redundant parameter (#802) Co-authored-by: Szymon Szyszkowski --- src/gentropy/config.py | 1 - src/gentropy/sumstat_qc_step.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 6f94cc9ed..33865d6ea 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -439,7 +439,6 @@ class GWASQCStep(StepConfig): gwas_path: str = MISSING output_path: str = MISSING - studyid: str = MISSING pval_threshold: float = MISSING _target_: str = "gentropy.sumstat_qc_step.SummaryStatisticsQCStep" diff --git a/src/gentropy/sumstat_qc_step.py b/src/gentropy/sumstat_qc_step.py index b5aed905e..333ab19f3 100644 --- a/src/gentropy/sumstat_qc_step.py +++ b/src/gentropy/sumstat_qc_step.py @@ -15,7 +15,6 @@ def __init__( session: Session, gwas_path: str, output_path: str, - studyid: str, pval_threshold: float = 1e-8, ) -> None: """Calculating quality control metrics on the provided GWAS study. @@ -24,7 +23,6 @@ def __init__( session (Session): Spark session gwas_path (str): Path to the GWAS summary statistics. output_path (str): Output path for the QC results. - studyid (str): Study ID for the QC. pval_threshold (float): P-value threshold for the QC. Default is 1e-8. 
""" @@ -35,5 +33,5 @@ def __init__( gwas=gwas, limit=100_000_000, pval_threshold=pval_threshold ) .write.mode(session.write_mode) - .parquet(output_path + "/qc_results_" + studyid) + .parquet(output_path) ) From d4b507049ca54c927e8d80da43449f93e931e28a Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:41:19 +0200 Subject: [PATCH 069/188] fix: align the schema of study_index for ukb ppp eur (#803) * fix(ukb_ppp_study_index): update column name to match schema * chore: add note that notebooks are not supported --------- Co-authored-by: Szymon Szyszkowski --- notebooks/README.md | 3 ++ notebooks/Release_QC_metrics.ipynb | 2 +- .../datasource/ukb_ppp_eur/study_index.py | 31 ++++++++----------- 3 files changed, 17 insertions(+), 19 deletions(-) create mode 100644 notebooks/README.md diff --git a/notebooks/README.md b/notebooks/README.md new file mode 100644 index 000000000..35132b6bf --- /dev/null +++ b/notebooks/README.md @@ -0,0 +1,3 @@ +# Notebooks + +The notebooks in listed in this directory are not actively maintained and updated. diff --git a/notebooks/Release_QC_metrics.ipynb b/notebooks/Release_QC_metrics.ipynb index 4eb27015b..5f9bf77c0 100644 --- a/notebooks/Release_QC_metrics.ipynb +++ b/notebooks/Release_QC_metrics.ipynb @@ -419,7 +419,7 @@ "# Number of studies\n", "eqtl_index=session.spark.read.parquet(eqtl_index_path, recursiveFileLookup=True)\n", "# Number of tissues, list of tissues\n", - "#eqtl_index.select(f.col(\"tissueFromSourceId\")).distinct().show(truncate=False)\n", + "#eqtl_index.select(f.col(\"biosampleFromSourceId\")).distinct().show(truncate=False)\n", "\n", "# Credible_set. Please use Daniels’ notebook as a reference. 
For each subfolder:\n", "# eqtl catalog susie:\n", diff --git a/src/gentropy/datasource/ukb_ppp_eur/study_index.py b/src/gentropy/datasource/ukb_ppp_eur/study_index.py index f694b9a47..8a3105f5d 100644 --- a/src/gentropy/datasource/ukb_ppp_eur/study_index.py +++ b/src/gentropy/datasource/ukb_ppp_eur/study_index.py @@ -1,4 +1,5 @@ """Study Index for Finngen data source.""" + from __future__ import annotations import pyspark.sql.functions as f @@ -29,9 +30,7 @@ def from_source( """ # In order to populate the nSamples column, we need to peek inside the summary stats dataframe. num_of_samples = ( - spark - .read - .parquet(raw_summary_stats_path) + spark.read.parquet(raw_summary_stats_path) .filter(f.col("chromosome") == "22") .groupBy("studyId") .agg(f.first("N").cast("integer").alias("nSamples")) @@ -45,7 +44,7 @@ def from_source( f.lit("UKB_PPP_EUR").alias("projectId"), f.col("_gentropy_study_id").alias("studyId"), f.col("UKBPPP_ProteinID").alias("traitFromSource"), - f.lit("UBERON_0001969").alias("tissueFromSourceId"), + f.lit("UBERON_0001969").alias("biosampleFromSourceId"), f.col("ensembl_id").alias("geneId"), f.lit(True).alias("hasSumstats"), f.col("_gentropy_summary_stats_link").alias("summarystatsLocation"), @@ -53,21 +52,17 @@ def from_source( .join(num_of_samples, "studyId", "inner") ) # Add population structure. 
- study_index_df = ( - study_index_df - .withColumn( - "discoverySamples", - f.array( - f.struct( - f.col("nSamples").cast("integer").alias("sampleSize"), - f.lit("European").alias("ancestry"), - ) + study_index_df = study_index_df.withColumn( + "discoverySamples", + f.array( + f.struct( + f.col("nSamples").cast("integer").alias("sampleSize"), + f.lit("European").alias("ancestry"), ) - ) - .withColumn( - "ldPopulationStructure", - cls.aggregate_and_map_ancestries(f.col("discoverySamples")), - ) + ), + ).withColumn( + "ldPopulationStructure", + cls.aggregate_and_map_ancestries(f.col("discoverySamples")), ) return StudyIndex( From ccb484ed880e4418db0886e5c549c418fb1258c4 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:09:53 +0200 Subject: [PATCH 070/188] feat: force reinstallation of the gentropy on the cluster (#804) Co-authored-by: Szymon Szyszkowski --- utils/install_dependencies_on_cluster.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/install_dependencies_on_cluster.sh b/utils/install_dependencies_on_cluster.sh index 6b76a7d60..b0a165c04 100644 --- a/utils/install_dependencies_on_cluster.sh +++ b/utils/install_dependencies_on_cluster.sh @@ -60,7 +60,9 @@ function main() { echo "Uninstalling previous version if it exists" pip uninstall -y gentropy echo "Install package..." 
- run_with_retry pip install --upgrade ${PACKAGENAME} + # NOTE: ensure the gentropy is reinstalled each time without version cache + # see https://pip.pypa.io/en/stable/cli/pip_install/#cmdoption-force-reinstall + run_with_retry pip install --force-reinstall ${PACKAGENAME} } From 1c396d2a8c9a92c97e0d2e387061cbff2b229d35 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 1 Oct 2024 17:01:43 +0100 Subject: [PATCH 071/188] feat(validation): adding credible set confidence annotation at validation time (#801) * feat: adding credible set confidence * feat: adding confidence assignment to the credible set validation step * fix: adding confidence field to schema * fix: schema * fix: confidence criteria * fix: finalising labels * fix: docstring typo * test: for credible set confidence assignment --- src/gentropy/assets/schemas/study_locus.json | 6 ++ src/gentropy/dataset/study_locus.py | 89 +++++++++++++++++++- src/gentropy/study_locus_validation.py | 2 + tests/gentropy/dataset/test_study_locus.py | 40 ++++++++- 4 files changed, 131 insertions(+), 6 deletions(-) diff --git a/src/gentropy/assets/schemas/study_locus.json b/src/gentropy/assets/schemas/study_locus.json index 52a19f941..5c7bf1178 100644 --- a/src/gentropy/assets/schemas/study_locus.json +++ b/src/gentropy/assets/schemas/study_locus.json @@ -241,6 +241,12 @@ }, "type": "array" } + }, + { + "metadata": {}, + "name": "confidence", + "nullable": true, + "type": "string" } ], "type": "struct" diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 4616052aa..bf9998458 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -34,6 +34,28 @@ from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader +class CredibleSetConfidenceClasses(Enum): + """Confidence assignments for credible sets, based on finemapping method and quality checks. + + List of confidence classes, from the highest to the lowest confidence level. 
+ + Attributes: + FINEMAPPED_IN_SAMPLE_LD (str): SuSiE fine-mapped credible set with in-sample LD + FINEMAPPED_OUT_OF_SAMPLE_LD (str): SuSiE fine-mapped credible set with out-of-sample LD + PICSED_SUMMARY_STATS (str): PICS fine-mapped credible set extracted from summary statistics + PICSED_TOP_HIT (str): PICS fine-mapped credible set based on reported top hit + UNKNOWN (str): Unknown confidence, for credible sets which did not fit any of the above categories + """ + + FINEMAPPED_IN_SAMPLE_LD = "SuSiE fine-mapped credible set with in-sample LD" + FINEMAPPED_OUT_OF_SAMPLE_LD = "SuSiE fine-mapped credible set with out-of-sample LD" + PICSED_SUMMARY_STATS = ( + "PICS fine-mapped credible set extracted from summary statistics" + ) + PICSED_TOP_HIT = "PICS fine-mapped credible set based on reported top hit" + UNKNOWN = "Unknown confidence" + + class StudyLocusQualityCheck(Enum): """Study-Locus quality control options listing concerns on the quality of the association. @@ -468,8 +490,9 @@ def assign_study_locus_id(uniqueness_defining_columns: list[str]) -> Column: +----------+----------+-----------------+--------------------------------+ """ - return Dataset.generate_identifier(uniqueness_defining_columns).alias("studyLocusId") - + return Dataset.generate_identifier(uniqueness_defining_columns).alias( + "studyLocusId" + ) @classmethod def calculate_credible_set_log10bf(cls: type[StudyLocus], logbfs: Column) -> Column: @@ -1126,3 +1149,65 @@ def window_based_clumping( from gentropy.method.window_based_clumping import WindowBasedClumping return WindowBasedClumping.clump(self, window_size) + + def assign_confidence(self: StudyLocus) -> StudyLocus: + """Assign confidence to study locus. + + Returns: + StudyLocus: Study locus with confidence assigned. 
+ """ + # Return self if the required columns are not in the dataframe: + if ( + "qualityControls" not in self.df.columns + or "finemappingMethod" not in self.df.columns + ): + return self + + # Assign confidence based on the presence of quality controls + df = self.df.withColumn( + "confidence", + f.when( + (f.col("finemappingMethod") == "SuSiE-inf") + & ( + ~f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.OUT_OF_SAMPLE_LD.value, + ) + ), + CredibleSetConfidenceClasses.FINEMAPPED_IN_SAMPLE_LD.value, + ) + .when( + (f.col("finemappingMethod") == "SuSiE-inf") + & ( + f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.OUT_OF_SAMPLE_LD.value, + ) + ), + CredibleSetConfidenceClasses.FINEMAPPED_OUT_OF_SAMPLE_LD.value, + ) + .when( + (f.col("finemappingMethod") == "pics") + & ( + ~f.array_contains( + f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value + ) + ), + CredibleSetConfidenceClasses.PICSED_SUMMARY_STATS.value, + ) + .when( + (f.col("finemappingMethod") == "pics") + & ( + f.array_contains( + f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value + ) + ), + CredibleSetConfidenceClasses.PICSED_TOP_HIT.value, + ) + .otherwise(CredibleSetConfidenceClasses.UNKNOWN.value), + ) + + return StudyLocus( + _df=df, + _schema=self.get_schema(), + ) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index fc69f6855..486b31ca5 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -47,6 +47,8 @@ def __init__( .qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE # Annotates credible intervals and filter to only keep 99% credible sets .filter_credible_set(credible_interval=CredibleInterval.IS99) + # Annotate credible set confidence: + .assign_confidence() ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows( diff --git a/tests/gentropy/dataset/test_study_locus.py 
b/tests/gentropy/dataset/test_study_locus.py index 3240cdb02..3f6cfcb59 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -23,6 +23,7 @@ from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import ( CredibleInterval, + CredibleSetConfidenceClasses, StudyLocus, StudyLocusQualityCheck, ) @@ -615,12 +616,20 @@ class TestStudyLocusValidation: STUDY_LOCUS_DATA = [ # Won't be flagged: - ("1", "v1", "s1", 1.0, -8, []), + ("1", "v1", "s1", 1.0, -8, [], "pics"), # Already flagged, needs to be tested if the flag reamins unique: - ("2", "v2", "s2", 5.0, -4, [StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value]), + ( + "2", + "v2", + "s2", + 5.0, + -4, + [StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value], + "pics", + ), # To be flagged: - ("3", "v3", "s3", 1.0, -4, []), - ("4", "v4", "s4", 5.0, -3, []), + ("3", "v3", "s3", 1.0, -4, [], "SuSiE-inf"), + ("4", "v4", "s4", 5.0, -3, [], "unknown"), ] STUDY_LOCUS_SCHEMA = t.StructType( @@ -631,6 +640,7 @@ class TestStudyLocusValidation: t.StructField("pValueMantissa", t.FloatType(), False), t.StructField("pValueExponent", t.IntegerType(), False), t.StructField("qualityControls", t.ArrayType(t.StringType()), False), + t.StructField("finemappingMethod", t.StringType(), False), ] ) @@ -678,6 +688,28 @@ def test_return_type_pval_validation( self.study_locus.validate_lead_pvalue(test_pvalues), StudyLocus ) + def test_confidence_flag_return_type(self: TestStudyLocusValidation) -> None: + """Testing if the confidence flagging returns the right type.""" + assert isinstance(self.study_locus.assign_confidence(), StudyLocus) + + def test_confidence_flag_new_column(self: TestStudyLocusValidation) -> None: + """Testing if the confidence flagging adds a new column.""" + assert ( + self.study_locus.assign_confidence().df.columns + == self.study_locus.df.columns + ["confidence"] + ) + + def test_confidence_flag_unknown_confidence(self: TestStudyLocusValidation) 
-> None: + """Testing if the confidence flagging adds a new column.""" + assert ( + self.study_locus.assign_confidence() + .df.filter( + f.col("confidence") == CredibleSetConfidenceClasses.UNKNOWN.value + ) + .count() + == 1 + ) + @pytest.mark.parametrize( ("test_pvalues", "flagged_count"), [(1e-5, 3), (1e-4, 2)], From 9177dd44d6ba4abe3ed010a5177de6e97504eb3a Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 2 Oct 2024 14:23:44 +0200 Subject: [PATCH 072/188] chore: make the lb clumping ingest the partitionned data (#806) Co-authored-by: Szymon Szyszkowski --- src/gentropy/locus_breaker_clumping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gentropy/locus_breaker_clumping.py b/src/gentropy/locus_breaker_clumping.py index 7f3649097..fde722354 100644 --- a/src/gentropy/locus_breaker_clumping.py +++ b/src/gentropy/locus_breaker_clumping.py @@ -1,4 +1,4 @@ -"""Step to apply linkageg based clumping on study-locus dataset.""" +"""Step to apply linkage based clumping on study-locus dataset.""" from __future__ import annotations @@ -47,7 +47,8 @@ def __init__( remove_mhc (bool, optional): If true will use exclude_region() to remove the MHC region. 
""" sum_stats = SummaryStatistics.from_parquet( - session, summary_statistics_input_path, recursiveFileLookup=True + session, + summary_statistics_input_path, ) lbc = sum_stats.locus_breaker_clumping( lbc_baseline_pvalue, From 174f8f945b2b02016aae14c9d11100a577463d57 Mon Sep 17 00:00:00 2001 From: Yakov Date: Wed, 2 Oct 2024 13:54:54 +0100 Subject: [PATCH 073/188] feat: adding filtering to susie finemapper (#796) * feat: adding filltering to susie fine_mapper * fix: correct options * fix: fix simulations * fix: correct clumping * fix: fix options * fix: fix for catching None * fix: adding LD and gwas_df as output * chore: changing defaults * chore: v1 * fix: spelling in susie_finemapper.py --------- Co-authored-by: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> --- src/gentropy/config.py | 4 +- src/gentropy/finemapping_simulations.py | 65 +++++----- src/gentropy/susie_finemapper.py | 159 +++++++++++++++--------- tests/gentropy/method/test_susie_inf.py | 6 +- 4 files changed, 142 insertions(+), 92 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 33865d6ea..e1068cfa6 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -417,8 +417,7 @@ class FinemapperConfig(StepConfig): study_locus_manifest_path: str = MISSING study_locus_index: int = MISSING max_causal_snps: int = MISSING - primary_signal_pval_threshold: float = MISSING - secondary_signal_pval_threshold: float = MISSING + lead_pval_threshold: float = MISSING purity_mean_r2_threshold: float = MISSING purity_min_r2_threshold: float = MISSING cs_lbf_thr: float = MISSING @@ -430,6 +429,7 @@ class FinemapperConfig(StepConfig): carma_time_limit: int = MISSING imputed_r2_threshold: float = MISSING ld_score_threshold: float = MISSING + ld_min_r2: float = MISSING _target_: str = "gentropy.susie_finemapper.SusieFineMapperStep" diff --git a/src/gentropy/finemapping_simulations.py b/src/gentropy/finemapping_simulations.py index 66a8f7855..92b30566c 100644 
--- a/src/gentropy/finemapping_simulations.py +++ b/src/gentropy/finemapping_simulations.py @@ -139,40 +139,47 @@ def SimulationLoop( imputed_r2_threshold=0.9, ld_score_threshold=5, sum_pips=0.99, - primary_signal_pval_threshold=1e-2, - secondary_signal_pval_threshold=1e-2, + lead_pval_threshold=1, purity_mean_r2_threshold=0, purity_min_r2_threshold=0, cs_lbf_thr=2, + ld_min_r2=0.9, + locusStart=1, + locusEnd=2, ) - cred_set = CS_sim["study_locus"].df - - X = ld_index_pd["variantId"][x_cycle["indexes"]].tolist() - - cred_set = cred_set.withColumn("exploded_locus", col("locus.variantId")) - # Create a condition for each element in X - conditions = [array_contains(col("exploded_locus"), x) for x in X] - # Combine the conditions using the | operator - combined_condition = conditions[0] - for condition in conditions[1:]: - combined_condition = combined_condition | condition - # Create a new column that is True if any condition is True and False otherwise - cred_set = cred_set.withColumn("is_in_X", combined_condition) - - cred_set = cred_set.withColumn( - "is_in_lead", when(col("variantId").isin(X), 1).otherwise(0) - ) - - cred_set = cred_set.toPandas() - cred_set = cred_set[column_list] - if counter == 1: - cred_sets = cred_set - else: - # cred_sets = cred_sets.unionByName(cred_set) - cred_sets = pd.concat([cred_sets, cred_set], axis=0) - # cred_sets=cred_sets.merge(cred_set) - counter = counter + 1 + if CS_sim is not None: + cs_sl = CS_sim["study_locus"] + cred_set = cs_sl.df + + X = ld_index_pd["variantId"][x_cycle["indexes"]].tolist() + + cred_set = cred_set.withColumn( + "exploded_locus", col("locus.variantId") + ) + # Create a condition for each element in X + conditions = [array_contains(col("exploded_locus"), x) for x in X] + # Combine the conditions using the | operator + combined_condition = conditions[0] + for condition in conditions[1:]: + combined_condition = combined_condition | condition + # Create a new column that is True if any condition is True and 
False otherwise + cred_set = cred_set.withColumn("is_in_X", combined_condition) + + cred_set = cred_set.withColumn( + "is_in_lead", when(col("variantId").isin(X), 1).otherwise(0) + ) + + cred_set = cred_set.toPandas() + cred_set = cred_set[column_list] + + if counter == 1: + cred_sets = cred_set + else: + # cred_sets = cred_sets.unionByName(cred_set) + cred_sets = pd.concat([cred_sets, cred_set], axis=0) + # cred_sets=cred_sets.merge(cred_set) + counter = counter + 1 return cred_sets diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 26c73e20f..1379ad89b 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -11,7 +11,7 @@ import pyspark.sql.functions as f import scipy as sc from pyspark.sql import DataFrame, Row, Window -from pyspark.sql.functions import row_number +from pyspark.sql.functions import desc, row_number from pyspark.sql.types import ( DoubleType, IntegerType, @@ -47,10 +47,9 @@ def __init__( study_locus_manifest_path: str, study_locus_index: int, max_causal_snps: int = 10, - primary_signal_pval_threshold: float = 1, - secondary_signal_pval_threshold: float = 1, + lead_pval_threshold: float = 1e-5, purity_mean_r2_threshold: float = 0, - purity_min_r2_threshold: float = 0, + purity_min_r2_threshold: float = 0.25, cs_lbf_thr: float = 2, sum_pips: float = 0.99, susie_est_tausq: bool = False, @@ -60,6 +59,7 @@ def __init__( carma_tau: float = 0.15, imputed_r2_threshold: float = 0.9, ld_score_threshold: float = 5, + ld_min_r2: float = 0.8, ) -> None: """Run fine-mapping on a studyLocusId from a collected studyLocus table. @@ -69,8 +69,7 @@ def __init__( study_locus_manifest_path (str): Path to the CSV manifest containing all study locus input and output locations. 
Should contain two columns: study_locus_input and study_locus_output study_locus_index (int): Index (0-based) of the locus in the manifest to process in this call max_causal_snps (int): Maximum number of causal variants in locus, default is 10 - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1), default is 5e-8 - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals, default is 1e-7 + lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5 purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets, default is 0 purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets, default is 0.25 cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 @@ -82,6 +81,7 @@ def __init__( carma_tau (float): CARMA tau, shrinkage parameter imputed_r2_threshold (float): imputed R2 threshold, default is 0.9 ld_score_threshold (float): LD score threshold ofr imputation, default is 5 + ld_min_r2 (float): Threshold to filter CS by leads in high LD, default is 0.8 """ # Read locus manifest. 
study_locus_manifest = pd.read_csv(study_locus_manifest_path) @@ -108,12 +108,11 @@ def __init__( study_locus_row=study_locus, study_index=study_index, max_causal_snps=max_causal_snps, - primary_signal_pval_threshold=primary_signal_pval_threshold, - secondary_signal_pval_threshold=secondary_signal_pval_threshold, purity_mean_r2_threshold=purity_mean_r2_threshold, purity_min_r2_threshold=purity_min_r2_threshold, cs_lbf_thr=cs_lbf_thr, sum_pips=sum_pips, + lead_pval_threshold=lead_pval_threshold, susie_est_tausq=susie_est_tausq, run_carma=run_carma, run_sumstat_imputation=run_sumstat_imputation, @@ -121,35 +120,39 @@ def __init__( carma_time_limit=carma_time_limit, imputed_r2_threshold=imputed_r2_threshold, ld_score_threshold=ld_score_threshold, + ld_min_r2=ld_min_r2, ) if result_logging is not None: - # Write result - result_logging["study_locus"].df.write.mode(session.write_mode).parquet( - study_locus_output - ) - # Write log - result_logging["log"].to_parquet( - study_locus_output + ".log", - engine="pyarrow", - index=False, - ) + if result_logging["study_locus"] is not None: + # Write result + result_logging["study_locus"].df.write.mode(session.write_mode).parquet( + study_locus_output + ) + # Write log + result_logging["log"].to_parquet( + study_locus_output + ".log", + engine="pyarrow", + index=False, + ) @staticmethod - def susie_inf_to_studylocus( + def susie_inf_to_studylocus( # noqa: C901 susie_output: dict[str, Any], session: Session, studyId: str, region: str, variant_index: DataFrame, ld_matrix: np.ndarray, + locusStart: int, + locusEnd: int, cs_lbf_thr: float = 2, sum_pips: float = 0.99, - primary_signal_pval_threshold: float = 1, - secondary_signal_pval_threshold: float = 1, + lead_pval_threshold: float = 1, purity_mean_r2_threshold: float = 0, purity_min_r2_threshold: float = 0, - ) -> StudyLocus: + ld_min_r2: float = 0.9, + ) -> StudyLocus | None: """Convert SuSiE-inf output to StudyLocus DataFrame. 
Args: @@ -159,15 +162,17 @@ def susie_inf_to_studylocus( region (str): region variant_index (DataFrame): DataFrame with variant information ld_matrix (np.ndarray): LD matrix used for fine-mapping + locusStart (int): locus start + locusEnd (int): locus end cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals + lead_pval_threshold (float): p-value threshold for the lead variant from CS purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets + ld_min_r2 (float): Threshold to fillter CS by leads in high LD, default is 0.9 Returns: - StudyLocus: StudyLocus object with fine-mapped credible sets + StudyLocus | None: StudyLocus object with fine-mapped credible sets """ # PLEASE DO NOT REMOVE THIS LINE pd.DataFrame.iteritems = pd.DataFrame.items @@ -333,37 +338,63 @@ def susie_inf_to_studylocus( mantissa, exponent = neglog_pvalue_to_mantissa_and_exponent( cred_sets.neglogpval ) - cred_sets = cred_sets.withColumn("pValueMantissa", mantissa) cred_sets = cred_sets.withColumn("pValueExponent", exponent) - cred_sets = cred_sets.withColumn( "pValueMantissa", f.col("pValueMantissa").cast("float") ) + # Filter by lead p-value, credible set logBF, purity mean r2 and purity min r2 cred_sets = cred_sets.filter( - (f.col("neglogpval") >= -np.log10(secondary_signal_pval_threshold)) - | (f.col("credibleSetIndex") == 1) + (f.col("neglogpval") >= -np.log10(lead_pval_threshold)) + & (f.col("credibleSetlog10BF") >= cs_lbf_thr * 0.4342944819) + & (f.col("purityMinR2") >= 
purity_min_r2_threshold) + & (f.col("purityMeanR2") >= purity_mean_r2_threshold) ) - cred_sets = cred_sets.filter( - (f.col("neglogpval") >= -np.log10(primary_signal_pval_threshold)) - | (f.col("credibleSetIndex") > 1) - ) + if cred_sets.count() == 0: + return None + + # Remove duplicated by lead variant + if cred_sets.count() > 1: + window = Window.partitionBy("variantId").orderBy("credibleSetIndex") + cred_sets = cred_sets.withColumn("rank", row_number().over(window)) + cred_sets = cred_sets.filter(cred_sets["rank"] == 1).drop("rank") + cred_sets = cred_sets.orderBy("credibleSetIndex") + + # Remove CSs with high LD between leads + if cred_sets.count() > 1: + cred_sets = cred_sets.orderBy(desc("neglogpval")) + lead_variantId_list = ( + cred_sets.select("variantId").toPandas()["variantId"].tolist() + ) + vlist_series = pd.Series(lead_variantId_list) + ind = vlist_series.map( + variant_index_df.set_index("variantId").index.get_loc + ) + ld_leads = ld_matrix[ind, :][:, ind] + ld_leads = ld_leads**2 + ld_leads = ld_leads - np.tril(ld_leads) + np.fill_diagonal(ld_leads, -1) + + lead_variantId_list_to_delete: list[str] = [] + for idx in range(len(lead_variantId_list)): + vId = lead_variantId_list[idx] + if vId in lead_variantId_list_to_delete: + continue + high_ld_indices = np.where(ld_leads[idx, :] >= ld_min_r2)[0] + if len(high_ld_indices) > 0: + lead_variantId_list_to_delete = ( + lead_variantId_list_to_delete + + list(np.array(lead_variantId_list)[high_ld_indices]) + ) + if len(lead_variantId_list_to_delete) > 0: + for vId in lead_variantId_list_to_delete: + cred_sets = cred_sets.filter(f.col("variantId") != vId) cred_sets = cred_sets.drop("neglogpval") - - cred_sets = cred_sets.filter( - (f.col("credibleSetlog10BF") >= cs_lbf_thr * 0.4342944819) - | (f.col("credibleSetIndex") == 1) - ) - - cred_sets = cred_sets.filter(f.col("purityMeanR2") >= purity_mean_r2_threshold) - cred_sets = cred_sets.filter(f.col("purityMinR2") >= purity_min_r2_threshold) - - window = 
Window.partitionBy("studyLocusId").orderBy("credibleSetIndex") - cred_sets = cred_sets.withColumn("rank", row_number().over(window)) - cred_sets = cred_sets.filter(cred_sets["rank"] == 1).drop("rank") + cred_sets = cred_sets.withColumn("locusStart", f.lit(locusStart)) + cred_sets = cred_sets.withColumn("locusEnd", f.lit(locusEnd)) return StudyLocus( _df=cred_sets, @@ -379,6 +410,8 @@ def susie_finemapper_from_prepared_dataframes( session: Session, studyId: str, region: str, + locusStart: int, + locusEnd: int, susie_est_tausq: bool = False, run_carma: bool = False, run_sumstat_imputation: bool = False, @@ -387,12 +420,12 @@ def susie_finemapper_from_prepared_dataframes( imputed_r2_threshold: float = 0.8, ld_score_threshold: float = 4, sum_pips: float = 0.99, - primary_signal_pval_threshold: float = 5e-8, - secondary_signal_pval_threshold: float = 1e-7, + lead_pval_threshold: float = 1e-5, purity_mean_r2_threshold: float = 0, purity_min_r2_threshold: float = 0.25, cs_lbf_thr: float = 2, - ) -> dict[str, Any]: + ld_min_r2: float = 0.9, + ) -> dict[str, Any] | None: """Susie fine-mapper function that uses LD, z-scores, variant info and other options for Fine-Mapping. 
Args: @@ -403,6 +436,8 @@ def susie_finemapper_from_prepared_dataframes( session (Session): Spark session studyId (str): study ID region (str): region + locusStart (int): locus start + locusEnd (int): locus end susie_est_tausq (bool): estimate tau squared, default is False run_carma (bool): run CARMA, default is False run_sumstat_imputation (bool): run summary statistics imputation, default is False @@ -411,14 +446,14 @@ def susie_finemapper_from_prepared_dataframes( imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 ld_score_threshold (float): LD score threshold ofr imputation, default is 4 sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals + lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5 purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 + ld_min_r2 (float): Threshold to fillter CS by leads in high LD, default is 0.9 Returns: - dict[str, Any]: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map + dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map """ # PLEASE DO NOT REMOVE THIS LINE pd.DataFrame.iteritems = pd.DataFrame.items @@ -542,11 +577,13 @@ def 
susie_finemapper_from_prepared_dataframes( variant_index=variant_index, sum_pips=sum_pips, ld_matrix=ld_to_fm, - primary_signal_pval_threshold=primary_signal_pval_threshold, - secondary_signal_pval_threshold=secondary_signal_pval_threshold, + lead_pval_threshold=lead_pval_threshold, purity_mean_r2_threshold=purity_mean_r2_threshold, purity_min_r2_threshold=purity_min_r2_threshold, cs_lbf_thr=cs_lbf_thr, + ld_min_r2=ld_min_r2, + locusStart=locusStart, + locusEnd=locusEnd, ) end_time = time.time() @@ -568,6 +605,8 @@ def susie_finemapper_from_prepared_dataframes( return { "study_locus": study_locus, "log": log_df, + "LD": ld_to_fm, + "GWAS_df": GWAS_df, } @staticmethod @@ -584,11 +623,11 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( imputed_r2_threshold: float = 0.9, ld_score_threshold: float = 5, sum_pips: float = 0.99, - primary_signal_pval_threshold: float = 5e-8, - secondary_signal_pval_threshold: float = 1e-7, + lead_pval_threshold: float = 1e-5, purity_mean_r2_threshold: float = 0, purity_min_r2_threshold: float = 0.25, cs_lbf_thr: float = 2, + ld_min_r2: float = 0.9, ) -> dict[str, Any] | None: """Susie fine-mapper function that uses study-locus row with collected locus, chromosome and position as inputs. 
@@ -605,11 +644,11 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 ld_score_threshold (float): LD score threshold ofr imputation, default is 4 sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - primary_signal_pval_threshold (float): p-value threshold for the lead variant from the primary signal (credibleSetIndex==1) - secondary_signal_pval_threshold (float): p-value threshold for the lead variant from the secondary signals + lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5 purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 + ld_min_r2 (float): Threshold to fillter CS by leads in high LD, default is 0.9 Returns: dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map, or None @@ -796,6 +835,8 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( session=session, studyId=studyId, region=region, + locusStart=int(locusStart), + locusEnd=int(locusEnd), susie_est_tausq=susie_est_tausq, run_carma=run_carma, run_sumstat_imputation=run_sumstat_imputation, @@ -804,11 +845,11 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( imputed_r2_threshold=imputed_r2_threshold, ld_score_threshold=ld_score_threshold, sum_pips=sum_pips, - primary_signal_pval_threshold=primary_signal_pval_threshold, - secondary_signal_pval_threshold=secondary_signal_pval_threshold, + lead_pval_threshold=lead_pval_threshold, purity_mean_r2_threshold=purity_mean_r2_threshold, 
purity_min_r2_threshold=purity_min_r2_threshold, cs_lbf_thr=cs_lbf_thr, + ld_min_r2=ld_min_r2, ) return out diff --git a/tests/gentropy/method/test_susie_inf.py b/tests/gentropy/method/test_susie_inf.py index 91227af0f..4885a3d8a 100644 --- a/tests/gentropy/method/test_susie_inf.py +++ b/tests/gentropy/method/test_susie_inf.py @@ -82,10 +82,12 @@ def test_SUSIE_inf_convert_to_study_locus( variant_index=gwas_df, cs_lbf_thr=2, ld_matrix=ld, - primary_signal_pval_threshold=1, - secondary_signal_pval_threshold=1, + lead_pval_threshold=1, purity_mean_r2_threshold=0, purity_min_r2_threshold=0, sum_pips=0.99, + ld_min_r2=1, + locusStart=1, + locusEnd=2, ) assert isinstance(L1, StudyLocus), "L1 is not an instance of StudyLocus" From 25a4820bfd68b1536dd45db16a9cfac280e9e3d7 Mon Sep 17 00:00:00 2001 From: Yakov Date: Wed, 2 Oct 2024 16:32:13 +0100 Subject: [PATCH 074/188] feat: add sumstat QC fields to schema (#809) * feat: add sumstat QC fields to schema * fix: fix of type * fix: fix for struct --- src/gentropy/assets/schemas/study_index.json | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/gentropy/assets/schemas/study_index.json b/src/gentropy/assets/schemas/study_index.json index e18401917..0fe5f253d 100644 --- a/src/gentropy/assets/schemas/study_index.json +++ b/src/gentropy/assets/schemas/study_index.json @@ -256,6 +256,38 @@ "type": "string", "nullable": true, "metadata": {} + }, + { + "name": "sumStatQCPerformed", + "type": "boolean", + "nullable": true, + "metadata": {} + }, + { + "name": "sumStatQCValues", + "type": { + "type": "array", + "elementType": { + "type": "struct", + "fields": [ + { + "name": "QCCheckName", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "QCCheckValue", + "type": "float", + "nullable": true, + "metadata": {} + } + ] + }, + "containsNull": true + }, + "nullable": true, + "metadata": {} } ] } From 70fd5931c02a30cdfe2df9382bf54d33be4f1e9d Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: 
Thu, 3 Oct 2024 12:01:52 +0100 Subject: [PATCH 075/188] feat: fix biosample study validation (#810) * fix(biosample index): update biosample index validation step to only apply to non-gwas studies * fix(biosample index): revert to dropping * feat(biosample index): update test study index to test biosample index too * feat(biosample index): add biosampleId to study index schema --- src/gentropy/assets/schemas/study_index.json | 6 + src/gentropy/dataset/study_index.py | 6 +- src/gentropy/study_validation.py | 2 +- tests/gentropy/dataset/test_study_index.py | 112 ++++++++++++++----- 4 files changed, 92 insertions(+), 34 deletions(-) diff --git a/src/gentropy/assets/schemas/study_index.json b/src/gentropy/assets/schemas/study_index.json index 0fe5f253d..a2dac1bca 100644 --- a/src/gentropy/assets/schemas/study_index.json +++ b/src/gentropy/assets/schemas/study_index.json @@ -57,6 +57,12 @@ "nullable": true, "metadata": {} }, + { + "name": "biosampleId", + "type": "string", + "nullable": true, + "metadata": {} + }, { "name": "pubmedId", "type": "string", diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index 3f9b65097..143396894 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -417,7 +417,7 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu biosample_index (BiosampleIndex): Biosample index containing a reference of biosample identifiers e.g. cell types, tissues, cell lines, etc. Returns: - StudyIndex: with flagged studies if biosampleIndex could not be validated. + StudyIndex: where non-gwas studies are flagged if biosampleIndex could not be validated. 
""" biosample_set = biosample_index.df.select("biosampleId", f.lit(True).alias("isIdFound")) @@ -426,7 +426,7 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu .withColumn( "isIdFound", f.when( - f.col("isIdFound").isNull(), + (f.col("studyType") != "gwas") & (f.col("isIdFound").isNull()), f.lit(False), ).otherwise(f.lit(True)), ) @@ -438,7 +438,7 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu StudyQualityCheck.UNKNOWN_BIOSAMPLE, ), ) - .drop("isIdFound").drop("biosampleId") + .drop("isIdFound") ) return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema()) diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index e1337dd00..573298757 100644 --- a/src/gentropy/study_validation.py +++ b/src/gentropy/study_validation.py @@ -66,7 +66,7 @@ def __init__( .validate_study_type() # Flagging non-supported study types. .validate_target(target_index) # Flagging QTL studies with invalid targets .validate_disease(disease_index) # Flagging invalid EFOs - .validate_biosample(biosample_index) # Flagging studies with invalid biosamples + .validate_biosample(biosample_index) # Flagging QTL studies with invalid biosamples ).persist() # we will need this for 2 types of outputs study_index_with_qc.valid_rows( diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index fee3a2557..b4e092317 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -6,6 +6,7 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql import functions as f +from gentropy.dataset.biosample_index import BiosampleIndex from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.study_index import StudyIndex @@ -143,55 +144,82 @@ def test_aggregate_samples_by_ancestry__correctness(spark: SparkSession) -> None ) -class TestGeneValidation: - """A small test suite to ensure the gene 
validation works as intended.""" +class TestQTLValidation: + """A small test suite to ensure the QTL study validation works as intended.""" GENE_DATA = [ ("ENSG00000102021", "1"), ("ENSG000001020", "1"), ] - GENE_COLUMNS = ["geneId", "chromosome"] + BIOSAMPLE_DATA = [("UBERON_00123", "lung"), ("CL_00321", "monocyte")] + BIOSAMPLE_COLUMNS = ["biosampleId", "biosampleName"] + STUDY_DATA = [ - ("s1", "eqtl", "p", "ENSG00000102021"), + ("s1", "eqtl", "p", "ENSG00000102021", "UBERON_00123"), # This is the only study to be flagged: QTL + Wrong gene - ("s2", "eqtl", "p", "cicaful"), - ("s3", "gwas", "p", None), - ("s4", "gwas", "p", "pocok"), + ("s2", "eqtl", "p", "cicaful", "UBERON_00123"), + # This is the only study to be flagged: QTL + Wrong biosample + ("s3", "sqtl", "p", "ENSG00000102021", "jibberish"), + ("s4", "gwas", "p", None, "anything"), + ("s5", "gwas", "p", "pocok", None), + ] + STUDY_COLUMNS = [ + "studyId", + "studyType", + "projectId", + "geneId", + "biosampleFromSourceId", ] - STUDY_COLUMNS = ["studyId", "studyType", "projectId", "geneId"] @pytest.fixture(autouse=True) - def _setup(self: TestGeneValidation, spark: SparkSession) -> None: + def _setup(self: TestQTLValidation, spark: SparkSession) -> None: """Setup fixture.""" - self.study_index = StudyIndex( - _df=spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS).withColumn( - "qualityControls", f.array().cast("array") - ), - _schema=StudyIndex.get_schema(), - ) - self.study_index_no_gene = StudyIndex( - _df=spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS) - .withColumn("qualityControls", f.array().cast("array")) - .drop("geneId"), - _schema=StudyIndex.get_schema(), - ) + def create_study_index(drop_column: str) -> StudyIndex: + df = spark.createDataFrame(self.STUDY_DATA, self.STUDY_COLUMNS) + df = df.withColumn("qualityControls", f.array().cast("array")) + if drop_column != "": + df = df.drop(drop_column) + return StudyIndex(_df=df, _schema=StudyIndex.get_schema()) + + self.study_index = 
create_study_index("") + self.study_index_no_gene = create_study_index("geneId") + self.study_index_no_biosample_id = create_study_index("biosampleId") self.gene_index = GeneIndex( _df=spark.createDataFrame(self.GENE_DATA, self.GENE_COLUMNS), _schema=GeneIndex.get_schema(), ) + self.biosample_index = BiosampleIndex( + _df=spark.createDataFrame(self.BIOSAMPLE_DATA, self.BIOSAMPLE_COLUMNS), + _schema=BiosampleIndex.get_schema(), + ) - def test_gene_validation_type(self: TestGeneValidation) -> None: - """Testing if the validation runs and returns the expected type.""" + def test_gene_validation_type(self: TestQTLValidation) -> None: + """Testing if the target validation runs and returns the expected type.""" validated = self.study_index.validate_target(self.gene_index) assert isinstance(validated, StudyIndex) - def test_gene_validation_correctness(self: TestGeneValidation) -> None: - """Testing if the gene validation only flags the expected studies.""" - validated = self.study_index.validate_target(self.gene_index).persist() + def test_biosample_validation_type(self: TestQTLValidation) -> None: + """Testing if the biosample validation runs and returns the expected type.""" + validated = self.study_index.validate_biosample(self.biosample_index) + assert isinstance(validated, StudyIndex) + + @pytest.mark.parametrize("gene_or_biosample", ["gene", "biosample"]) + def test_qtl_validation_correctness( + self: TestQTLValidation, gene_or_biosample: str + ) -> None: + """Testing if the QTL validation only flags the expected studies.""" + if gene_or_biosample == "gene": + validated = self.study_index.validate_target(self.gene_index).persist() + bad_study = "s2" + if gene_or_biosample == "biosample": + validated = self.study_index.validate_biosample( + self.biosample_index + ).persist() + bad_study = "s3" # Make sure there's only one flagged: assert validated.df.filter(f.size("qualityControls") != 0).count() == 1 @@ -201,11 +229,27 @@ def test_gene_validation_correctness(self: 
TestGeneValidation) -> None: 0 ]["studyId"] - assert flagged_study == "s2" + assert flagged_study == bad_study - def test_gene_validation_no_gene_column(self: TestGeneValidation) -> None: - """Testing what happens if no geneId column is present.""" - validated = self.study_index_no_gene.validate_target(self.gene_index) + def test_gene_validation_correctness(self: TestQTLValidation) -> None: + """Testing if the gene validation only flags the expected studies.""" + self.test_qtl_validation_correctness("gene") + + def test_biosample_validation_correctness(self: TestQTLValidation) -> None: + """Testing if the biosample validation only flags the expected studies.""" + self.test_qtl_validation_correctness("biosample") + + @pytest.mark.parametrize("gene_or_biosample", ["gene", "biosample"]) + def test_qtl_validation_no_relevant_column( + self: TestQTLValidation, gene_or_biosample: str + ) -> None: + """Testing what happens if no relevant column is present.""" + if gene_or_biosample == "gene": + validated = self.study_index_no_gene.validate_target(self.gene_index) + if gene_or_biosample == "biosample": + validated = self.study_index_no_biosample_id.validate_biosample( + self.biosample_index + ) # Asserty type: assert isinstance(validated, StudyIndex) @@ -213,6 +257,14 @@ def test_gene_validation_no_gene_column(self: TestGeneValidation) -> None: # Assert count: assert validated.df.count() == self.study_index.df.count() + def test_qtl_validation_no_gene_column(self: TestQTLValidation) -> None: + """Testing what happens if no gene column is present.""" + self.test_qtl_validation_no_relevant_column("gene") + + def test_qtl_validation_no_biosample_column(self: TestQTLValidation) -> None: + """Testing what happens if no biosample column is present.""" + self.test_qtl_validation_no_relevant_column("biosample") + class TestUniquenessValidation: """A small test suite to ensure the gene validation works as intended.""" From c286c3bb14224dd05ff21da7fec27ac07ffe1657 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:44:36 +0100 Subject: [PATCH 076/188] refactor(vep_parser): store consequence to impact score as a project config (#811) * refactor: set variant functional consequence to pathogenicity score as class attribute * chore: drop `VariantIndex.get_most_severe_gene_consequence` * chore(VariantIndex): make `CONSEQUENCE_TO_PATHOGENICITY_SCORE` a class attribute * fix(vep): convert `id_to_score_map` to `label_to_score_map` * chore: remove comment * refactor: move `consequence_to_pathogenicity_score` to `VariantIndexConfig` --- .../data/variant_consequence_to_score.tsv | 46 ------------- src/gentropy/common/spark_helpers.py | 4 +- src/gentropy/config.py | 64 ++++++++++++++++++- src/gentropy/dataset/variant_index.py | 56 ++-------------- src/gentropy/datasource/ensembl/vep_parser.py | 30 ++++----- tests/gentropy/dataset/test_variant_index.py | 20 +----- 6 files changed, 84 insertions(+), 136 deletions(-) delete mode 100644 src/gentropy/assets/data/variant_consequence_to_score.tsv diff --git a/src/gentropy/assets/data/variant_consequence_to_score.tsv b/src/gentropy/assets/data/variant_consequence_to_score.tsv deleted file mode 100644 index 589a855e6..000000000 --- a/src/gentropy/assets/data/variant_consequence_to_score.tsv +++ /dev/null @@ -1,46 +0,0 @@ -variantFunctionalConsequenceId label score -SO_0001893 transcript_ablation 1.0 -ECO_0000205 curator_inference -SO_0002165 trinucleotide_repeat_expansion -SO_0001574 splice_acceptor_variant 1.0 -SO_0001575 splice_donor_variant 1.0 -SO_0001587 stop_gained 1.0 -SO_0001589 frameshift_variant 1.0 -SO_0002012 start_lost 1.0 -SO_0001578 stop_lost 1.0 -SO_0001889 transcript_amplification 1.0 -SO_0001894 regulatory_region_ablation 0.66 -SO_0001583 missense_variant 0.66 -SO_0001818 protein_altering_variant 0.66 -SO_0001821 inframe_insertion 0.66 -SO_0001822 inframe_deletion 0.66 -SO_0001582 
initiator_codon_variant -SO_0001630 splice_region_variant 0.33 -SO_0001626 incomplete_terminal_codon_variant 0.33 -SO_0001567 stop_retained_variant 0.33 -SO_0001819 synonymous_variant 0.33 -SO_0002019 start_retained_variant 0.33 -SO_0001619 non_coding_transcript_variant 0.0 -SO_0001620 mature_miRNA_variant 0.0 -SO_0001621 NMD_transcript_variant 0.1 -SO_0001623 5_prime_UTR_variant 0.1 -SO_0001624 3_prime_UTR_variant 0.1 -SO_0001627 intron_variant 0.1 -SO_0001792 non_coding_transcript_exon_variant 0.0 -SO_0001580 coding_sequence_variant 0.0 -SO_0001566 regulatory_region_variant 0.0 -SO_0001631 upstream_gene_variant 0.0 -SO_0001632 downstream_gene_variant 0.0 -SO_0001782 TF_binding_site_variant 0.0 -SO_0001891 regulatory_region_amplification 0.0 -SO_0001892 TFBS_amplification 0.0 -SO_0001895 TFBS_ablation 0.0 -SO_0001906 feature_truncation 0.0 -SO_0001907 feature_elongation 0.0 -SO_0001628 intergenic_variant 0.0 -SO_0001060 sequence_variant -SO_0001825 conservative_inframe_deletion -SO_0001787 splice_donor_5th_base_variant 0.66 -SO_0002170 splice_donor_region_variant 0.33 -SO_0002169 splice_polypyrimidine_tract_variant 0.33 -SO_0001968 coding_transcript_variant 0.1 diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 8f60956e7..fb13763b0 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -447,14 +447,14 @@ def order_array_of_structs_by_two_fields( ) -def map_column_by_dictionary(col: Column, mapping_dict: dict[str, str]) -> Column: +def map_column_by_dictionary(col: Column, mapping_dict: dict[str, Any]) -> Column: """Map column values to dictionary values by key. Missing consequence label will be converted to None, unmapped consequences will be mapped as None. Args: col (Column): Column containing labels to map. - mapping_dict (dict[str, str]): Dictionary with mapping key/value pairs. + mapping_dict (dict[str, Any]): Dictionary with mapping key/value pairs. 
Returns: Column: Column with mapped values. diff --git a/src/gentropy/config.py b/src/gentropy/config.py index e1068cfa6..532b76e22 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -2,7 +2,7 @@ import os from dataclasses import dataclass, field -from typing import Any, List +from typing import Any, ClassVar, List, TypedDict from hail import __file__ as hail_location from hydra.core.config_store import ConfigStore @@ -348,11 +348,73 @@ class GnomadVariantConfig(StepConfig): class VariantIndexConfig(StepConfig): """Variant index step configuration.""" + class _ConsequenceToPathogenicityScoreMap(TypedDict): + """Typing definition for CONSEQUENCE_TO_PATHOGENICITY_SCORE.""" + + id: str + label: str + score: float + session: SessionConfig = SessionConfig() vep_output_json_path: str = MISSING variant_index_path: str = MISSING gnomad_variant_annotations_path: str | None = None hash_threshold: int = 300 + consequence_to_pathogenicity_score: ClassVar[ + list[_ConsequenceToPathogenicityScoreMap] + ] = [ + {"id": "SO_0001575", "label": "splice_donor_variant", "score": 1.0}, + {"id": "SO_0001589", "label": "frameshift_variant", "score": 1.0}, + {"id": "SO_0001574", "label": "splice_acceptor_variant", "score": 1.0}, + {"id": "SO_0001587", "label": "stop_gained", "score": 1.0}, + {"id": "SO_0002012", "label": "start_lost", "score": 1.0}, + {"id": "SO_0001578", "label": "stop_lost", "score": 1.0}, + {"id": "SO_0001893", "label": "transcript_ablation", "score": 1.0}, + {"id": "SO_0001822", "label": "inframe_deletion", "score": 0.66}, + { + "id": "SO_0001818", + "label": "protein_altering_variant", + "score": 0.66, + }, + {"id": "SO_0001821", "label": "inframe_insertion", "score": 0.66}, + { + "id": "SO_0001787", + "label": "splice_donor_5th_base_variant", + "score": 0.66, + }, + {"id": "SO_0001583", "label": "missense_variant", "score": 0.66}, + {"id": "SO_0001567", "label": "stop_retained_variant", "score": 0.33}, + {"id": "SO_0001630", "label": 
"splice_region_variant", "score": 0.33}, + {"id": "SO_0002019", "label": "start_retained_variant", "score": 0.33}, + { + "id": "SO_0002169", + "label": "splice_polypyrimidine_tract_variant", + "score": 0.33, + }, + {"id": "SO_0001819", "label": "synonymous_variant", "score": 0.33}, + { + "id": "SO_0002170", + "label": "splice_donor_region_variant", + "score": 0.33, + }, + {"id": "SO_0001624", "label": "3_prime_UTR_variant", "score": 0.1}, + {"id": "SO_0001623", "label": "5_prime_UTR_variant", "score": 0.1}, + {"id": "SO_0001627", "label": "intron_variant", "score": 0.1}, + { + "id": "SO_0001619", + "label": "non_coding_transcript_variant", + "score": 0.0, + }, + {"id": "SO_0001580", "label": "coding_sequence_variant", "score": 0.0}, + {"id": "SO_0001632", "label": "downstream_gene_variant", "score": 0.0}, + {"id": "SO_0001631", "label": "upstream_gene_variant", "score": 0.0}, + { + "id": "SO_0001792", + "label": "non_coding_transcript_exon_variant", + "score": 0.0, + }, + {"id": "SO_0001620", "label": "mature_miRNA_variant", "score": 0.0}, + ] _target_: str = "gentropy.variant_index.VariantIndexStep" diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 4d53d741a..7277d1fb5 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -11,7 +11,6 @@ from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( get_nested_struct_schema, - get_record_with_maximum_value, rename_all_columns, safe_array_union, ) @@ -22,7 +21,6 @@ from pyspark.sql.types import StructType - @dataclass class VariantIndex(Dataset): """Dataset for representing variants and methods applied on them.""" @@ -130,7 +128,6 @@ def add_annotation( # Prefix for renaming columns: prefix = "annotation_" - # Generate select expressions that to merge and import columns from annotation: select_expressions = [] @@ -146,9 +143,13 @@ def add_annotation( if isinstance(field.dataType.elementType, 
t.StructType): # Extract the schema of the array to get the order of the fields: array_schema = [ - field for field in VariantIndex.get_schema().fields if field.name == column + field + for field in VariantIndex.get_schema().fields + if field.name == column ][0].dataType - fields_order = get_nested_struct_schema(array_schema).fieldNames() + fields_order = get_nested_struct_schema( + array_schema + ).fieldNames() select_expressions.append( safe_array_union( f.col(column), f.col(f"{prefix}{column}"), fields_order @@ -286,48 +287,3 @@ def get_loftee(self: VariantIndex) -> DataFrame: "isHighQualityPlof", ) ) - - def get_most_severe_gene_consequence( - self: VariantIndex, - *, - vep_consequences: DataFrame, - ) -> DataFrame: - """Returns a dataframe with the most severe consequence for a variant/gene pair. - - Args: - vep_consequences (DataFrame): A dataframe of VEP consequences - - Returns: - DataFrame: A dataframe with the most severe consequence (plus a severity score) for a variant/gene pair - """ - return ( - self.df.select("variantId", f.explode("transcriptConsequences").alias("tc")) - .select( - "variantId", - f.col("tc.targetId"), - f.explode(f.col("tc.variantFunctionalConsequenceIds")).alias( - "variantFunctionalConsequenceId" - ), - ) - .join( - # TODO: make this table a project config - f.broadcast( - vep_consequences.selectExpr( - "variantFunctionalConsequenceId", "score as severityScore" - ) - ), - on="variantFunctionalConsequenceId", - how="inner", - ) - .filter(f.col("severityScore").isNull()) - .transform( - # A variant can have multiple predicted consequences on a transcript, the most severe one is selected - lambda df: get_record_with_maximum_value( - df, ["variantId", "targetId"], "severityScore" - ) - ) - .withColumnRenamed( - "variantFunctionalConsequenceId", - "mostSevereVariantFunctionalConsequenceId", - ) - ) diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index e3e36140d..6931b96de 
100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -2,15 +2,12 @@ from __future__ import annotations -import importlib.resources as pkg_resources from typing import TYPE_CHECKING -import pandas as pd from pyspark.sql import SparkSession from pyspark.sql import functions as f from pyspark.sql import types as t -from gentropy.assets import data from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( enforce_schema, @@ -24,9 +21,12 @@ if TYPE_CHECKING: from pyspark.sql import Column, DataFrame +from gentropy.config import VariantIndexConfig + class VariantEffectPredictorParser: """Collection of methods to parse VEP output in json format.""" + # NOTE: Due to the fact that the comparison of the xrefs is done om the base of rsids # if the field `colocalised_variants` have multiple rsids, this extracting xrefs will result in # an array of xref structs, rather then the struct itself. @@ -568,22 +568,16 @@ def process_vep_output( Returns: DataFrame: processed data in the right shape. 
""" - so_df = pd.read_csv( - pkg_resources.open_text( - data, "variant_consequence_to_score.tsv", encoding="utf-8" - ), - sep="\t", - ) - - # Reading consequence to sequence ontology map: + # Consequence to sequence ontology map: sequence_ontology_map = { - row["label"]: row["variantFunctionalConsequenceId"] - for _, row in so_df.iterrows() + item["label"]: item["id"] + for item in VariantIndexConfig.consequence_to_pathogenicity_score + } + # Sequence ontology to score map: + label_to_score_map = { + item["label"]: item["score"] + for item in VariantIndexConfig.consequence_to_pathogenicity_score } - - # Reading score dictionary: - score_dictionary = {row["label"]: row["score"] for _, row in so_df.iterrows()} - # Processing VEP output: return ( vep_output @@ -694,7 +688,7 @@ def process_vep_output( f.transform( transcript.consequence_terms, lambda term: map_column_by_dictionary( - term, score_dictionary + term, label_to_score_map ), ) ) diff --git a/tests/gentropy/dataset/test_variant_index.py b/tests/gentropy/dataset/test_variant_index.py index 29a6ef035..15b102415 100644 --- a/tests/gentropy/dataset/test_variant_index.py +++ b/tests/gentropy/dataset/test_variant_index.py @@ -11,7 +11,7 @@ from gentropy.dataset.variant_index import VariantIndex if TYPE_CHECKING: - from pyspark.sql import DataFrame, SparkSession + from pyspark.sql import SparkSession def test_variant_index_creation(mock_variant_index: VariantIndex) -> None: @@ -144,24 +144,6 @@ def test_get_distance_to_gene( for col in expected_cols: assert col in observed.columns, f"Column {col} not in {observed.columns}" - def test_get_most_severe_gene_consequence( - self: TestVariantIndex, - mock_variant_index: VariantIndex, - mock_variant_consequence_to_score: DataFrame, - ) -> None: - """Assert that the function returns a df with the requested columns.""" - expected_cols = [ - "variantId", - "targetId", - "mostSevereVariantFunctionalConsequenceId", - "severityScore", - ] - observed = 
mock_variant_index.get_most_severe_gene_consequence( - vep_consequences=mock_variant_consequence_to_score - ) - for col in expected_cols: - assert col in observed.columns, f"Column {col} not in {observed.columns}" - def test_get_loftee( self: TestVariantIndex, mock_variant_index: VariantIndex ) -> None: From 8876fc1db5aaeb41d1e961ca24aaac57d17c1a2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 3 Oct 2024 17:33:42 +0100 Subject: [PATCH 077/188] feat(l2g): implement variant consequence features from VEP (#805) * feat(variant_index): set variant functional consequence to pathogenicity score as class attribute * feat: add and test vep features * feat: add and test vep neighbourhood features * feat(vep_features): use only protein coding genes to look in the vicinity * chore: update feature list and other bits * fix: docs ref * refactor: set variant functional consequence to pathogenicity score as class attribute * chore: drop `VariantIndex.get_most_severe_gene_consequence` * chore(VariantIndex): make `CONSEQUENCE_TO_PATHOGENICITY_SCORE` a class attribute * fix(vep): convert `id_to_score_map` to `label_to_score_map` * chore: remove comment * chore: access max consequence score from variant index --- docs/python_api/datasets/l2g_features/vep.md | 15 + src/gentropy/config.py | 6 + src/gentropy/dataset/l2g_features/vep.py | 274 +++++++++++++++++++ src/gentropy/dataset/variant_index.py | 2 +- src/gentropy/l2g.py | 9 + src/gentropy/method/l2g/feature_factory.py | 10 + tests/gentropy/conftest.py | 13 - tests/gentropy/dataset/test_l2g_feature.py | 254 +++++++++++++++++ 8 files changed, 569 insertions(+), 14 deletions(-) create mode 100644 docs/python_api/datasets/l2g_features/vep.md create mode 100644 src/gentropy/dataset/l2g_features/vep.py diff --git a/docs/python_api/datasets/l2g_features/vep.md b/docs/python_api/datasets/l2g_features/vep.md new file mode 100644 index 000000000..d2715e977 
--- /dev/null +++ b/docs/python_api/datasets/l2g_features/vep.md @@ -0,0 +1,15 @@ +--- +title: From VEP +--- + +## List of features + +::: gentropy.dataset.l2g_features.vep.VepMeanFeature +::: gentropy.dataset.l2g_features.vep.VepMeanNeighbourhoodFeature +::: gentropy.dataset.l2g_features.vep.VepMaximumFeature +::: gentropy.dataset.l2g_features.vep.VepMaximumNeighbourhoodFeature + +## Common logic + +::: gentropy.dataset.l2g_features.vep.common_vep_feature_logic +::: gentropy.dataset.l2g_features.vep.common_neighbourhood_vep_feature_logic diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 532b76e22..6faf59602 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -238,6 +238,7 @@ class LocusToGeneConfig(StepConfig): variant_index_path: str = MISSING colocalisation_path: str = MISSING study_index_path: str = MISSING + gene_index_path: str = MISSING model_path: str | None = None feature_matrix_path: str | None = None gold_standard_curation_path: str | None = None @@ -264,6 +265,11 @@ class LocusToGeneConfig(StepConfig): "distanceTssMeanNeighbourhood", "distanceSentinelTss", "distanceSentinelTssNeighbourhood", + # vep + "vepMaximum", + "vepMaximumNeighbourhood", + "vepMean", + "vepMeanNeighbourhood", ] ) hyperparameters: dict[str, Any] = field( diff --git a/src/gentropy/dataset/l2g_features/vep.py b/src/gentropy/dataset/l2g_features/vep.py new file mode 100644 index 000000000..13ac05f91 --- /dev/null +++ b/src/gentropy/dataset/l2g_features/vep.py @@ -0,0 +1,274 @@ +"""Collection of methods that extract distance features from the variant index dataset.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pyspark.sql.functions as f +from pyspark.sql import Window + +from gentropy.common.spark_helpers import convert_from_wide_to_long +from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_gold_standard import 
L2GGoldStandard +from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.variant_index import VariantIndex + +if TYPE_CHECKING: + from pyspark.sql import DataFrame + + +def common_vep_feature_logic( + study_loci_to_annotate: L2GGoldStandard | StudyLocus, + *, + variant_index: VariantIndex, + feature_name: str, +) -> DataFrame: + """Extracts variant severity score computed from VEP. + + Args: + study_loci_to_annotate (L2GGoldStandard | StudyLocus): The dataset containing study loci that will be used for annotation + variant_index (VariantIndex): The dataset containing functional consequence information + feature_name (str): The name of the feature + + Returns: + DataFrame: Feature dataset + """ + # Variant/Target/Severity dataframe + consequences_dataset = variant_index.df.withColumn( + "transcriptConsequence", f.explode("transcriptConsequences") + ).select( + "variantId", + f.col("transcriptConsequence.targetId").alias("geneId"), + f.col("transcriptConsequence.consequenceScore").alias("severityScore"), + ) + if isinstance(study_loci_to_annotate, StudyLocus): + variants_df = ( + study_loci_to_annotate.df.withColumn( + "variantInLocus", f.explode_outer("locus") + ) + .select( + "studyLocusId", + f.col("variantInLocus.variantId").alias("variantId"), + f.col("variantInLocus.posteriorProbability").alias( + "posteriorProbability" + ), + ) + .join(consequences_dataset, "variantId") + ) + elif isinstance(study_loci_to_annotate, L2GGoldStandard): + variants_df = study_loci_to_annotate.df.select( + "studyLocusId", "variantId", f.lit(1.0).alias("posteriorProbability") + ).join(consequences_dataset, "variantId") + + if "Maximum" in feature_name: + agg_expr = f.max("severityScore") + elif "Mean" in feature_name: + variants_df = variants_df.withColumn( + "weightedScore", f.col("severityScore") * f.col("posteriorProbability") + ) + agg_expr = f.mean("weightedScore") + return variants_df.groupBy("studyLocusId", "geneId").agg( + agg_expr.alias(feature_name) + ) + 
+ +def common_neighbourhood_vep_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + *, + variant_index: VariantIndex, + gene_index: GeneIndex, + feature_name: str, +) -> DataFrame: + """Extracts variant severity score computed from VEP for any gene, based on what is the mean score for protein coding genes that are nearby the locus. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + variant_index (VariantIndex): The dataset containing functional consequence information + gene_index (GeneIndex): The dataset containing the gene biotype + feature_name (str): The name of the feature + + Returns: + DataFrame: Feature dataset + """ + local_feature_name = feature_name.replace("Neighbourhood", "") + # First compute mean distances to a gene + local_metric = common_vep_feature_logic( + study_loci_to_annotate, + feature_name=local_feature_name, + variant_index=variant_index, + ) + return ( + # Then compute mean distance in the vicinity (feature will be the same for any gene associated with a studyLocus) + local_metric.join( + # Bring gene classification + gene_index.df.select("geneId", "biotype"), + "geneId", + "inner", + ) + .withColumn( + "regional_metric", + f.coalesce( + # Calculate mean based on protein coding genes + f.mean( + f.when( + f.col("biotype") == "protein_coding", f.col(local_feature_name) + ) + ).over(Window.partitionBy("studyLocusId")), + # Default to 0 if there are no protein coding genes + f.lit(0), + ), + ) + .withColumn(feature_name, f.col(local_feature_name) - f.col("regional_metric")) + .drop("regional_metric", local_feature_name, "biotype") + ) + + +class VepMaximumFeature(L2GFeature): + """Maximum functional consequence score among all variants in a credible set for a studyLocus/gene.""" + + feature_dependency_type = VariantIndex + feature_name = "vepMaximum" + + @classmethod + def compute( + cls: type[VepMaximumFeature], + study_loci_to_annotate: 
StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> VepMaximumFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the functional consequence information + + Returns: + VepMaximumFeature: Feature dataset + """ + return cls( + _df=convert_from_wide_to_long( + common_vep_feature_logic( + study_loci_to_annotate=study_loci_to_annotate, + feature_name=cls.feature_name, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class VepMaximumNeighbourhoodFeature(L2GFeature): + """Maximum functional consequence score among all variants in a credible set for a studyLocus/gene relative to the mean VEP score across all protein coding genes in the vicinity.""" + + feature_dependency_type = [VariantIndex, GeneIndex] + feature_name = "vepMaximumNeighbourhood" + + @classmethod + def compute( + cls: type[VepMaximumNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> VepMaximumNeighbourhoodFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the functional consequence information + + Returns: + VepMaximumNeighbourhoodFeature: Feature dataset + """ + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_vep_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class VepMeanFeature(L2GFeature): + """Average functional consequence score among all variants in a credible set for a studyLocus/gene. + + The mean severity score is weighted by the posterior probability of each variant. + """ + + feature_dependency_type = VariantIndex + feature_name = "vepMean" + + @classmethod + def compute( + cls: type[VepMeanFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> VepMeanFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the functional consequence information + + Returns: + VepMeanFeature: Feature dataset + """ + return cls( + _df=convert_from_wide_to_long( + common_vep_feature_logic( + study_loci_to_annotate=study_loci_to_annotate, + feature_name=cls.feature_name, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class VepMeanNeighbourhoodFeature(L2GFeature): + """Mean functional consequence score among all variants in a credible set for a studyLocus/gene relative to the mean VEP score across all protein coding genes in the vicinity. 
+ + The mean severity score is weighted by the posterior probability of each variant. + """ + + feature_dependency_type = [VariantIndex, GeneIndex] + feature_name = "vepMeanNeighbourhood" + + @classmethod + def compute( + cls: type[VepMeanNeighbourhoodFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> VepMeanNeighbourhoodFeature: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the functional consequence information + + Returns: + VepMeanNeighbourhoodFeature: Feature dataset + """ + return cls( + _df=convert_from_wide_to_long( + common_neighbourhood_vep_feature_logic( + study_loci_to_annotate, + feature_name=cls.feature_name, + **feature_dependency, + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 7277d1fb5..a1a2e2a4d 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -1,4 +1,4 @@ -"""Dataset definition for variant annotation.""" +"""Dataset definition for variant index.""" from __future__ import annotations diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 9b9b7aa90..7962da484 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -12,6 +12,7 @@ from gentropy.common.utils import access_gcp_secret from gentropy.config import LocusToGeneConfig from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.l2g_prediction import L2GPrediction @@ -41,6 +42,7 @@ def __init__( variant_index_path: str | 
None = None, colocalisation_path: str | None = None, study_index_path: str | None = None, + gene_index_path: str | None = None, gene_interactions_path: str | None = None, predictions_path: str | None = None, feature_matrix_path: str | None = None, @@ -62,6 +64,7 @@ def __init__( variant_index_path (str | None): Path to the variant index dataset colocalisation_path (str | None): Path to the colocalisation dataset study_index_path (str | None): Path to the study index dataset + gene_index_path (str | None): Path to the gene index dataset gene_interactions_path (str | None): Path to the gene interactions dataset predictions_path (str | None): Path to the L2G predictions output dataset feature_matrix_path (str | None): Path to the L2G feature matrix output dataset @@ -108,11 +111,17 @@ def __init__( if colocalisation_path else None ) + self.gene_index = ( + GeneIndex.from_parquet(session, gene_index_path, recursiveFileLookup=True) + if gene_index_path + else None + ) self.features_input_loader = L2GFeatureInputLoader( variant_index=self.variant_index, coloc=self.coloc, studies=self.studies, study_locus=self.credible_set, + gene_index=self.gene_index, ) if run_mode == "predict": diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index 41084277f..41db44806 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -33,6 +33,12 @@ DistanceTssMeanNeighbourhoodFeature, ) from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_features.vep import ( + VepMaximumFeature, + VepMaximumNeighbourhoodFeature, + VepMeanFeature, + VepMeanNeighbourhoodFeature, +) from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_locus import StudyLocus @@ -117,6 +123,10 @@ class FeatureFactory: "sQtlColocH4MaximumNeighbourhood": SQtlColocH4MaximumNeighbourhoodFeature, "tuQtlColocH4Maximum": TuQtlColocH4MaximumFeature, 
"tuQtlColocH4MaximumNeighbourhood": TuQtlColocH4MaximumNeighbourhoodFeature, + "vepMean": VepMeanFeature, + "vepMeanNeighbourhood": VepMeanNeighbourhoodFeature, + "vepMaximum": VepMaximumFeature, + "vepMaximumNeighbourhood": VepMaximumNeighbourhoodFeature, } def __init__( diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index a70c1a87d..f977e3f74 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -250,19 +250,6 @@ def mock_intervals(spark: SparkSession) -> Intervals: return Intervals(_df=data_spec.build(), _schema=interval_schema) -@pytest.fixture() -def mock_variant_consequence_to_score(spark: SparkSession) -> DataFrame: - """Slice of the VEP consequence to score table.""" - return spark.createDataFrame( - [ - ("SO_0001893", "transcript_ablation", 1.0), - ("SO_0001822", "inframe_deletion", 0.66), - ("SO_0001567", "stop_retained_variant", 0.33), - ], - ["variantFunctionalConsequenceId", "label", "score"], - ) - - @pytest.fixture() def mock_variant_index(spark: SparkSession) -> VariantIndex: """Mock variant index.""" diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 18d8a4066..c674280ac 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -9,6 +9,7 @@ from pyspark.sql.types import ( ArrayType, BooleanType, + FloatType, IntegerType, LongType, StringType, @@ -17,6 +18,7 @@ ) from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_features.colocalisation import ( EQtlColocClppMaximumFeature, EQtlColocClppMaximumNeighbourhoodFeature, @@ -50,6 +52,14 @@ common_neighbourhood_distance_feature_logic, ) from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_features.vep import ( + VepMaximumFeature, + VepMaximumNeighbourhoodFeature, + VepMeanFeature, + VepMeanNeighbourhoodFeature, + 
common_neighbourhood_vep_feature_logic, + common_vep_feature_logic, +) from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.variant_index import VariantIndex @@ -86,6 +96,10 @@ DistanceSentinelTssNeighbourhoodFeature, DistanceSentinelFootprintFeature, DistanceSentinelFootprintNeighbourhoodFeature, + VepMaximumFeature, + VepMeanFeature, + VepMaximumNeighbourhoodFeature, + VepMeanNeighbourhoodFeature, ], ) def test_feature_factory_return_type( @@ -94,6 +108,7 @@ def test_feature_factory_return_type( mock_colocalisation: Colocalisation, mock_study_index: StudyIndex, mock_variant_index: VariantIndex, + mock_gene_index: GeneIndex, ) -> None: """Test that every feature factory returns a L2GFeature dataset.""" loader = L2GFeatureInputLoader( @@ -101,6 +116,7 @@ def test_feature_factory_return_type( study_index=mock_study_index, variant_index=mock_variant_index, study_locus=mock_study_locus, + gene_index=mock_gene_index, ) feature_dataset = feature_class.compute( study_loci_to_annotate=mock_study_locus, @@ -486,3 +502,241 @@ def _setup(self: TestCommonDistanceFeatureLogic, spark: SparkSession) -> None: ), _schema=VariantIndex.get_schema(), ) + + +class TestCommonVepFeatureLogic: + """Test the common_vep_feature_logic methods.""" + + @pytest.mark.parametrize( + ("feature_name", "expected_data"), + [ + ( + "vepMean", + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "vepMean": "0.33", + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "vepMean": "0.50", + }, + ], + ), + ( + "vepMaximum", + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "vepMaximum": "0.66", + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "vepMaximum": "1.00", + }, + ], + ), + ], + ) + def test_common_vep_feature_logic( + self: TestCommonVepFeatureLogic, + spark: SparkSession, + feature_name: str, + expected_data: dict[str, Any], + ) -> None: + """Test the logic of the function that extracts features from VEP's 
functional consequences.""" + observed_df = ( + common_vep_feature_logic( + self.sample_study_locus, + variant_index=self.sample_variant_index, + feature_name=feature_name, + ) + .orderBy(feature_name) + .withColumn( + feature_name, f.format_number(f.round(f.col(feature_name), 2), 2) + ) + ) + expected_df = ( + spark.createDataFrame(expected_data) + .orderBy(feature_name) + .select("studyLocusId", "geneId", feature_name) + ) + assert ( + observed_df.collect() == expected_df.collect() + ), f"Expected and observed dataframes are not equal for feature {feature_name}." + + def test_common_neighbourhood_vep_feature_logic_no_protein_coding( + self: TestCommonVepFeatureLogic, + spark: SparkSession, + ) -> None: + """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity. + + Because the genes in the vicinity are all non coding, the neighbourhood features should equal the local ones. 
+ """ + feature_name = "vepMaximumNeighbourhood" + sample_gene_index = GeneIndex( + _df=spark.createDataFrame( + [ + { + "geneId": "gene1", + "biotype": "lncRNA", + "chromosome": "1", + }, + { + "geneId": "gene2", + "biotype": "lncRNA", + "chromosome": "1", + }, + ], + GeneIndex.get_schema(), + ), + _schema=GeneIndex.get_schema(), + ) + observed_df = ( + common_neighbourhood_vep_feature_logic( + self.sample_study_locus, + variant_index=self.sample_variant_index, + gene_index=sample_gene_index, + feature_name=feature_name, + ) + .withColumn(feature_name, f.round(f.col(feature_name), 2)) + .orderBy(f.col(feature_name).asc()) + .select("studyLocusId", "geneId", feature_name) + ) + expected_df = ( + spark.createDataFrame( + (["1", "gene1", 0.66], ["1", "gene2", 1.0]), + ["studyLocusId", "geneId", feature_name], + ) + .orderBy(feature_name) + .select("studyLocusId", "geneId", feature_name) + ) + assert ( + observed_df.collect() == expected_df.collect() + ), "Output doesn't meet the expectation." 
+ + def test_common_neighbourhood_vep_feature_logic( + self: TestCommonVepFeatureLogic, + spark: SparkSession, + ) -> None: + """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity.""" + feature_name = "vepMaximumNeighbourhood" + sample_gene_index = GeneIndex( + _df=spark.createDataFrame( + [ + { + "geneId": "gene1", + "biotype": "protein_coding", + "chromosome": "1", + }, + { + "geneId": "gene2", + "biotype": "lncRNA", + "chromosome": "1", + }, + ], + GeneIndex.get_schema(), + ), + _schema=GeneIndex.get_schema(), + ) + observed_df = ( + common_neighbourhood_vep_feature_logic( + self.sample_study_locus, + variant_index=self.sample_variant_index, + gene_index=sample_gene_index, + feature_name=feature_name, + ) + .withColumn(feature_name, f.round(f.col(feature_name), 2)) + .orderBy(f.col(feature_name).asc()) + ) + expected_df = ( + spark.createDataFrame( + (["1", "gene1", 0.0], ["1", "gene2", 0.34]), + ["studyLocusId", "geneId", feature_name], + ) + .select("studyLocusId", "geneId", feature_name) + .orderBy(feature_name) + ) + assert ( + observed_df.collect() == expected_df.collect() + ), "Output doesn't meet the expectation." 
+ + @pytest.fixture(autouse=True) + def _setup(self: TestCommonVepFeatureLogic, spark: SparkSession) -> None: + """Set up testing fixtures.""" + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "var1", + "studyId": "study1", + "locus": [ + { + "variantId": "var1", + "posteriorProbability": 0.5, + }, + ], + "chromosome": "1", + }, + ], + StudyLocus.get_schema(), + ), + _schema=StudyLocus.get_schema(), + ) + self.sample_variant_index = VariantIndex( + _df=spark.createDataFrame( + [ + ( + "var1", + "chrom", + 1, + "A", + "T", + [ + { + "targetId": "gene1", + "consequenceScore": 0.66, + "isEnsemblCanonical": True, + }, + { + "targetId": "gene2", + "consequenceScore": 1.0, + "isEnsemblCanonical": True, + }, + ], + ), + ], + schema=StructType( + [ + StructField("variantId", StringType(), True), + StructField("chromosome", StringType(), True), + StructField("position", IntegerType(), True), + StructField("referenceAllele", StringType(), True), + StructField("alternateAllele", StringType(), True), + StructField( + "transcriptConsequences", + ArrayType( + StructType( + [ + StructField("targetId", StringType(), True), + StructField( + "isEnsemblCanonical", BooleanType(), True + ), + StructField( + "consequenceScore", FloatType(), True + ), + ] + ) + ), + True, + ), + ] + ), + ), + _schema=VariantIndex.get_schema(), + ) From fca55beb702250db1d8e4da893482d3dbb3fdaa5 Mon Sep 17 00:00:00 2001 From: Yakov Date: Thu, 3 Oct 2024 20:53:49 +0100 Subject: [PATCH 078/188] feat: optimisation of qc step (#813) * feat: optimisation of qc step * fix: adding Z2 filter * fix: v1 --- src/gentropy/config.py | 6 ++-- .../method/sumstat_quality_controls.py | 31 ++++--------------- src/gentropy/sumstat_qc_step.py | 2 +- tests/gentropy/method/test_qc_of_sumstats.py | 4 +-- 4 files changed, 13 insertions(+), 30 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 6faf59602..3ed439333 100644 --- 
a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -502,7 +502,7 @@ class FinemapperConfig(StepConfig): @dataclass -class GWASQCStep(StepConfig): +class SummaryStatisticsQCStepConfig(StepConfig): """GWAS QC step configuration.""" gwas_path: str = MISSING @@ -614,7 +614,9 @@ def register_config() -> None: group="step", name="window_based_clumping", node=WindowBasedClumpingStepConfig ) cs.store(group="step", name="susie_finemapping", node=FinemapperConfig) - cs.store(group="step", name="summary_statistics_qc", node=GWASQCStep) + cs.store( + group="step", name="summary_statistics_qc", node=SummaryStatisticsQCStepConfig + ) cs.store( group="step", name="locus_breaker_clumping", node=LocusBreakerClumpingConfig ) diff --git a/src/gentropy/method/sumstat_quality_controls.py b/src/gentropy/method/sumstat_quality_controls.py index 1647851de..eadfc58d6 100644 --- a/src/gentropy/method/sumstat_quality_controls.py +++ b/src/gentropy/method/sumstat_quality_controls.py @@ -69,13 +69,11 @@ def _calculate_logpval(z2: float) -> float: @staticmethod def sumstat_qc_pz_check( gwas_for_qc: SummaryStatistics, - limit: int = 10_000_000, ) -> DataFrame: """The PZ check for QC of GWAS summary statstics. It runs linear regression between reported p-values and p-values infered from z-scores. Args: gwas_for_qc (SummaryStatistics): The instance of the SummaryStatistics class. - limit (int): The limit for the number of variants to be used for the estimation. Returns: DataFrame: PySpark DataFrame with the results of the linear regression for each study. 
@@ -86,17 +84,10 @@ def sumstat_qc_pz_check( SummaryStatisticsQC._calculate_logpval, t.DoubleType() ) - window = Window.partitionBy("studyId").orderBy("studyId") - - gwas_df = ( - gwas_df.withColumn("row_num", row_number().over(window)) - .filter(f.col("row_num") <= limit) - .drop("row_num") - ) - qc_c = ( - gwas_df.withColumn("zscore", f.col("beta") / f.col("standardError")) - .withColumn("new_logpval", calculate_logpval_udf(f.col("zscore") ** 2)) + gwas_df.withColumn("Z2", (f.col("beta") / f.col("standardError")) ** 2) + .filter(f.col("Z2") <= 100) + .withColumn("new_logpval", calculate_logpval_udf(f.col("Z2"))) .withColumn("log_mantissa", log10("pValueMantissa")) .withColumn( "diffpval", @@ -194,24 +185,16 @@ def sumstat_n_eff_check( @staticmethod def gc_lambda_check( gwas_for_qc: SummaryStatistics, - limit: int = 10_000_000, ) -> DataFrame: """The genomic control lambda check for QC of GWAS summary statstics. Args: gwas_for_qc (SummaryStatistics): The instance of the SummaryStatistics class. - limit (int): The limit for the number of variants to be used for the estimation. Returns: DataFrame: PySpark DataFrame with the genomic control lambda for each study. """ gwas_df = gwas_for_qc._df - window = Window.partitionBy("studyId").orderBy("studyId") - gwas_df = ( - gwas_df.withColumn("row_num", row_number().over(window)) - .filter(f.col("row_num") <= limit) - .drop("row_num") - ) qc_c = ( gwas_df.select("studyId", "beta", "standardError") @@ -254,22 +237,20 @@ def number_of_snps( @staticmethod def get_quality_control_metrics( gwas: SummaryStatistics, - limit: int = 100_000_000, - pval_threshold: float = 5e-8, + pval_threshold: float = 1e-8, ) -> DataFrame: """The function calculates the quality control metrics for the summary statistics. Args: gwas (SummaryStatistics): The instance of the SummaryStatistics class. - limit (int): The limit for the number of variants to be used for the estimation. pval_threshold (float): The threshold for the p-value. 
Returns: DataFrame: PySpark DataFrame with the quality control metrics for the summary statistics. """ qc1 = SummaryStatisticsQC.sumstat_qc_beta_check(gwas_for_qc=gwas) - qc2 = SummaryStatisticsQC.sumstat_qc_pz_check(gwas_for_qc=gwas, limit=limit) - qc4 = SummaryStatisticsQC.gc_lambda_check(gwas_for_qc=gwas, limit=limit) + qc2 = SummaryStatisticsQC.sumstat_qc_pz_check(gwas_for_qc=gwas) + qc4 = SummaryStatisticsQC.gc_lambda_check(gwas_for_qc=gwas) qc5 = SummaryStatisticsQC.number_of_snps( gwas_for_qc=gwas, pval_threshold=pval_threshold ) diff --git a/src/gentropy/sumstat_qc_step.py b/src/gentropy/sumstat_qc_step.py index 333ab19f3..49ddc3c4a 100644 --- a/src/gentropy/sumstat_qc_step.py +++ b/src/gentropy/sumstat_qc_step.py @@ -30,7 +30,7 @@ def __init__( ( SummaryStatisticsQC.get_quality_control_metrics( - gwas=gwas, limit=100_000_000, pval_threshold=pval_threshold + gwas=gwas, pval_threshold=pval_threshold ) .write.mode(session.write_mode) .parquet(output_path) diff --git a/tests/gentropy/method/test_qc_of_sumstats.py b/tests/gentropy/method/test_qc_of_sumstats.py index 8f63e6ba2..cb8d3443e 100644 --- a/tests/gentropy/method/test_qc_of_sumstats.py +++ b/tests/gentropy/method/test_qc_of_sumstats.py @@ -17,7 +17,7 @@ def test_qc_functions( ) -> None: """Test all sumstat qc functions.""" gwas = sample_summary_statistics.sanity_filter() - QC = SummaryStatisticsQC.get_quality_control_metrics(gwas=gwas, limit=100000) + QC = SummaryStatisticsQC.get_quality_control_metrics(gwas=gwas, pval_threshold=5e-8) QC = QC.toPandas() assert QC["n_variants"].iloc[0] == 1663 @@ -55,7 +55,7 @@ def test_several_studyid( ) gwas._df = gwas_df - QC = SummaryStatisticsQC.get_quality_control_metrics(gwas=gwas, limit=100000) + QC = SummaryStatisticsQC.get_quality_control_metrics(gwas=gwas) QC = QC.toPandas() assert QC.shape == (2, 7) From 68c01689491ac2dba8c0accda9bed5aa64f4cc4f Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Fri, 4 
Oct 2024 10:40:13 +0200 Subject: [PATCH 079/188] fix(validation): add `qualityControls` column if missing in StudyLocus dataset when perfroming validation (#814) * fix: coalesce qualityControl column in credible set * fix: resolve missing qualityControls column when validating studyLocus * chore: whitespace * chore: use getter to infer qc column name * chore: drop show * chore: drop warnings for step_test(s) --------- Co-authored-by: Szymon Szyszkowski --- .gitignore | 1 + pyproject.toml | 2 +- src/gentropy/common/spark_helpers.py | 29 ++++++++++++++ src/gentropy/dataset/study_locus.py | 15 ++++++- .../finngen/test_finngen_finemapping.py | 40 +++++++++++++++++++ 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index f4c85d797..b62418654 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ site/ .env .coverage* wandb/ +hail*.log diff --git a/pyproject.toml b/pyproject.toml index f61d82116..47829ddcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,7 +126,7 @@ exclude = ["dist"] addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml" pythonpath = ["."] testpaths = ["tests/gentropy", "src/gentropy"] -marks = ["step_test"] +markers = ["step_test"] # Semi-strict mode for mypy [tool.mypy] diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index fb13763b0..4e40ac4f1 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -818,3 +818,32 @@ def get_nested_struct_schema(dtype: t.DataType) -> t.StructType: return get_nested_struct_schema(dtype) case _: raise TypeError("The input data type must be a nested struct.") + + +def get_struct_field_schema(schema: t.StructType, name: str) -> t.DataType: + """Get schema for underlying struct field. + + Args: + schema (t.StructType): Provided schema where the name should be looked in. 
+ name (str): Name of the field to look in the schema + + Returns: + t.DataType: Data type of the StructField with provided name + + Raises: + ValueError: If provided name is not present in the input schema + + Examples: + >>> get_struct_field_schema(t.StructType([t.StructField("a", t.StringType())]), "a") + StringType() + + >>> get_struct_field_schema(t.StructType([t.StructField("a", t.StringType())]), "b") # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Provided name b is not present in the schema + + """ + matching_fields = [f for f in schema.fields if f.name == name] + if not matching_fields: + raise ValueError("Provided name %s is not present in the schema.", name) + return matching_fields[0].dataType diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index bf9998458..4d8136c8e 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -14,6 +14,8 @@ from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import ( calculate_neglog_pvalue, + create_empty_column_if_not_exists, + get_struct_field_schema, order_array_of_structs_by_field, ) from gentropy.common.utils import get_logsum @@ -271,10 +273,19 @@ def validate_lead_pvalue(self: StudyLocus, pvalue_cutoff: float) -> StudyLocus: Returns: StudyLocus: Updated study locus with quality control flags. 
""" + df = self.df + qc_colname = StudyLocus.get_QC_column_name() + if qc_colname not in self.df.columns: + df = self.df.withColumn( + qc_colname, + create_empty_column_if_not_exists( + qc_colname, get_struct_field_schema(StudyLocus.get_schema(), qc_colname) + ), + ) return StudyLocus( _df=( - self.df.withColumn( - "qualityControls", + df.withColumn( + qc_colname, # Because this QC might already run on the dataset, the unique set of flags is generated: f.array_distinct( self._qc_subsignificant_associations( diff --git a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py index ed0b68643..1e5d486b7 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py +++ b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py @@ -2,12 +2,16 @@ from __future__ import annotations +from pathlib import Path + import hail as hl import pytest from pyspark.sql import SparkSession +from gentropy.common.session import Session from gentropy.dataset.study_locus import StudyLocus from gentropy.datasource.finngen.finemapping import FinnGenFinemapping +from gentropy.finngen_finemapping_ingestion import FinnGenFinemappingIngestionStep @pytest.mark.parametrize( @@ -43,3 +47,39 @@ def test_finngen_finemapping_from_finngen_susie_finemapping( ), StudyLocus, ) + + +@pytest.mark.parametrize( + [ + "finngen_susie_finemapping_snp_files", + "finngen_susie_finemapping_cs_summary_files", + ], + [ + pytest.param( + "tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz", + "tests/gentropy/data_samples/finngen_credset_summary_sample.tsv", + id="non block compressed files", + ), + ], +) +@pytest.mark.step_test +def test_finngen_finemapping_ingestion_step( + session: Session, + finngen_susie_finemapping_snp_files: str, + finngen_susie_finemapping_cs_summary_files: str, + tmp_path: Path, +) -> None: + """Test finngen finemapping ingestion step.""" + output_path = tmp_path / "output" + 
FinnGenFinemappingIngestionStep( + session=session, + finngen_finemapping_out=str(output_path), + finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, + finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, + finngen_finemapping_lead_pvalue_threshold=1e-5, + ) + assert output_path.is_dir() + assert (output_path / "_SUCCESS").exists() + + cs = StudyLocus.from_parquet(session=session, path=str(output_path)) + assert cs.df.count() == 1 From b325eaad487ae7729db431d7314565b07e2882fa Mon Sep 17 00:00:00 2001 From: Yakov Date: Fri, 4 Oct 2024 13:46:24 +0100 Subject: [PATCH 080/188] fix: fix of type error in schema checking (#817) * fix: fix of type error in schma checking * fix: fix fix fix --- src/gentropy/method/locus_breaker_clumping.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gentropy/method/locus_breaker_clumping.py b/src/gentropy/method/locus_breaker_clumping.py index fd7661a22..3ab1e605a 100644 --- a/src/gentropy/method/locus_breaker_clumping.py +++ b/src/gentropy/method/locus_breaker_clumping.py @@ -111,9 +111,7 @@ def locus_breaker( f.lit(None) .cast(t.ArrayType(t.StringType())) .alias("qualityControls"), - StudyLocus.assign_study_locus_id( - ["studyId", "variantId"] - ), + StudyLocus.assign_study_locus_id(["studyId", "variantId"]), ) ), _schema=StudyLocus.get_schema(), @@ -135,6 +133,7 @@ def process_locus_breaker_output( Returns: StudyLocus: clumped study loci with large loci broken by window-based clumping. 
""" + large_loci_size = int(large_loci_size) small_loci = lbc.filter( (f.col("locusEnd") - f.col("locusStart")) <= large_loci_size ) From 99b1d3c09c10bf753d1daf35f6eec1a12a0e576d Mon Sep 17 00:00:00 2001 From: Yakov Date: Fri, 4 Oct 2024 15:04:30 +0100 Subject: [PATCH 081/188] fix: adding studId to FM log (#816) --- src/gentropy/susie_finemapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 1379ad89b..38a621f96 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -590,6 +590,8 @@ def susie_finemapper_from_prepared_dataframes( log_df = pd.DataFrame( { + "studyId": studyId, + "region": region, "N_gwas_before_dedupl": N_gwas_before_dedupl, "N_gwas": N_gwas, "N_ld": N_ld, From 8b291687973d1a4c30d82958831f173d74f825e5 Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: Mon, 7 Oct 2024 15:53:37 +0100 Subject: [PATCH 082/188] fix: biosample id duplication (#822) * fix(biosample_index): remove biosampleId column if present in validation and tests * fix(biosample index): update tests for biosample index --- src/gentropy/dataset/study_index.py | 8 +++ tests/gentropy/dataset/test_study_index.py | 83 ++++++++++++++++------ 2 files changed, 70 insertions(+), 21 deletions(-) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index 143396894..e7023ee9b 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -421,6 +421,14 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu """ biosample_set = biosample_index.df.select("biosampleId", f.lit(True).alias("isIdFound")) + # If biosampleId in df, we need to drop it: + if "biosampleId" in self.df.columns: + self.df = self.df.drop("biosampleId") + + # As the biosampleFromSourceId is not a mandatory field of study index, we return if the column is not there: + if "biosampleFromSourceId" not in self.df.columns: + return 
self + validated_df = ( self.df.join(biosample_set, self.df.biosampleFromSourceId == biosample_set.biosampleId, how="left") .withColumn( diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index b4e092317..303642d5e 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -186,7 +186,7 @@ def create_study_index(drop_column: str) -> StudyIndex: self.study_index = create_study_index("") self.study_index_no_gene = create_study_index("geneId") - self.study_index_no_biosample_id = create_study_index("biosampleId") + self.study_index_no_biosample_id = create_study_index("biosampleFromSourceId") self.gene_index = GeneIndex( _df=spark.createDataFrame(self.GENE_DATA, self.GENE_COLUMNS), @@ -207,15 +207,13 @@ def test_biosample_validation_type(self: TestQTLValidation) -> None: validated = self.study_index.validate_biosample(self.biosample_index) assert isinstance(validated, StudyIndex) - @pytest.mark.parametrize("gene_or_biosample", ["gene", "biosample"]) - def test_qtl_validation_correctness( - self: TestQTLValidation, gene_or_biosample: str - ) -> None: + @pytest.mark.parametrize("test", ["gene", "biosample"]) + def test_qtl_validation_correctness(self: TestQTLValidation, test: str) -> None: """Testing if the QTL validation only flags the expected studies.""" - if gene_or_biosample == "gene": + if test == "gene": validated = self.study_index.validate_target(self.gene_index).persist() bad_study = "s2" - if gene_or_biosample == "biosample": + if test == "biosample": validated = self.study_index.validate_biosample( self.biosample_index ).persist() @@ -239,17 +237,35 @@ def test_biosample_validation_correctness(self: TestQTLValidation) -> None: """Testing if the biosample validation only flags the expected studies.""" self.test_qtl_validation_correctness("biosample") - @pytest.mark.parametrize("gene_or_biosample", ["gene", "biosample"]) - def 
test_qtl_validation_no_relevant_column( - self: TestQTLValidation, gene_or_biosample: str + @pytest.mark.parametrize( + "drop,test", + [ + ("gene", "gene"), + ("gene", "biosample"), + ("biosample", "biosample"), + ("biosample", "gene"), + ], + ) + def test_qtl_validation_drop_relevant_column( + self: TestQTLValidation, drop: str, test: str ) -> None: - """Testing what happens if no relevant column is present.""" - if gene_or_biosample == "gene": - validated = self.study_index_no_gene.validate_target(self.gene_index) - if gene_or_biosample == "biosample": - validated = self.study_index_no_biosample_id.validate_biosample( - self.biosample_index - ) + """Testing what happens if an expected column is not present.""" + if drop == "gene": + if test == "gene": + validated = self.study_index_no_gene.validate_target(self.gene_index) + if test == "biosample": + validated = self.study_index_no_gene.validate_biosample( + self.biosample_index + ) + if drop == "biosample": + if test == "gene": + validated = self.study_index_no_biosample_id.validate_target( + self.gene_index + ) + if test == "biosample": + validated = self.study_index_no_biosample_id.validate_biosample( + self.biosample_index + ) # Asserty type: assert isinstance(validated, StudyIndex) @@ -259,11 +275,36 @@ def test_qtl_validation_no_relevant_column( def test_qtl_validation_no_gene_column(self: TestQTLValidation) -> None: """Testing what happens if no gene column is present.""" - self.test_qtl_validation_no_relevant_column("gene") + self.test_qtl_validation_drop_relevant_column(test="gene", drop="gene") + + def test_qtl_validation_no_biosample_from_source_column( + self: TestQTLValidation, + ) -> None: + """Testing what happens if no biosampleFromSourceId column is present.""" + self.test_qtl_validation_drop_relevant_column( + test="biosample", drop="biosample" + ) - def test_qtl_validation_no_biosample_column(self: TestQTLValidation) -> None: - """Testing what happens if no biosample column is present.""" - 
self.test_qtl_validation_no_relevant_column("biosample") + def test_qtl_validation_existing_gene_column(self: TestQTLValidation) -> None: + """Testing what happens if no gene column is present.""" + self.test_qtl_validation_drop_relevant_column(test="gene", drop="biosample") + + def test_qtl_validation_existing_biosample_from_source_column( + self: TestQTLValidation, + ) -> None: + """Testing what happens if a biosampleFromSourceId column is present.""" + self.test_qtl_validation_drop_relevant_column(test="biosample", drop="gene") + + def test_qtl_validation_existing_biosample_column(self: TestQTLValidation) -> None: + """Testing what happens if a biosampleId column is present in study index as well as biosampleFromSourceId.""" + # Append a biosample column filled with null to the self.study_index then validate: + validated = StudyIndex( + _df=self.study_index.df.withColumn( + "biosampleId", f.lit(None).cast("string") + ), + _schema=StudyIndex.get_schema(), + ).validate_biosample(self.biosample_index) + assert isinstance(validated, StudyIndex) class TestUniquenessValidation: From 27d82065492e2668a537460917fc127a8cf47a19 Mon Sep 17 00:00:00 2001 From: Vivien Ho <56025826+vivienho@users.noreply.github.com> Date: Tue, 8 Oct 2024 15:18:35 +0100 Subject: [PATCH 083/188] feat: change LD annotation for PICS fine-mapping to use major ancestry (#821) * feat: add functions to get the major population and calculate r2 * refactor: remove functions not needed anymore * refactor: change r2Overall to r2Major * test: add tests for both of the added functions * test: remove tests no longer needed * revert: change r2Major back to r2Overall --- src/gentropy/method/ld.py | 114 ++++++++++--------- tests/gentropy/method/test_ld.py | 185 +++++++++++++++++++++++++++---- 2 files changed, 228 insertions(+), 71 deletions(-) diff --git a/src/gentropy/method/ld.py b/src/gentropy/method/ld.py index 68b78b103..64d47451d 100644 --- a/src/gentropy/method/ld.py +++ b/src/gentropy/method/ld.py @@ 
-6,6 +6,7 @@ from pyspark.sql import functions as f +from gentropy.common.spark_helpers import order_array_of_structs_by_field from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck if TYPE_CHECKING: @@ -19,60 +20,64 @@ class LDAnnotator: """Class to annotate linkage disequilibrium (LD) operations from GnomAD.""" @staticmethod - def _calculate_weighted_r_overall(ld_set: Column) -> Column: - """Aggregation of weighted R information using ancestry proportions. + def _get_major_population(ordered_populations: Column) -> Column: + """Get major population based on an ldPopulationStructure array ordered by relativeSampleSize. + + If there is a tie for the major population, nfe is selected if it is one of the major populations. + The first population in the array is selected if there is no tie for the major population, or there is a tie but nfe is not one of the major populations. Args: - ld_set (Column): LD set + ordered_populations (Column): ldPopulationStructure array ordered by relativeSampleSize Returns: - Column: LD set with added 'r2Overall' field + Column: major population """ - return f.transform( - ld_set, - lambda x: f.struct( - x["tagVariantId"].alias("tagVariantId"), - # r2Overall is the accumulated sum of each r2 relative to the population size - f.aggregate( - x["rValues"], - f.lit(0.0), - lambda acc, y: acc - + f.coalesce( - f.pow(y["r"], 2) * y["relativeSampleSize"], f.lit(0.0) - ), # we use coalesce to avoid problems when r/relativeSampleSize is null - ).alias("r2Overall"), - ), + major_population_size = ordered_populations["relativeSampleSize"][0] + major_populations = f.filter( + ordered_populations, + lambda x: x["relativeSampleSize"] == major_population_size + ) + # Check if nfe (Non-Finnish European) is one of the major populations + has_nfe = f.filter( + major_populations, + lambda x: x["ldPopulation"] == "nfe" + ) + return f.when( + (f.size(major_populations) > 1) & (f.size(has_nfe) == 1), + f.lit("nfe") + ).otherwise( + 
ordered_populations["ldPopulation"][0] ) @staticmethod - def _add_population_size(ld_set: Column, study_populations: Column) -> Column: - """Add population size to each rValues entry in the ldSet. + def _calculate_r2_major(ld_set: Column, major_population: Column) -> Column: + """Calculate R2 using R of the major population in the study. Args: ld_set (Column): LD set - study_populations (Column): Study populations + major_population (Column): Major population of the study Returns: - Column: LD set with added 'relativeSampleSize' field + Column: LD set with added 'r2Overall' field """ - # Create a population to relativeSampleSize map from the struct - populations_map = f.map_from_arrays( - study_populations["ldPopulation"], - study_populations["relativeSampleSize"], - ) - return f.transform( + ld_set_with_major_pop = f.transform( ld_set, lambda x: f.struct( x["tagVariantId"].alias("tagVariantId"), - f.transform( + f.filter( x["rValues"], - lambda y: f.struct( - y["population"].alias("population"), - y["r"].alias("r"), - populations_map[y["population"]].alias("relativeSampleSize"), - ), - ).alias("rValues"), - ), + lambda y: y["population"] == major_population + ).alias("rValues") + ) + ) + return f.transform( + ld_set_with_major_pop, + lambda x: f.struct( + x["tagVariantId"].alias("tagVariantId"), + f.coalesce( + f.pow(x["rValues"]["r"][0], 2), f.lit(0.0) + ).alias("r2Overall") + ) ) @staticmethod @@ -126,10 +131,10 @@ def ld_annotate( """Annotate linkage disequilibrium (LD) information to a set of studyLocus. This function: - 1. Annotates study locus with population structure information from the study index + 1. Annotates study locus with population structure information ordered by relativeSampleSize from the study index 2. Joins the LD index to the StudyLocus - 3. Adds the population size of the study to each rValues entry in the ldSet - 4. Calculates the overall R weighted by the ancestry proportions in every given study. + 3. 
Gets the major population from the population structure + 4. Calculates R2 by using the R of the major ancestry 5. Flags associations with variants that are not found in the LD reference 6. Rescues lead variant when no LD information is available but lead variant is available @@ -150,9 +155,14 @@ def ld_annotate( associations.df # Drop ldSet column if already available .select(*[col for col in associations.df.columns if col != "ldSet"]) - # Annotate study locus with population structure from study index + # Annotate study locus with population structure ordered by relativeSampleSize from study index .join( - studies.df.select("studyId", "ldPopulationStructure"), + studies.df.select( + "studyId", + order_array_of_structs_by_field( + "ldPopulationStructure", "relativeSampleSize" + ).alias("ldPopulationStructure") + ), on="studyId", how="left", ) @@ -162,25 +172,27 @@ def ld_annotate( on=["variantId", "chromosome"], how="left", ) - # Add population size to each rValues entry in the ldSet if population structure available: + # Get major population from population structure if population structure available .withColumn( - "ldSet", + "majorPopulation", f.when( f.col("ldPopulationStructure").isNotNull(), - cls._add_population_size( - f.col("ldSet"), f.col("ldPopulationStructure") - ), - ), + cls._get_major_population( + f.col("ldPopulationStructure") + ) + ) ) - # Aggregate weighted R information using ancestry proportions + # Calculate R2 using R of the major population .withColumn( "ldSet", f.when( f.col("ldPopulationStructure").isNotNull(), - cls._calculate_weighted_r_overall(f.col("ldSet")), - ), + cls._calculate_r2_major( + f.col("ldSet"), f.col("majorPopulation") + ) + ) ) - .drop("ldPopulationStructure") + .drop("ldPopulationStructure", "majorPopulation") # Filter the LD set by the R2 threshold and set to null if no LD information passes the threshold .withColumn( "ldSet", diff --git a/tests/gentropy/method/test_ld.py b/tests/gentropy/method/test_ld.py index 
8fb86ff31..7e7237ac9 100644 --- a/tests/gentropy/method/test_ld.py +++ b/tests/gentropy/method/test_ld.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pyspark.sql.functions as f import pyspark.sql.types as t @@ -22,32 +22,177 @@ class TestLDAnnotator: """Test LDAnnotatorGnomad.""" - def test__add_population_size( + @pytest.mark.parametrize( + ("observed", "expected"), + [ + # no tie in relativeSampleSize + ( + # observed ldPopulationStructure + [ + Row( + ldPopulationStructure=[ + {"ldPopulation": "pop1", "relativeSampleSize": 0.5}, + {"ldPopulation": "pop2", "relativeSampleSize": 0.3}, + {"ldPopulation": "pop3", "relativeSampleSize": 0.2}, + ], + ) + ], + # expected majorPopulation + "pop1", + ), + # tie in relativeSampleSize, "nfe" is not one of the tied populations + ( + # observed ldPopulationStructure + [ + Row( + ldPopulationStructure=[ + {"ldPopulation": "pop1", "relativeSampleSize": 0.4}, + {"ldPopulation": "pop2", "relativeSampleSize": 0.4}, + {"ldPopulation": "pop3", "relativeSampleSize": 0.2}, + ], + ) + ], + # expected majorPopulation + "pop1", + ), + # tie in relativeSampleSize, "nfe" is one of the tied populations + ( + # observed ldPopulationStructure + [ + Row( + ldPopulationStructure=[ + {"ldPopulation": "pop1", "relativeSampleSize": 0.4}, + {"ldPopulation": "nfe", "relativeSampleSize": 0.4}, + {"ldPopulation": "pop3", "relativeSampleSize": 0.2}, + ], + ) + ], + # expected majorPopulation + "nfe", + ), + ], + ) + def test__get_major_population( self: TestLDAnnotator, + spark: SparkSession, + observed: list[Any], + expected: list[Any], ) -> None: - """Test _add_population_size.""" - result_df = self.observed_df.select( - LDAnnotator._add_population_size( - f.col("ldSet"), f.col("ldPopulationStructure") - ).alias("ldSet") + """Test _get_major_population.""" + schema = t.StructType( + [ + t.StructField( + "ldPopulationStructure", + t.ArrayType( + t.StructType( + [ + 
t.StructField("ldPopulation", t.StringType(), True), + t.StructField( + "relativeSampleSize", t.DoubleType(), True + ), + ] + ) + ), + True, + ), + ] + ) + observed_df = spark.createDataFrame(observed, schema) + result_df = observed_df.withColumn( + "majorPopulation", + LDAnnotator._get_major_population(f.col("ldPopulationStructure")), ) - expected = [0.8, None] - for i, row in enumerate(result_df.collect()): - assert row["ldSet"][0]["rValues"][i]["relativeSampleSize"] == pytest.approx( - expected[i] - ) + assert result_df.collect()[0]["majorPopulation"] == pytest.approx(expected) - def test__calculate_weighted_r_overall( + @pytest.mark.parametrize( + ("observed", "expected"), + [ + # r available for majorPopulation + ( + # observed ldSet and majorPopulation + [ + Row( + majorPopulation="pop1", + ldSet=[ + { + "tagVariantId": "tag1", + "rValues": [ + {"population": "pop1", "r": 0.5}, + {"population": "pop2", "r": 0.6}, + ], + } + ], + ) + ], + # expected r2Overall + 0.25, + ), + # r not available for majorPopulation + ( + # observed ldSet and majorPopulation + [ + Row( + majorPopulation="pop3", + ldSet=[ + { + "tagVariantId": "tag1", + "rValues": [ + {"population": "pop1", "r": 0.5}, + {"population": "pop2", "r": 0.6}, + ], + } + ], + ) + ], + # expected r2Overall + 0.0, + ), + ], + ) + def test__calculate_r2_major( self: TestLDAnnotator, + spark: SparkSession, + observed: list[Any], + expected: list[Any], ) -> None: - """Test _calculate_weighted_r_overall.""" - result_df = self.observed_df.withColumn( + """Test _calculate_r2_major.""" + schema = t.StructType( + [ + t.StructField("majorPopulation", t.StringType(), True), + t.StructField( + "ldSet", + t.ArrayType( + t.StructType( + [ + t.StructField("tagVariantId", t.StringType(), True), + t.StructField( + "rValues", + t.ArrayType( + t.StructType( + [ + t.StructField( + "population", t.StringType(), True + ), + t.StructField( + "r", t.DoubleType(), True + ), + ] + ) + ), + True, + ), + ] + ) + ), + True, + ), + ] + 
) + observed_df = spark.createDataFrame(observed, schema) + result_df = observed_df.withColumn( "ldSet", - LDAnnotator._add_population_size( - f.col("ldSet"), f.col("ldPopulationStructure") - ), - ).withColumn("ldSet", LDAnnotator._calculate_weighted_r_overall(f.col("ldSet"))) - expected = 0.2 + LDAnnotator._calculate_r2_major(f.col("ldSet"), f.col("majorPopulation")), + ) assert result_df.collect()[0]["ldSet"][0]["r2Overall"] == pytest.approx( expected ) From f3490d3e28590e5c771d01bf80859427c632a381 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Oct 2024 15:59:46 +0100 Subject: [PATCH 084/188] build(deps-dev): bump ipython from 8.27.0 to 8.28.0 (#819) Bumps [ipython](https://github.com/ipython/ipython) from 8.27.0 to 8.28.0. - [Release notes](https://github.com/ipython/ipython/releases) - [Commits](https://github.com/ipython/ipython/compare/8.27.0...8.28.0) --- updated-dependencies: - dependency-name: ipython dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- poetry.lock | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index bf854b313..d70e16e9f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1899,13 +1899,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.27.0" +version = "8.28.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.27.0-py3-none-any.whl", hash = "sha256:f68b3cb8bde357a5d7adc9598d57e22a45dfbea19eb6b98286fa3b288c9cd55c"}, - {file = "ipython-8.27.0.tar.gz", hash = "sha256:0b99a2dc9f15fd68692e898e5568725c6d49c527d36a9fb5960ffbdeaa82ff7e"}, + {file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"}, + {file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"}, ] [package.dependencies] @@ -3952,6 +3952,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = 
"PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, From 60f6bfa25036963790d3fe64bfaed50dc9a86a38 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:17:13 +0100 Subject: [PATCH 085/188] build(deps-dev): bump pre-commit from 3.8.0 to 4.0.0 (#820) Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 3.8.0 to 4.0.0. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v3.8.0...v4.0.0) --- updated-dependencies: - dependency-name: pre-commit dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index d70e16e9f..9cdc7b88e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3321,13 +3321,13 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p [[package]] name = "pre-commit" -version = "3.8.0" +version = "4.0.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" files = [ - {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, - {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, + {file = "pre_commit-4.0.0-py2.py3-none-any.whl", hash = "sha256:0ca2341cf94ac1865350970951e54b1a50521e57b7b500403307aed4315a1234"}, + {file = "pre_commit-4.0.0.tar.gz", hash = "sha256:5d9807162cc5537940f94f266cbe2d716a75cfad0d78a317a92cac16287cfed6"}, ] [package.dependencies] @@ -5219,4 +5219,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "50a797b217805183c5967246c1ca4b339037ce60e44a6c43b5c3fc6a9fb2832a" +content-hash = "e786d680aaa9f4a57bfc91fc2a18002199156ccc840c52e372171026506cdf04" diff --git a/pyproject.toml b/pyproject.toml index 47829ddcf..6f0e2e919 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ skops = ">=0.9,<0.11" google-cloud-secret-manager = "^2.20.0" [tool.poetry.dev-dependencies] -pre-commit = "^3.8.0" +pre-commit = "^4.0.0" mypy = "^1.11" pep8-naming = "^0.14.1" interrogate = "^1.7.0" From b7dce8f48cf4780ec15eb00083988e7bb3e66516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Wed, 9 Oct 2024 15:33:29 +0100 Subject: [PATCH 086/188] feat: decouple feature generation from L2G training step (#823) * fix: join between gold standard and credible set based on studyId and variantId * fix: minor bugs to generate the feature matrix * fix(colocalisation): safeguard existing rightStudyType when applying `append_study_metadata` * feat: add feature generation step (working interactively) * feat(l2g): remove feature generation from `LocusToGeneStep` * fix: correct feature names * feat: filter gwas credible sets in `L2GPrediction.from_credible_set` * chore: update docs * chore: pass credible set to 
`L2GGoldStandard.build_feature_matrix` in test * chore: uncomment code --- docs/python_api/steps/l2g.md | 4 +- src/gentropy/config.py | 60 +++++- src/gentropy/dataset/colocalisation.py | 14 +- src/gentropy/dataset/l2g_features/distance.py | 4 +- src/gentropy/dataset/l2g_gold_standard.py | 15 +- src/gentropy/dataset/l2g_prediction.py | 17 +- src/gentropy/l2g.py | 196 +++++++++--------- .../open_targets/test_l2g_gold_standard.py | 3 +- 8 files changed, 195 insertions(+), 118 deletions(-) diff --git a/docs/python_api/steps/l2g.md b/docs/python_api/steps/l2g.md index 847569e36..556e5a275 100644 --- a/docs/python_api/steps/l2g.md +++ b/docs/python_api/steps/l2g.md @@ -1,5 +1,7 @@ --- -title: locus_to_gene +title: Locus to Gene (L2G) --- +::: gentropy.l2g.LocusToGeneFeatureMatrixStep + ::: gentropy.l2g.LocusToGeneStep diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 3ed439333..798a6b7bf 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -236,9 +236,6 @@ class LocusToGeneConfig(StepConfig): predictions_path: str = MISSING credible_set_path: str = MISSING variant_index_path: str = MISSING - colocalisation_path: str = MISSING - study_index_path: str = MISSING - gene_index_path: str = MISSING model_path: str | None = None feature_matrix_path: str | None = None gold_standard_curation_path: str | None = None @@ -282,10 +279,60 @@ class LocusToGeneConfig(StepConfig): wandb_run_name: str | None = None hf_hub_repo_id: str | None = "opentargets/locus_to_gene" download_from_hub: bool = True - write_feature_matrix: bool = True _target_: str = "gentropy.l2g.LocusToGeneStep" +@dataclass +class LocusToGeneFeatureMatrixConfig(StepConfig): + """Locus to gene feature matrix step configuration.""" + + session: Any = field( + default_factory=lambda: { + "extended_spark_conf": { + "spark.driver.memory": "48g", + "spark.executor.memory": "48g", + "spark.sql.shuffle.partitions": "800", + } + } + ) + credible_set_path: str = MISSING + variant_index_path: str | 
None = None + colocalisation_path: str | None = None + study_index_path: str | None = None + gene_index_path: str | None = None + feature_matrix_path: str = MISSING + features_list: list[str] = field( + default_factory=lambda: [ + # max CLPP for each (study, locus, gene) aggregating over a specific qtl type + "eQtlColocClppMaximum", + "pQtlColocClppMaximum", + "sQtlColocClppMaximum", + "tuQtlColocClppMaximum", + # max H4 for each (study, locus, gene) aggregating over a specific qtl type + "eQtlColocH4Maximum", + "pQtlColocH4Maximum", + "sQtlColocH4Maximum", + "tuQtlColocH4Maximum", + # distance to gene footprint + "distanceSentinelFootprint", + "distanceSentinelFootprintNeighbourhood", + "distanceFootprintMean", + "distanceFootprintMeanNeighbourhood", + # distance to gene tss + "distanceTssMean", + "distanceTssMeanNeighbourhood", + "distanceSentinelTss", + "distanceSentinelTssNeighbourhood", + # vep + "vepMaximum", + "vepMaximumNeighbourhood", + "vepMean", + "vepMeanNeighbourhood", + ] + ) + _target_: str = "gentropy.l2g.LocusToGeneFeatureMatrixStep" + + @dataclass class PICSConfig(StepConfig): """PICS step configuration.""" @@ -597,6 +644,11 @@ def register_config() -> None: cs.store(group="step", name="ld_based_clumping", node=LDBasedClumpingConfig) cs.store(group="step", name="ld_index", node=LDIndexConfig) cs.store(group="step", name="locus_to_gene", node=LocusToGeneConfig) + cs.store( + group="step", + name="locus_to_gene_feature_matrix", + node=LocusToGeneFeatureMatrixConfig, + ) cs.store(group="step", name="finngen_studies", node=FinngenStudiesConfig) cs.store( diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index c85209462..c9083f67a 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -60,7 +60,9 @@ def extract_maximum_coloc_probability_per_region_and_gene( """ from gentropy.colocalisation import ColocalisationStep - valid_qtls = 
list(EqtlCatalogueStudyIndex.method_to_study_type_mapping.values()) + valid_qtls = list( + set(EqtlCatalogueStudyIndex.method_to_study_type_mapping.values()) + ) if filter_by_qtl and filter_by_qtl not in valid_qtls: raise ValueError(f"There are no studies with QTL type {filter_by_qtl}") @@ -91,7 +93,7 @@ def extract_maximum_coloc_probability_per_region_and_gene( self.append_study_metadata( study_locus, study_index, - metadata_cols=["geneId"], + metadata_cols=["geneId", "studyType"], colocalisation_side="right", ) # it also filters based on method and qtl type @@ -147,6 +149,12 @@ def append_study_metadata( ) .distinct() ) + coloc_df = ( + # drop `rightStudyType` in case it is requested + self.df.drop("rightStudyType") + if "studyType" in metadata_cols and colocalisation_side == "right" + else self.df + ) return ( # Append that to the respective side of the colocalisation dataset study_loci_w_metadata.selectExpr( @@ -155,5 +163,5 @@ def append_study_metadata( f"{col} as {colocalisation_side}{col[0].upper() + col[1:]}" for col in metadata_cols ], - ).join(self.df, f"{colocalisation_side}StudyLocusId", "right") + ).join(coloc_df, f"{colocalisation_side}StudyLocusId", "right") ) diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py index ea030108c..8d42d30ed 100644 --- a/src/gentropy/dataset/l2g_features/distance.py +++ b/src/gentropy/dataset/l2g_features/distance.py @@ -349,7 +349,7 @@ class DistanceSentinelFootprintFeature(L2GFeature): fill_na_value = 500_000 feature_dependency_type = VariantIndex - feature_name = "distanceSentinelFootprintMinimum" + feature_name = "distanceSentinelFootprint" @classmethod def compute( @@ -388,7 +388,7 @@ class DistanceSentinelFootprintNeighbourhoodFeature(L2GFeature): fill_na_value = 500_000 feature_dependency_type = VariantIndex - feature_name = "DistanceSentinelFootprintNeighbourhoodFeature" + feature_name = "distanceSentinelFootprintNeighbourhood" @classmethod def compute( diff 
--git a/src/gentropy/dataset/l2g_gold_standard.py b/src/gentropy/dataset/l2g_gold_standard.py index 064f6cc0e..e1083fbf0 100644 --- a/src/gentropy/dataset/l2g_gold_standard.py +++ b/src/gentropy/dataset/l2g_gold_standard.py @@ -11,6 +11,7 @@ from gentropy.common.schemas import parse_spark_schema from gentropy.common.spark_helpers import get_record_with_maximum_value from gentropy.dataset.dataset import Dataset +from gentropy.dataset.study_locus import StudyLocus if TYPE_CHECKING: from pyspark.sql import DataFrame @@ -107,11 +108,13 @@ def process_gene_interactions( def build_feature_matrix( self: L2GGoldStandard, full_feature_matrix: L2GFeatureMatrix, + credible_set: StudyLocus, ) -> L2GFeatureMatrix: """Return a feature matrix for study loci in the gold standard. Args: full_feature_matrix (L2GFeatureMatrix): Feature matrix for all study loci to join on + credible_set (StudyLocus): Full credible sets to annotate the feature matrix with variant and study IDs and perform the join Returns: L2GFeatureMatrix: Feature matrix for study loci in the gold standard @@ -120,10 +123,16 @@ def build_feature_matrix( return L2GFeatureMatrix( _df=full_feature_matrix._df.join( - f.broadcast(self.df.drop("variantId", "studyId", "sources")), - on=["studyLocusId", "geneId"], + credible_set.df.select("studyLocusId", "variantId", "studyId"), + "studyLocusId", + "left", + ) + .join( + f.broadcast(self.df.drop("studyLocusId", "sources")), + on=["studyId", "variantId", "geneId"], how="inner", - ), + ) + .distinct(), with_gold_standard=True, ) diff --git a/src/gentropy/dataset/l2g_prediction.py b/src/gentropy/dataset/l2g_prediction.py index 97e58f526..c29b359af 100644 --- a/src/gentropy/dataset/l2g_prediction.py +++ b/src/gentropy/dataset/l2g_prediction.py @@ -5,12 +5,13 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Type +import pyspark.sql.functions as f + from gentropy.common.schemas import parse_spark_schema from gentropy.common.session import Session from 
gentropy.dataset.dataset import Dataset from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.study_locus import StudyLocus -from gentropy.method.l2g.feature_factory import L2GFeatureInputLoader from gentropy.method.l2g.model import LocusToGeneModel if TYPE_CHECKING: @@ -40,8 +41,8 @@ def from_credible_set( cls: Type[L2GPrediction], session: Session, credible_set: StudyLocus, + feature_matrix: L2GFeatureMatrix, features_list: list[str], - features_input_loader: L2GFeatureInputLoader, model_path: str | None, hf_token: str | None = None, download_from_hub: bool = True, @@ -51,8 +52,8 @@ def from_credible_set( Args: session (Session): Session object that contains the Spark session credible_set (StudyLocus): Dataset containing credible sets from GWAS only + feature_matrix (L2GFeatureMatrix): Dataset containing all credible sets and their annotations features_list (list[str]): List of features to use for the model - features_input_loader (L2GFeatureInputLoader): Loader with all feature dependencies model_path (str | None): Path to the model file. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). hf_token (str | None): Hugging Face token to download the model from the Hub. Only required if the model is private. download_from_hub (bool): Whether to download the model from the Hugging Face Hub. Defaults to True. 
@@ -70,10 +71,12 @@ def from_credible_set( # Prepare data fm = ( - L2GFeatureMatrix.from_features_list( - study_loci_to_annotate=credible_set, - features_list=features_list, - features_input_loader=features_input_loader, + L2GFeatureMatrix( + _df=( + credible_set.df.filter(f.col("studyType") == "gwas") + .select("studyLocusId") + .join(feature_matrix._df, "studyLocusId") + ) ) .fill_na() .select_features(features_list) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 7962da484..296aba3d2 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -24,29 +24,89 @@ from gentropy.method.l2g.trainer import LocusToGeneTrainer +class LocusToGeneFeatureMatrixStep: + """Annotate credible set with functional genomics features.""" + + def __init__( + self, + session: Session, + *, + features_list: list[str] = LocusToGeneConfig().features_list, + credible_set_path: str, + variant_index_path: str | None = None, + colocalisation_path: str | None = None, + study_index_path: str | None = None, + gene_index_path: str | None = None, + feature_matrix_path: str, + ) -> None: + """Initialise the step and run the logic based on mode. 
+ + Args: + session (Session): Session object that contains the Spark session + features_list (list[str]): List of features to use for the model + credible_set_path (str): Path to the credible set dataset necessary to build the feature matrix + variant_index_path (str | None): Path to the variant index dataset + colocalisation_path (str | None): Path to the colocalisation dataset + study_index_path (str | None): Path to the study index dataset + gene_index_path (str | None): Path to the gene index dataset + feature_matrix_path (str): Path to the L2G feature matrix output dataset + """ + credible_set = StudyLocus.from_parquet( + session, credible_set_path, recursiveFileLookup=True + ) + studies = ( + StudyIndex.from_parquet(session, study_index_path, recursiveFileLookup=True) + if study_index_path + else None + ) + variant_index = ( + VariantIndex.from_parquet(session, variant_index_path) + if variant_index_path + else None + ) + coloc = ( + Colocalisation.from_parquet( + session, colocalisation_path, recursiveFileLookup=True + ) + if colocalisation_path + else None + ) + gene_index = ( + GeneIndex.from_parquet(session, gene_index_path, recursiveFileLookup=True) + if gene_index_path + else None + ) + features_input_loader = L2GFeatureInputLoader( + variant_index=variant_index, + colocalisation=coloc, + study_index=studies, + study_locus=credible_set, + gene_index=gene_index, + ) + + fm = credible_set.build_feature_matrix(features_list, features_input_loader) + fm._df.write.mode(session.write_mode).parquet(feature_matrix_path) + + class LocusToGeneStep: """Locus to gene step.""" def __init__( self, session: Session, - hyperparameters: dict[str, Any], + hyperparameters: dict[str, Any] = LocusToGeneConfig().hyperparameters, *, run_mode: str, - features_list: list[str], - download_from_hub: bool, + features_list: list[str] = LocusToGeneConfig().features_list, + download_from_hub: bool = LocusToGeneConfig().download_from_hub, wandb_run_name: str, model_path: str | None = 
None, credible_set_path: str, + feature_matrix_path: str, gold_standard_curation_path: str | None = None, variant_index_path: str | None = None, - colocalisation_path: str | None = None, - study_index_path: str | None = None, - gene_index_path: str | None = None, gene_interactions_path: str | None = None, predictions_path: str | None = None, - feature_matrix_path: str | None = None, - write_feature_matrix: bool, hf_hub_repo_id: str | None = LocusToGeneConfig().hf_hub_repo_id, ) -> None: """Initialise the step and run the logic based on mode. @@ -60,15 +120,11 @@ def __init__( wandb_run_name (str): Name of the run to track model training in Weights and Biases model_path (str | None): Path to the model. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). credible_set_path (str): Path to the credible set dataset necessary to build the feature matrix + feature_matrix_path (str): Path to the L2G feature matrix input dataset gold_standard_curation_path (str | None): Path to the gold standard curation file - variant_index_path (str | None): Path to the variant index dataset - colocalisation_path (str | None): Path to the colocalisation dataset - study_index_path (str | None): Path to the study index dataset - gene_index_path (str | None): Path to the gene index dataset + variant_index_path (str | None): Path to the variant index gene_interactions_path (str | None): Path to the gene interactions dataset predictions_path (str | None): Path to the L2G predictions output dataset - feature_matrix_path (str | None): Path to the L2G feature matrix output dataset - write_feature_matrix (bool): Whether to write the full feature matrix to the filesystem hf_hub_repo_id (str | None): Hugging Face Hub repository ID. If provided, the model will be uploaded to Hugging Face. 
Raises: @@ -85,7 +141,6 @@ def __init__( self.predictions_path = predictions_path self.features_list = list(features_list) self.hyperparameters = dict(hyperparameters) - self.feature_matrix_path = feature_matrix_path self.wandb_run_name = wandb_run_name self.hf_hub_repo_id = hf_hub_repo_id self.download_from_hub = download_from_hub @@ -93,36 +148,15 @@ def __init__( # Load common inputs self.credible_set = StudyLocus.from_parquet( session, credible_set_path, recursiveFileLookup=True - ).filter(f.col("studyType") == "gwas") - self.studies = ( - StudyIndex.from_parquet(session, study_index_path, recursiveFileLookup=True) - if study_index_path - else None + ) + self.feature_matrix = L2GFeatureMatrix( + _df=session.load_data(feature_matrix_path), features_list=self.features_list ) self.variant_index = ( VariantIndex.from_parquet(session, variant_index_path) if variant_index_path else None ) - self.coloc = ( - Colocalisation.from_parquet( - session, colocalisation_path, recursiveFileLookup=True - ) - if colocalisation_path - else None - ) - self.gene_index = ( - GeneIndex.from_parquet(session, gene_index_path, recursiveFileLookup=True) - if gene_index_path - else None - ) - self.features_input_loader = L2GFeatureInputLoader( - variant_index=self.variant_index, - coloc=self.coloc, - studies=self.studies, - study_locus=self.credible_set, - gene_index=self.gene_index, - ) if run_mode == "predict": self.run_predict() @@ -140,28 +174,21 @@ def __init__( self.run_train() def run_predict(self) -> None: - """Run the prediction step. 
- - Raises: - ValueError: If not all dependencies in prediction mode are set - """ - if self.studies and self.coloc: - predictions = L2GPrediction.from_credible_set( - self.session, - self.credible_set, - self.features_list, - self.features_input_loader, - model_path=self.model_path, - hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), - download_from_hub=self.download_from_hub, + """Run the prediction step.""" + predictions = L2GPrediction.from_credible_set( + self.session, + self.credible_set, + self.feature_matrix, + self.features_list, + model_path=self.model_path, + hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), + download_from_hub=self.download_from_hub, + ) + if self.predictions_path: + predictions.df.write.mode(self.session.write_mode).parquet( + self.predictions_path ) - if self.predictions_path: - predictions.df.write.mode(self.session.write_mode).parquet( - self.predictions_path - ) - self.session.logger.info(self.predictions_path) - else: - raise ValueError("Dependencies for predict mode not set.") + self.session.logger.info(self.predictions_path) def run_train(self) -> None: """Run the training step.""" @@ -170,11 +197,8 @@ def run_train(self) -> None: and self.interactions and self.wandb_run_name and self.model_path - and self.variant_index ): wandb_key = access_gcp_secret("wandb-key", "open-targets-genetics-dev") - # Process gold standard and L2G features - data = self._generate_feature_matrix(write_feature_matrix=True) # Instantiate classifier and train model l2g_model = LocusToGeneModel( @@ -183,7 +207,8 @@ def run_train(self) -> None: ) wandb_login(key=wandb_key) trained_model = LocusToGeneTrainer( - model=l2g_model, feature_matrix=data + model=l2g_model, + feature_matrix=self._annotate_gold_standards_w_feature_matrix(), ).train(self.wandb_run_name) if trained_model.training_data and trained_model.model and self.model_path: trained_model.save(self.model_path) @@ -202,46 +227,31 @@ def run_train(self) -> 
None: commit_message="chore: update model", ) - def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatrix: + def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: """Generate the feature matrix of annotated gold standards. - Args: - write_feature_matrix (bool): Whether to write the feature matrix for all credible sets to disk - Returns: L2GFeatureMatrix: Feature matrix with gold standards annotated with features. Raises: - ValueError: If write_feature_matrix is set to True but a path is not provided or if dependencies to build features are not set. + ValueError: Not all training dependencies are defined """ - if ( - self.gs_curation - and self.interactions - and self.studies - and self.variant_index - ): + if self.gs_curation and self.interactions and self.variant_index: study_locus_overlap = StudyLocus( _df=self.credible_set.df.join( f.broadcast( - self.gs_curation.withColumn( - "variantId", + self.gs_curation.select( f.concat_ws( "_", f.col("sentinel_variant.locus_GRCh38.chromosome"), f.col("sentinel_variant.locus_GRCh38.position"), f.col("sentinel_variant.alleles.reference"), f.col("sentinel_variant.alleles.alternative"), - ), - ).select( - StudyLocus.assign_study_locus_id( - [ - "association_info.otg_id", # studyId - "variantId", - ] - ), + ).alias("variantId"), + f.col("association_info.otg_id").alias("studyId"), ) ), - "studyLocusId", + ["studyId", "variantId"], "inner", ), _schema=StudyLocus.get_schema(), @@ -254,18 +264,10 @@ def _generate_feature_matrix(self, write_feature_matrix: bool) -> L2GFeatureMatr interactions=self.interactions, ) - fm = self.credible_set.build_feature_matrix( - self.features_list, self.features_input_loader - ) - if write_feature_matrix: - if not self.feature_matrix_path: - raise ValueError("feature_matrix_path must be set.") - fm._df.write.mode(self.session.write_mode).parquet( - self.feature_matrix_path - ) - return ( - gold_standards.build_feature_matrix(fm) + 
gold_standards.build_feature_matrix( + self.feature_matrix, self.credible_set + ) .fill_na() .select_features(self.features_list) ) diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py index aa36359ca..e6afc942f 100644 --- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py +++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py @@ -171,5 +171,6 @@ def test_build_feature_matrix( ) fm = mock_study_locus.build_feature_matrix(features_list, loader) assert isinstance( - mock_l2g_gold_standard.build_feature_matrix(fm), L2GFeatureMatrix + mock_l2g_gold_standard.build_feature_matrix(fm, mock_study_locus), + L2GFeatureMatrix, ), "Feature matrix should be of type L2GFeatureMatrix" From 9f0111cb3dd0cde70831cb658415e1bbe20ff28e Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Wed, 9 Oct 2024 16:20:01 +0100 Subject: [PATCH 087/188] fix: mhc flag incorrect (#825) --- src/gentropy/dataset/study_locus.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 4d8136c8e..f03b98029 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -279,7 +279,8 @@ def validate_lead_pvalue(self: StudyLocus, pvalue_cutoff: float) -> StudyLocus: df = self.df.withColumn( qc_colname, create_empty_column_if_not_exists( - qc_colname, get_struct_field_schema(StudyLocus.get_schema(), qc_colname) + qc_colname, + get_struct_field_schema(StudyLocus.get_schema(), qc_colname), ), ) return StudyLocus( @@ -940,7 +941,7 @@ def qc_MHC_region(self: StudyLocus) -> StudyLocus: "qualityControls", self.update_quality_flag( f.col("qualityControls"), - ~( + ( (f.col("chromosome") == region.chromosome) & ( (f.col("position") <= region.end) From 31e217b7b4702fec0f91ce68cbe011c5efa78b07 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Thu, 10 Oct 2024 13:28:35 +0100 
Subject: [PATCH 088/188] revert: no longer rescuing variants not in LD matrix when overlapping with SuSiE (#827) * revert: no longer rescueing variants not in LD matrix * test: update test to cover new expectation --- src/gentropy/dataset/study_locus.py | 7 +------ tests/gentropy/dataset/test_study_locus.py | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index f03b98029..e9be88a9d 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -1047,12 +1047,7 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: # credible set in SuSiE overlapping region f.col("inSuSiE") # credible set not based on SuSiE - & (f.col("finemappingMethod") != "SuSiE-inf") - # credible set not already flagged as unresolved LD - & ~f.array_contains( - f.col("qualityControls"), - StudyLocusQualityCheck.UNRESOLVED_LD.value, - ), + & (f.col("finemappingMethod") != "SuSiE-inf"), StudyLocusQualityCheck.EXPLAINED_BY_SUSIE, ), ) diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 3f6cfcb59..94da005b9 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -985,7 +985,7 @@ class TestStudyLocusSuSiERedundancyFlagging: [{"variantId": "X_3_A_A"}, {"variantId": "X_5_A_A"}], [], ), - # NOT to be flagged (Unresolved LD) + # To be flagged (Unresolved LD flag on it) ( "5", "v5", @@ -1080,4 +1080,4 @@ def test_qc_explained_by_SuSiE_correctness( ) ) .count() - ) == 2 + ) == 3 From 58333c0296f2f923410e965124ea76ef95a4592e Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Thu, 10 Oct 2024 13:58:06 +0100 Subject: [PATCH 089/188] fix: empty inSilicoPredictors object in GnomAD variant index (#807) * fix: empty inSilicoPredictors object in variant index * fix: object filtering expression * fix(hail): limit removed --- src/gentropy/datasource/gnomad/variants.py | 26 
++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/gentropy/datasource/gnomad/variants.py b/src/gentropy/datasource/gnomad/variants.py index fc68a3154..0575261c2 100644 --- a/src/gentropy/datasource/gnomad/variants.py +++ b/src/gentropy/datasource/gnomad/variants.py @@ -131,16 +131,24 @@ def as_variant_index(self: GnomADVariants) -> VariantIndex: .drop("locus", "alleles") .select_globals() .to_spark(flatten=False) - .withColumn( - "variantId", - VariantIndex.hash_long_variant_ids( - f.col("variantId"), - f.col("chromosome"), - f.col("position"), - self.lenght_threshold, - ), + .withColumns( + { + # Once The parsing is done, we have to drop objects with no score from inSilicoPredictors: + "inSilicoPredictors": f.filter( + f.col("inSilicoPredictors"), + lambda predictor: predictor["score"].isNotNull(), + ), + # Generate a variantId that is hashed for long variant ids: + "variantId": VariantIndex.hash_long_variant_ids( + f.col("variantId"), + f.col("chromosome"), + f.col("position"), + self.lenght_threshold, + ), + # We are not capturing the most severe consequence from GnomAD, but this column needed for the schema: + "mostSevereConsequenceId": f.lit(None).cast(t.StringType()), + } ) - .withColumn("mostSevereConsequenceId", f.lit(None).cast(t.StringType())) ), _schema=VariantIndex.get_schema(), ) From c7c602a9a784c5b19b413da0ea32261a6fbd0804 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 11 Oct 2024 07:33:16 +0100 Subject: [PATCH 090/188] fix: write mode added to validation steps (#826) --- src/gentropy/study_locus_validation.py | 12 ++++++------ src/gentropy/study_validation.py | 16 +++++++++------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 486b31ca5..0be046a67 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -51,10 +51,10 @@ def __init__( .assign_confidence() 
).persist() # we will need this for 2 types of outputs - study_locus_with_qc.valid_rows( - invalid_qc_reasons, invalid=True - ).df.write.parquet(invalid_study_locus_path) + study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( + session.write_mode + ).parquet(invalid_study_locus_path) - study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.parquet( - valid_study_locus_path - ) + study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.mode( + session.write_mode + ).parquet(valid_study_locus_path) diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index 573298757..a9bebe25e 100644 --- a/src/gentropy/study_validation.py +++ b/src/gentropy/study_validation.py @@ -66,13 +66,15 @@ def __init__( .validate_study_type() # Flagging non-supported study types. .validate_target(target_index) # Flagging QTL studies with invalid targets .validate_disease(disease_index) # Flagging invalid EFOs - .validate_biosample(biosample_index) # Flagging QTL studies with invalid biosamples + .validate_biosample( + biosample_index + ) # Flagging QTL studies with invalid biosamples ).persist() # we will need this for 2 types of outputs - study_index_with_qc.valid_rows( - invalid_qc_reasons, invalid=True - ).df.write.parquet(invalid_study_index_path) + study_index_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( + session.write_mode + ).parquet(invalid_study_index_path) - study_index_with_qc.valid_rows(invalid_qc_reasons).df.write.parquet( - valid_study_index_path - ) + study_index_with_qc.valid_rows(invalid_qc_reasons).df.write.mode( + session.write_mode + ).parquet(valid_study_index_path) From e3d32ba1e01db3ad8a45ba40027f836703ceeff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:52:46 +0100 Subject: [PATCH 091/188] feat(l2g): merge sQTL and tuQTL colocalisation features (#824) * feat: merge tuQTL 
colocalisation results into sQTL features * fix: add colocalisation neighbourhood features in the l2g default features list * fix: minor bug --- .../datasets/l2g_features/colocalisation.md | 4 - src/gentropy/config.py | 10 +- src/gentropy/dataset/colocalisation.py | 21 +- .../dataset/l2g_features/colocalisation.py | 196 ++---------------- src/gentropy/method/l2g/feature_factory.py | 8 - tests/gentropy/dataset/test_l2g_feature.py | 8 - 6 files changed, 35 insertions(+), 212 deletions(-) diff --git a/docs/python_api/datasets/l2g_features/colocalisation.md b/docs/python_api/datasets/l2g_features/colocalisation.md index a3928c4ab..c38c690d7 100644 --- a/docs/python_api/datasets/l2g_features/colocalisation.md +++ b/docs/python_api/datasets/l2g_features/colocalisation.md @@ -7,19 +7,15 @@ title: From colocalisation ::: gentropy.dataset.l2g_features.colocalisation.EQtlColocClppMaximumFeature ::: gentropy.dataset.l2g_features.colocalisation.PQtlColocClppMaximumFeature ::: gentropy.dataset.l2g_features.colocalisation.SQtlColocClppMaximumFeature -::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocClppMaximumFeature ::: gentropy.dataset.l2g_features.colocalisation.EQtlColocH4MaximumFeature ::: gentropy.dataset.l2g_features.colocalisation.PQtlColocH4MaximumFeature ::: gentropy.dataset.l2g_features.colocalisation.SQtlColocH4MaximumFeature -::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocH4MaximumFeature ::: gentropy.dataset.l2g_features.colocalisation.EQtlColocClppMaximumNeighbourhoodFeature ::: gentropy.dataset.l2g_features.colocalisation.PQtlColocClppMaximumNeighbourhoodFeature ::: gentropy.dataset.l2g_features.colocalisation.SQtlColocClppMaximumNeighbourhoodFeature -::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocClppMaximumNeighbourhoodFeature ::: gentropy.dataset.l2g_features.colocalisation.EQtlColocH4MaximumNeighbourhoodFeature ::: gentropy.dataset.l2g_features.colocalisation.PQtlColocH4MaximumNeighbourhoodFeature ::: 
gentropy.dataset.l2g_features.colocalisation.SQtlColocH4MaximumNeighbourhoodFeature -::: gentropy.dataset.l2g_features.colocalisation.TuQtlColocH4MaximumNeighbourhoodFeature ## Common logic diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 798a6b7bf..50eb4af72 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -246,12 +246,18 @@ class LocusToGeneConfig(StepConfig): "eQtlColocClppMaximum", "pQtlColocClppMaximum", "sQtlColocClppMaximum", - "tuQtlColocClppMaximum", # max H4 for each (study, locus, gene) aggregating over a specific qtl type "eQtlColocH4Maximum", "pQtlColocH4Maximum", "sQtlColocH4Maximum", - "tuQtlColocH4Maximum", + # max CLPP for each (study, locus, gene) aggregating over a specific qtl type and in relation with the mean in the vicinity + "eQtlColocClppMaximumNeighbourhood", + "pQtlColocClppMaximumNeighbourhood", + "sQtlColocClppMaximumNeighbourhood", + # max H4 for each (study, locus, gene) aggregating over a specific qtl type and in relation with the mean in the vicinity + "eQtlColocH4MaximumNeighbourhood", + "pQtlColocH4MaximumNeighbourhood", + "sQtlColocH4MaximumNeighbourhood", # distance to gene footprint "distanceSentinelFootprint", "distanceSentinelFootprintNeighbourhood", diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index c9083f67a..4b85b68d6 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -42,7 +42,7 @@ def extract_maximum_coloc_probability_per_region_and_gene( study_index: StudyIndex, *, filter_by_colocalisation_method: str, - filter_by_qtl: str | None = None, + filter_by_qtls: str | list[str] | None = None, ) -> DataFrame: """Get maximum colocalisation probability for a (studyLocus, gene) window. 
@@ -50,7 +50,7 @@ def extract_maximum_coloc_probability_per_region_and_gene( study_locus (StudyLocus): Dataset containing study loci to filter the colocalisation dataset on and the geneId linked to the region study_index (StudyIndex): Study index to use to get study metadata filter_by_colocalisation_method (str): optional filter to apply on the colocalisation dataset - filter_by_qtl (str | None): optional filter to apply on the colocalisation dataset + filter_by_qtls (str | list[str] | None): optional filter to apply on the colocalisation dataset Returns: DataFrame: table with the maximum colocalisation scores for the provided study loci @@ -63,8 +63,15 @@ def extract_maximum_coloc_probability_per_region_and_gene( valid_qtls = list( set(EqtlCatalogueStudyIndex.method_to_study_type_mapping.values()) ) - if filter_by_qtl and filter_by_qtl not in valid_qtls: - raise ValueError(f"There are no studies with QTL type {filter_by_qtl}") + + if filter_by_qtls: + filter_by_qtls = ( + list(map(str.lower, [filter_by_qtls])) + if isinstance(filter_by_qtls, str) + else list(map(str.lower, filter_by_qtls)) + ) + if any(qtl not in valid_qtls for qtl in filter_by_qtls): + raise ValueError(f"There are no studies with QTL type {filter_by_qtls}") if filter_by_colocalisation_method not in [ "ECaviar", @@ -82,10 +89,8 @@ def extract_maximum_coloc_probability_per_region_and_gene( f.col("rightGeneId").isNotNull(), f.lower("colocalisationMethod") == filter_by_colocalisation_method.lower(), ] - if filter_by_qtl: - coloc_filtering_expr.append( - f.lower("rightStudyType") == filter_by_qtl.lower() - ) + if filter_by_qtls: + coloc_filtering_expr.append(f.lower("rightStudyType").isin(filter_by_qtls)) filtered_colocalisation = ( # Bring rightStudyType and rightGeneId and filter by rows where the gene is null, diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py index c44573b72..c61daa909 100644 --- 
a/src/gentropy/dataset/l2g_features/colocalisation.py +++ b/src/gentropy/dataset/l2g_features/colocalisation.py @@ -23,7 +23,7 @@ def common_colocalisation_feature_logic( colocalisation_method: str, colocalisation_metric: str, feature_name: str, - qtl_type: str, + qtl_types: list[str] | str, *, colocalisation: Colocalisation, study_index: StudyIndex, @@ -36,7 +36,7 @@ def common_colocalisation_feature_logic( colocalisation_method (str): The colocalisation method to filter the data by colocalisation_metric (str): The colocalisation metric to use feature_name (str): The name of the feature to create - qtl_type (str): The type of QTL to filter the data by + qtl_types (list[str] | str): The types of QTL to filter the data by colocalisation (Colocalisation): Dataset with the colocalisation results study_index (StudyIndex): Study index to fetch study type and gene study_locus (StudyLocus): Study locus to traverse between colocalisation and study index @@ -55,7 +55,7 @@ def common_colocalisation_feature_logic( study_locus, study_index, filter_by_colocalisation_method=colocalisation_method, - filter_by_qtl=qtl_type, + filter_by_qtls=qtl_types, ), on=joining_cols, ) @@ -73,7 +73,7 @@ def common_neighbourhood_colocalisation_feature_logic( colocalisation_method: str, colocalisation_metric: str, feature_name: str, - qtl_type: str, + qtl_types: list[str] | str, *, colocalisation: Colocalisation, study_index: StudyIndex, @@ -86,7 +86,7 @@ def common_neighbourhood_colocalisation_feature_logic( colocalisation_method (str): The colocalisation method to filter the data by colocalisation_metric (str): The colocalisation metric to use feature_name (str): The name of the feature to create - qtl_type (str): The type of QTL to filter the data by + qtl_types (list[str] | str): The types of QTL to filter the data by colocalisation (Colocalisation): Dataset with the colocalisation results study_index (StudyIndex): Study index to fetch study type and gene study_locus (StudyLocus): Study 
locus to traverse between colocalisation and study index @@ -101,7 +101,7 @@ def common_neighbourhood_colocalisation_feature_logic( colocalisation_method, colocalisation_metric, local_feature_name, - qtl_type, + qtl_types, colocalisation=colocalisation, study_index=study_index, study_locus=study_locus, @@ -310,7 +310,7 @@ def compute( """ colocalisation_method = "ECaviar" colocalisation_metric = "clpp" - qtl_type = "sqtl" + qtl_types = ["sqtl", "tuqtl"] return cls( _df=convert_from_wide_to_long( common_colocalisation_feature_logic( @@ -318,7 +318,7 @@ def compute( colocalisation_method, colocalisation_metric, cls.feature_name, - qtl_type, + qtl_types, **feature_dependency, ), id_vars=("studyLocusId", "geneId"), @@ -352,91 +352,7 @@ def compute( """ colocalisation_method = "ECaviar" colocalisation_metric = "clpp" - qtl_type = "sqtl" - return cls( - _df=convert_from_wide_to_long( - common_neighbourhood_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ), - _schema=cls.get_schema(), - ) - - -class TuQtlColocClppMaximumFeature(L2GFeature): - """Max CLPP for each (study, locus, gene) aggregating over all tuQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "tuQtlColocClppMaximum" - - @classmethod - def compute( - cls: type[TuQtlColocClppMaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> TuQtlColocClppMaximumFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - TuQtlColocClppMaximumFeature: Feature dataset - """ - colocalisation_method = "ECaviar" - colocalisation_metric = "clpp" - qtl_type = "tuqtl" - return cls( - _df=convert_from_wide_to_long( - common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ), - _schema=cls.get_schema(), - ) - - -class TuQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): - """Max CLPP for each (study, locus) aggregating over all tuQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "tuQtlColocClppMaximumNeighbourhood" - - @classmethod - def compute( - cls: type[TuQtlColocClppMaximumNeighbourhoodFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> TuQtlColocClppMaximumNeighbourhoodFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - TuQtlColocClppMaximumNeighbourhoodFeature: Feature dataset - """ - colocalisation_method = "ECaviar" - colocalisation_metric = "clpp" - qtl_type = "tuqtl" + qtl_types = ["sqtl", "tuqtl"] return cls( _df=convert_from_wide_to_long( common_neighbourhood_colocalisation_feature_logic( @@ -444,7 +360,7 @@ def compute( colocalisation_method, colocalisation_metric, cls.feature_name, - qtl_type, + qtl_types, **feature_dependency, ), id_vars=("studyLocusId", "geneId"), @@ -646,7 +562,7 @@ def compute( """ colocalisation_method = "Coloc" colocalisation_metric = "h4" - qtl_type = "sqtl" + qtl_types = ["sqtl", "tuqtl"] return cls( _df=convert_from_wide_to_long( common_colocalisation_feature_logic( @@ -654,7 +570,7 @@ def compute( colocalisation_method, colocalisation_metric, cls.feature_name, - qtl_type, + qtl_types, **feature_dependency, ), id_vars=("studyLocusId", "geneId"), @@ -688,7 +604,7 @@ def compute( """ colocalisation_method = "Coloc" colocalisation_metric = "h4" - qtl_type = "sqtl" + qtl_types = ["sqtl", "tuqtl"] return cls( _df=convert_from_wide_to_long( common_neighbourhood_colocalisation_feature_logic( @@ -696,91 +612,7 @@ def compute( colocalisation_method, colocalisation_metric, cls.feature_name, - qtl_type, - **feature_dependency, - ), - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ), - _schema=cls.get_schema(), - ) - - -class TuQtlColocH4MaximumFeature(L2GFeature): - """Max H4 for each (study, locus, gene) aggregating over all tuQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "tuQtlColocH4Maximum" - - @classmethod - def compute( - cls: type[TuQtlColocH4MaximumFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - 
feature_dependency: dict[str, Any], - ) -> TuQtlColocH4MaximumFeature: - """Computes the feature. - - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - TuQtlColocH4MaximumFeature: Feature dataset - """ - colocalisation_method = "Coloc" - colocalisation_metric = "h4" - qtl_type = "tuqtl" - return cls( - _df=convert_from_wide_to_long( - common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, - **feature_dependency, - ), - id_vars=("studyLocusId", "geneId"), - var_name="featureName", - value_name="featureValue", - ), - _schema=cls.get_schema(), - ) - - -class TuQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): - """Max H4 for each (study, locus) aggregating over all tuQTLs.""" - - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] - feature_name = "tuQtlColocH4MaximumNeighbourhood" - - @classmethod - def compute( - cls: type[TuQtlColocH4MaximumNeighbourhoodFeature], - study_loci_to_annotate: StudyLocus | L2GGoldStandard, - feature_dependency: dict[str, Any], - ) -> TuQtlColocH4MaximumNeighbourhoodFeature: - """Computes the feature. 
- - Args: - study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation - feature_dependency (dict[str, Any]): Dataset with the colocalisation results - - Returns: - TuQtlColocH4MaximumNeighbourhoodFeature: Feature dataset - """ - colocalisation_method = "Coloc" - colocalisation_metric = "h4" - qtl_type = "tuqtl" - return cls( - _df=convert_from_wide_to_long( - common_colocalisation_feature_logic( - study_loci_to_annotate, - colocalisation_method, - colocalisation_metric, - cls.feature_name, - qtl_type, + qtl_types, **feature_dependency, ), id_vars=("studyLocusId", "geneId"), diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index 41db44806..1077fc825 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -17,10 +17,6 @@ SQtlColocClppMaximumNeighbourhoodFeature, SQtlColocH4MaximumFeature, SQtlColocH4MaximumNeighbourhoodFeature, - TuQtlColocClppMaximumFeature, - TuQtlColocClppMaximumNeighbourhoodFeature, - TuQtlColocH4MaximumFeature, - TuQtlColocH4MaximumNeighbourhoodFeature, ) from gentropy.dataset.l2g_features.distance import ( DistanceFootprintMeanFeature, @@ -113,16 +109,12 @@ class FeatureFactory: "pQtlColocClppMaximumNeighbourhood": PQtlColocClppMaximumNeighbourhoodFeature, "sQtlColocClppMaximum": SQtlColocClppMaximumFeature, "sQtlColocClppMaximumNeighbourhood": SQtlColocClppMaximumNeighbourhoodFeature, - "tuQtlColocClppMaximum": TuQtlColocClppMaximumFeature, - "tuQtlColocClppMaximumNeighbourhood": TuQtlColocClppMaximumNeighbourhoodFeature, "eQtlColocH4Maximum": EQtlColocH4MaximumFeature, "eQtlColocH4MaximumNeighbourhood": EQtlColocH4MaximumNeighbourhoodFeature, "pQtlColocH4Maximum": PQtlColocH4MaximumFeature, "pQtlColocH4MaximumNeighbourhood": PQtlColocH4MaximumNeighbourhoodFeature, "sQtlColocH4Maximum": SQtlColocH4MaximumFeature, "sQtlColocH4MaximumNeighbourhood": 
SQtlColocH4MaximumNeighbourhoodFeature, - "tuQtlColocH4Maximum": TuQtlColocH4MaximumFeature, - "tuQtlColocH4MaximumNeighbourhood": TuQtlColocH4MaximumNeighbourhoodFeature, "vepMean": VepMeanFeature, "vepMeanNeighbourhood": VepMeanNeighbourhoodFeature, "vepMaximum": VepMaximumFeature, diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index c674280ac..6d3b1b3af 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -32,10 +32,6 @@ SQtlColocClppMaximumNeighbourhoodFeature, SQtlColocH4MaximumFeature, SQtlColocH4MaximumNeighbourhoodFeature, - TuQtlColocClppMaximumFeature, - TuQtlColocClppMaximumNeighbourhoodFeature, - TuQtlColocH4MaximumFeature, - TuQtlColocH4MaximumNeighbourhoodFeature, common_colocalisation_feature_logic, common_neighbourhood_colocalisation_feature_logic, ) @@ -75,19 +71,15 @@ EQtlColocH4MaximumFeature, PQtlColocH4MaximumFeature, SQtlColocH4MaximumFeature, - TuQtlColocH4MaximumFeature, EQtlColocClppMaximumFeature, PQtlColocClppMaximumFeature, SQtlColocClppMaximumFeature, - TuQtlColocClppMaximumFeature, EQtlColocClppMaximumNeighbourhoodFeature, PQtlColocClppMaximumNeighbourhoodFeature, SQtlColocClppMaximumNeighbourhoodFeature, - TuQtlColocClppMaximumNeighbourhoodFeature, EQtlColocH4MaximumNeighbourhoodFeature, PQtlColocH4MaximumNeighbourhoodFeature, SQtlColocH4MaximumNeighbourhoodFeature, - TuQtlColocH4MaximumNeighbourhoodFeature, DistanceTssMeanFeature, DistanceTssMeanNeighbourhoodFeature, DistanceFootprintMeanFeature, From fb6111d1a69799d60ce89b8c1753681a0075f91d Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Fri, 11 Oct 2024 15:21:09 +0100 Subject: [PATCH 092/188] fix: adding single point statistics to pics loci (#832) * fix: adding single point statistics to pics loci * fix: patching field update logic * refactor: simplifying the pics process slightly * test: for lead statistics propagation * refactor: udf as decorator * fix: removing 
commented rows * fix: reverting udf decorator - tests would fail otherwise --- src/gentropy/method/pics.py | 78 +++++++++++++++++++----------- tests/gentropy/method/test_pics.py | 76 ++++++++++++++++++++++++++++- 2 files changed, 124 insertions(+), 30 deletions(-) diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 5fd084efd..b1dc46e12 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -17,6 +17,18 @@ class PICS: """Probabilistic Identification of Causal SNPs (PICS), an algorithm estimating the probability that an individual variant is causal considering the haplotype structure and observed pattern of association at the genetic locus.""" + # The fields for the picsed locus + ldSet tagVariantId is renamed to variantId: + PICSED_LOCUS_SCHEMA = t.ArrayType( + t.StructType( + [ + t.StructField("variantId", t.StringType(), True), + t.StructField("r2Overall", t.DoubleType(), True), + t.StructField("posteriorProbability", t.DoubleType(), True), + t.StructField("standardError", t.DoubleType(), True), + ] + ) + ) + @staticmethod def _pics_relative_posterior_probability( neglog_p: float, pics_snp_mu: float, pics_snp_std: float @@ -195,36 +207,24 @@ def finemap( Returns: StudyLocus: Study locus with PICS results """ - # Register UDF by defining the structure of the output locus array of structs - # it also renames tagVariantId to variantId - - picsed_ldset_schema = t.ArrayType( - t.StructType( - [ - t.StructField("tagVariantId", t.StringType(), True), - t.StructField("r2Overall", t.DoubleType(), True), - t.StructField("posteriorProbability", t.DoubleType(), True), - t.StructField("standardError", t.DoubleType(), True), - ] - ) - ) - picsed_study_locus_schema = t.ArrayType( - t.StructType( - [ - t.StructField("variantId", t.StringType(), True), - t.StructField("r2Overall", t.DoubleType(), True), - t.StructField("posteriorProbability", t.DoubleType(), True), - t.StructField("standardError", t.DoubleType(), True), - ] - ) - ) - 
_finemap_udf = f.udf( - lambda locus, neglog_p: PICS._finemap(locus, neglog_p, k), - picsed_ldset_schema, + # Finemapping method is an optional column: + finemapping_method_expression = ( + f.lit("pics") + if "finemappingMethod" not in associations.df.columns + else f.coalesce(f.col("finemappingMethod"), f.lit("pics")) ) + + # Flagging expression for loci that do not qualify for PICS: non_picsable_expr = ( f.size(f.filter(f.col("ldSet"), lambda x: x.r2Overall >= 0.5)) == 0 ) + + # Registering the UDF to be used in the pipeline: + finemap_udf = f.udf( + lambda ld_set, neglog_p: cls._finemap(ld_set, neglog_p, k), + cls.PICSED_LOCUS_SCHEMA, + ) + return StudyLocus( _df=( associations.df @@ -237,11 +237,31 @@ def finemap( "locus", f.when( f.col("ldSet").isNotNull(), - _finemap_udf(f.col("ldSet"), f.col("neglog_pvalue")).cast( - picsed_study_locus_schema + finemap_udf(f.col("ldSet"), f.col("neglog_pvalue")), + ), + ) + # Updating single point statistics in the locus object for the lead variant: + .withColumn( + "locus", + f.transform( + f.col("locus"), + lambda tag: f.when( + f.col("variantId") == tag["variantId"], + tag.withField("pValueMantissa", f.col("pValueMantissa")) + .withField("pValueExponent", f.col("pValueExponent")) + .withField("beta", f.col("beta")), + ).otherwise( + tag.withField( + "pValueMantissa", f.lit(None).cast(t.FloatType()) + ) + .withField( + "pValueExponent", f.lit(None).cast(t.IntegerType()) + ) + .withField("beta", f.lit(None).cast(t.DoubleType())) ), ), ) + # Flagging loci that do not qualify for PICS: .withColumn( "qualityControls", StudyLocus.update_quality_flag( @@ -252,7 +272,7 @@ def finemap( ) .withColumn( "finemappingMethod", - f.coalesce(f.col("finemappingMethod"), f.lit("pics")), + finemapping_method_expression, ) .withColumn( "studyLocusId", diff --git a/tests/gentropy/method/test_pics.py b/tests/gentropy/method/test_pics.py index ff6f115dc..3639b408f 100644 --- a/tests/gentropy/method/test_pics.py +++ 
b/tests/gentropy/method/test_pics.py @@ -3,7 +3,9 @@ from __future__ import annotations import pyspark.sql.functions as f -from pyspark.sql import Row +import pyspark.sql.types as t +import pytest +from pyspark.sql import Row, SparkSession from gentropy.dataset.study_locus import StudyLocus from gentropy.method.pics import PICS @@ -81,3 +83,75 @@ def test__finemap_udf() -> None: def test_finemap(mock_study_locus: StudyLocus) -> None: """Test finemap function returns study-locus.""" assert isinstance(PICS.finemap(mock_study_locus), StudyLocus) + + +class TestLeadPropagation: + """This test suite is designed to test that the statistics of the lead variant are propagated correctly.""" + + DATA = [ + ("v1", "v1", 1.0), + ("v1", "v2", 0.9), + ("v1", "v3", 0.3), + ] + + @pytest.fixture(autouse=True) + def setup(self, spark: SparkSession) -> None: + """Set up the test suite. + + Args: + spark (SparkSession): The spark session. + """ + df = ( + spark.createDataFrame(self.DATA, ["variantId", "tagVariantId", "r2Overall"]) + .groupBy("variantId") + .agg( + f.collect_list( + f.struct( + f.col("tagVariantId"), + f.col("r2Overall").cast(t.DoubleType()).alias("r2Overall"), + ) + ).alias("ldSet") + ) + .withColumns( + { + "studyLocusId": f.lit("l1"), + "studyId": f.lit("s1"), + "chromosome": f.lit("1"), + "pValueMantissa": f.lit(1.0).cast(t.FloatType()), + "pValueExponent": f.lit(-4).cast(t.IntegerType()), + "beta": f.lit(0.234).cast(t.DoubleType()), + "qualityControls": f.lit(None).cast(t.ArrayType(t.StringType())), + "ldSet": f.filter( + f.col("ldSet"), lambda x: x.tagVariantId.isNotNull() + ), + } + ) + ) + + self.study_locus = StudyLocus(_df=df, _schema=StudyLocus.get_schema()) + + def test_lead_propagation(self: TestLeadPropagation) -> None: + """Testing if all the lead variant statistics are propagated to the tag variants.""" + # Explode all the tags: + finemapped = ( + PICS.finemap(self.study_locus) + .df.select( + f.col("variantId"), + f.col("pValueMantissa"), + 
f.col("pValueExponent"), + f.col("beta"), + f.explode("locus").alias("locus"), + ) + .collect() + ) + + # Looping through all the tags and checking if the statistics are propagated correctly: + for row in finemapped: + if row["locus"]["variantId"] == row["variantId"]: + assert row["locus"]["pValueMantissa"] == row["pValueMantissa"] + assert row["locus"]["pValueExponent"] == row["pValueExponent"] + assert row["locus"]["beta"] == row["beta"] + else: + assert row["locus"]["pValueMantissa"] is None + assert row["locus"]["pValueExponent"] is None + assert row["locus"]["beta"] is None From 9f446e8f594ca63d359f09683933217b61446b44 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Fri, 11 Oct 2024 19:46:22 +0100 Subject: [PATCH 093/188] fix(find_overlap): missing right study type in output (#828) * fix: rightStudyType nulls removed * test: improve testing of peak overlap function * fix: overlap test --------- Co-authored-by: David Ochoa --- src/gentropy/dataset/study_locus.py | 3 +- tests/gentropy/conftest.py | 9 ++++ .../gentropy/data_samples/coloc_test.parquet | Bin 0 -> 15598 bytes .../dataset/test_study_locus_overlaps.py | 45 +++++++++++++++++- 4 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 tests/gentropy/data_samples/coloc_test.parquet diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index e9be88a9d..b6fc7b8c9 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -412,6 +412,7 @@ def _overlapping_peaks( .select( f.col("left.studyLocusId").alias("leftStudyLocusId"), f.col("right.studyLocusId").alias("rightStudyLocusId"), + f.col("right.studyType").alias("rightStudyType"), f.col("left.chromosome").alias("chromosome"), ) .distinct() @@ -452,7 +453,6 @@ def _align_overlapping_tags( f.col("chromosome"), f.col("tagVariantId"), f.col("studyLocusId").alias("rightStudyLocusId"), - f.col("studyType").alias("rightStudyType"), *[f.col(col).alias(f"right_{col}") for col in 
stats_cols], ).join(peak_overlaps, on=["chromosome", "rightStudyLocusId"], how="inner") @@ -464,6 +464,7 @@ def _align_overlapping_tags( "rightStudyLocusId", "leftStudyLocusId", "tagVariantId", + "rightStudyType", ], how="outer", ).select( diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index f977e3f74..9ea31ae20 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -501,6 +501,15 @@ def sample_ukbiobank_studies(spark: SparkSession) -> DataFrame: ) +@pytest.fixture() +def study_locus_sample_for_colocalisation(spark: SparkSession) -> DataFrame: + """Sample study locus data for colocalisation.""" + return StudyLocus( + _df=spark.read.parquet("tests/gentropy/data_samples/coloc_test.parquet"), + _schema=StudyLocus.get_schema(), + ) + + @pytest.fixture() def sample_target_index(spark: SparkSession) -> DataFrame: """Sample target index sample data.""" diff --git a/tests/gentropy/data_samples/coloc_test.parquet b/tests/gentropy/data_samples/coloc_test.parquet new file mode 100644 index 0000000000000000000000000000000000000000..54cc420c45fc521ceedbcb1ba33753f1a8567ef5 GIT binary patch literal 15598 zcmd5@30zZ0x4#zx1R52Q8?5KG zDvP9YNUCV>yps-I6bmw(DKb(9X~-EkQHN zpkqO5*l2YcG>E4@r#6=@yY0qkATpR6G=pj)kxC>A3sK1Ba+#`&QmIsRapyM|z?)pE zQmUe%3mnRwT6X^X(+&}o0|n5|p18RPch;Tqq&7d>KEE+WXPLXo_A;Xrj!?mi5>Q8W z{xz2=zOmF?p_>oiW&cs03sf6R>8box6?-@gEjy-~tMNft!f?Gv6fP1eLgb+eks?SW z4smtkaG}KY!-z1cM63`8K{rGSg-C|Yw1}jZlhnrieS8MsC--`o5hc(rDkUvVo2(6r zR;L-XI$erN93l!+88TIIdd)CZvRa#=QN{F(jSIm)GO=7C4&y+xF43`ZVId(Ri8MrJ z>E7Im1I&@O^p3o?CO%(Wn3MtxB?dsgWesQc{&2j6J zi|ECj9agmSN9S*AsM_;auUtt0AT3T#F;8n>Hnwo#^;od}npo3-0Z-N(|BjMfTJ}oD~f=M{OOeP&)_9Pu4eU4;- zQ{$hhrcb=G;HFPkEnV@}O-(IdA+YWw4wb{9@~^F{agLz&l6Xy<NMfJzXdH ztLmaJJ#^caS-+p~r3V&d_6#}ZOHWJXOy2yHFTGXx^V-V?ed%9oVq%2reQE!f>=t2b zeCZMGb`*ZS#FyT^ZHrVp$Cth&?>BzMEMGcHw|DNjnZESQjXjSC=lIf_do0{qo$X5- zTK;l1d5kZ;v$ELtp3av(lopel(9xIfaA8J&(%Y9_6HCO*XZh0KQRSa{zw)8ydKAnk 
zeBnd4+;M2#gI|4U$2O70BaZmc#GuyOzg+4=cU$uxx^piddTZP0+%_Ni(9`*Yk9#V7 z=nK`$)^MagbaoBSRodQi>D0TC76^fL9Woa?*1>D?tK z2PKqy(~rB0KUu8srVrnZ{OU?N03_64HUYF1O#r71G;?wrhkh{b|L-76V58 z=})&f^heVXcl>FmogRxWed|w8iRyf7**bq(T#^^zz1p9a{?)mi_i}&QgZz_+3jFCa z%Qmjdp6O379raC;I>Vnn?zxrd9qLb?8b}TuSJ;| z2w(Sr)*t3~^`CP+tNZ%6_Wd^gKKA?gt^7?%JAc#f@c#Ob=j7CVEx#Ea;Wy`y|F-Mn zqgI`0``w8D#BVzjRC@D;17Vx4v$9XzA?r-n1-&wlmNnUe!h5cHadv1CnpT?mYinXL zIy2y}$$_f5X#Kj|Kd;R#L?fFv>mIC|gTCy%Gi*z@IcQm@#TR2b%t4>%CrvE8Uw}B< ze(?DIN&zCXx_uc?QGmXB;MRTAz5+C$PoC554h5+D)84neeF~5^J4Ym}%||PuI{XrJ zBp;!emxpJ3n~yfNX#YcLNj~b&9wKUu<7bUKYYPap#4AkB2dF@~N8R)>A%(%6-D>l>bkLZ3fkjT@oS57Q_z&Xw0K3u6tq`6HFrkh6cl}=XU9;%6cjnD z%cJ}ulTjyUr}(jlHlhB-&8}^`um%-)d7RoSUV^HcULw=i7NJRTAN_o#QxOvM?r`Db z(#5F9>2?`j-iy%5&&TS%-MkQOm6jIo=(G?e7G6XPSqstmPxffaN*16^-K)zs>K34z zvC|3-aSM?D#uy^Qbpc8|oHyZo&3rU{Q{k|$=gmh?{a)m)DV&e!{%gnV(alFaziEDV zqGCRZADHx$w$*&(eyUkShi3E9%pz9v&412A0?+8Sg2(faWHLoeeJ~H5T48wf^42_b zx2&q5_0RJVHEdYLu?ut2n5CzG@}63V9Bz#Z`-WYJe$VrfXIz?t_ADCHfru|a%eqcz zReUWUY45je)V?AgU6~fBnQ+mFj2Q!E>68%-xLSF)d`}+QbSS(1dQl$wC4A^#m6v9s z+q=dN^i<451LrMX`Q%v+@`_zvAUlwQUg#&dQH^s@(|xVSd{{aIrFS8w?JJp%Hnzza zUv}&(G`sKNS<7}$M23Yy?&pq=!~5rd<8+idMyi!>LC>dJ-P|fPhe$Oe$8zsp8_4;&ucZjzMHQZ%?mKQJ7Q;*}(yXd#EH# zE{hBxJVO);xvGm8QV@qILR9jVL~FSsG&B@IyEOHl%@&1)k`Y9g;4y@|JX9_Z3C9U} zs6-N_A>1WlGL=&Nkl;XoFC}{ie@DR4M-dVVQlh=j^W<`=Buv#s?n!V$L&ehYM0mns zi3|wtg;TN+r=LtId5AQUPqO4ve`Rncl!S<-krAMzR4xt;4@Bul0+^+Y6*McyLWp(07ta!^_nCMF|-rJ$`iR4gHT4PL>K%fm!uE)f?@5v>&x zxd=uI&V=6WI0{LKn0!DegMTIsB2+3;1_qCJ z;DCk0{Mo@9NH4JnLdR&OQVwmv#Gx(cL2FN`Bm~i^OI6+cXBvpKWXBLFs z_PtDdZ+HWHnX@-7;B-=ZbzK?o!AV6E&u-27_HRB<+oCf4f=hNy*LhcG6yG?sb<^{G z-$WcWj655<^yc*8ecSEVT&*by;|?AewdB)QD|&>_x;Vk}qn<7F=jMHV^L|TL)9&R+ zO?RX&KHF-DpoZevu$R2I2OsC=@XF|!x*zBk9bA-r)&sijpk~V2_05zH?Ol|4`&^YA z2TFOXx0`a}2sh=*7@l&&eV($+r-^b$H!tPLKUyd|ocB`BPTESyS8)^4S5f1j8WeZ=F&cCI2|C~U33_?zDcW$~0Y?hCBvn9C ze{A?+n>hlvL<*hn#W1alWBxB$LmZu(V)ORqxfHa{Ss9SCHDuO9I$zqmucm+VY|WX9 
z^b3DZUZI$s*nd%*MP@q#(_lJ@<Mz8+RvurSMeEk)?e+cgELuk7ADc%HwscMosQF88}v2jgEXe$}97`O5`Ld9y)7VC2AhLaSQ)aC2F)Tbk)_3mFUUh z8P}sqD$$A&k48*7QHjE@Pq_TW(MmMOC%a#pVW&~lwwo7n_E(~^H1+D{fK%EjSTS`^ zC3@V2=NwpGiFSRnJS5;`CF;~;M(eTsYSeyp?dmgYD^ZQ1U{f$WyFFpcqBlUhMymvw z{X2pPS}`UqpwCG54~&mlV?z;^Dct#W>ld@udX}0?W#nQ z5xcubW>liHJqNa$GO7|S^*_j7ms*KFxFyOzp{qmz>f+m~{*}n%<3s2Fcv_7*{aNzG zw_ep~&AAD)^SIUM=-H}UT_0DW*g<`t3~pMD9<7~q!yu?eC$oPzu{RHJPQ~8~xm|^p zluygL`=SaRkC+tmB|JY&Y5yO2i)tk9xA@g(-hfw`l;D503iaS6h%`SLQ99rpDe06)zOGP-TVDMIXt}<4dQkC$cQ$odlvgmiJT%aVmhjtt^|+-Gb$!BH6y#$>yho?H z-0(7@qHlWtG05GBhWl5<1#paLdb7LE_uY(W_Se$ZvmA_Q$6eu|Wsma^@9ssP`1^Ti z;f;i4B{%a>^U3K&;ve(S0gt~{4~GEbz|+fqIlJ;uZ?fgYmk?M2&vm=)H+ksk+6P&` zmE@s5PLjt7%kogvh7$*zM(3dogF5W4Yy<&`^MPjvf1CZ^8b}zOsYTAz(e1{4a7>TK z3&3>8N7G0vJ2hQ<@tmsT4m4rz_*=ofb|CevZP(eC%Fs@g)6?XKJJ9jgoFShcEki?o z?H{z|$1-$}=kvUOU@3Y5bLg_D6lL*UTa}D2McaCQ9RI3)De}{0j$H%guCs<-em1%k zOZa1nFT}}+VGEiHJ)+T4p zJf2mGXnn=ZOJ~Z^(1{0|wuE|K!)9cyNi9X~70u5SW|yKyM@dDAsvPaCYQh;OD@P;Z ze~xgJmLuWzQDc8mL;jFOetWN%qh}E}_oNk;p^^6tOKK07qENk}r>3M7oypDotfsgW z%^9BD?$5QQC@xafn7h6d{o*QHny|PO9qM!QMdy!ppqW0gCwy|sQSvJR{v^)Nmz*s} z7%ll}ISRW|*!c9Ta^!OR*B_%VmSe9m?*Y}n)~u+r3cvdvfH&GPzHHcN3)X0t4&Sh$ z{n_nzKEO1OkZMOA`(@5yV3_R;QM0bhW!}sX;mYesYBTIf>u3I8BBU+cd)&QPGdbr0 zn^E87FOpM6Ckql%l9RNFn0b^!E@lX0Oq^qog>+2NYZA5bz%{063?>Fr02E|UN|K#| zG?dtn<#$Dfe-Ap34(>0dOpwDOx-gI++XL{_`GHf(DjTK4k8hs2Oo?q z7G~J_JQgO~8QEMjo&Gv?&gbzPpxVu(JEOJaJ=k+Tk5h*{w;OmMu&o>-09a{6Mt!#R>AWUh3a9+-psbu zuC*r*x!Q6UxKWXox~JW2xvM|0<<@*aJzW%O*Rv{`s<0ZP zDuzn3pgihhuYPHuy-M+5YJjE6q#@KLQ%e|}+MuqSv{beJ3sqXeaJUnrq;P-Z@4(}v z;p?&<__nNDqCe5Y3x>{A;A$#}8@ z7GcSZ$xKa2)+8IuHGMTnNty(MQm51CG@bRDQR%QlkLe65#-^m}695l`H%c?8lM~hY z#2CFkMQ>KISz3C$X?gCXNlVacQ?c?EePs^3CZp2TI;~+$bV{;8pQ1~Hi)`l}5nwPG zi^tMw(+m_a&Df|eAa^9F!|ZBP>s_QadDtf!xQnz|8~4NA8Y#qT4Be6wHJN4|Js8=# zlwqQfsLtjBPHMUyH2FlMPVOT%XMJGF(wcHGsGI2$G}fTjgJDbqV#wD zriIC(Eg=)AV$Th$lZ!{s3Cv+Nh9~%$L8I5E=zHl?;??n5JV<6WUF-Sl+q=?h&iJ~& z;9^E+O;&NU2ZWZf=d$=-{Myz+xaQ^XWzKBad+JO%%7M$`ZYR0BNFh5yP6WI$=4&`u 
zgyj|@R~S~fD6Q2~T<#?mEVzVlKWUe1pV20vL%8u!jAdGaMxH5#WFi%$0n*i>l$a0} z!?zQx9k3hK1Y{(+*(6lJPz{Baw%yNm#mkhjE{)=G7pSR3LRbTL%~%GD&F3+e6qArt zZYGoqOB}s$Xo`QiM1Unl@B=ZmgJ&P+%CHUgV$EySl#mdvhWlp+i|K7Ek_odrw`vCz5c8KI2BcT{es}mIS-Y2kTO3yNY^uq~M5jao{oXOlQ)-$cl zCfQxNJ)k7Q9)j3YT!#Q#U}Dc@!WS99lGhTcNJ6+7riB&Vve?^3W-}Y`t)aR-h4Jg> zGV!-!{9+BpuLXRzMf=zG51X7?kYkGbd8R`E_5geeFuyTz4`SRa8tegw;7d4GpL%Tz zvf&g||+?z!C z5W>>{Xf<%%1v9ZI zGz{xnV*>GaCisficUsf#YTJSwxX#?^4%}P^Xc8fJ2zc|!i1&VnE@p4Hr^BS!F^oC{ zlj071an3p?>+kEP&L(k3Ks7u!&BrgsiHwPQ1b076ONA4{9Qfi9_9?vj)e8qqm^!Wj zY6+_vdzp`L=1=Tc_&{fVYhW$Bf@SR1V*fe}GOX^GJrb->VZ!Hrs)jGvmFdU zR0Fxp{yf97ZfUVkY;d>BsuQl5q~Nu8gF8sE9wqH5?0NNbnNo}yAZ5b{EafRf3Nr$- z33-ZzSP!K36c$oHmnoze3%O*mQzpa{3`9m~XTrqm#0ihr=H4~jc8U(q zJ!3QM(Rl84HHU$9W*mnZ?3wWN;EKj(5%stB`Z=Jx_4sQG0P5Fb_UR?oYq^dZLI|@w zu@n8BUiR?Iq!jktEKjrNg7e-(_TxVuZpeBqmnpPxGzfjE!#-65LJ6x-9KY5v#_Lfk zh(1Sh_1p|-7Xds1k0fB8n4V1h4H$pdNXm~8R>2oq{~4bd@7nRJC^J5s=}h==h_^5L zdolh66Mj}Jv(Z?1Fze}Wj-1V?M;dc4G!|yH5+31t*{KemjsL(C-b#Sja~oJ=PLGDw zaZ<9$dZK@l-e9cGRHjBGEb#|rti%e>#gP6~%pI)EZj&-ak!HVc*c5dFSs27y z6n7~{SQIJDk$TAqo7>u08=KqOsGSC+XJ>-++GOm#Rnd4m_jhhsdi{U|RJg|CK4C#a z;W`>tdCUp%Oau{kNC4xl0kKQ5*fS}3X{n0AOUoO?T93?T+b=MMu2nI@r7<8Db57>i z1&g)6H6V6-Cb-@7R4n#UpMM&7Yw!qtG<)5nKEmCdVW6!n7SBd);eoE7+ko)HxX;^D zvDaN1_)o&?g+8FgGZ$_S8`=qr`hphrP{+!kLDw6|Z9t1FSc^v{Es6)5wJJtgdt;b&yd_pB-FqmMgx4@`%!yL zJqpG^w@!?rHo$szV;GkuWOJ)j@bwygAdi=#9~P`mRRfb0Y$BC{;l!T?TnW!f5)A(c zUfX&#(njfmfqw!n#t^Je867+lSe`_+LCyP)JI>DlG?QPrUz$Omo?!3`^h?rebctzx z;RDC{C96l8i|jNNRIoPyItAX7({(yET(tOw1B52f&x-CBK5oK(2 z5O%ig{V}Z6njJx#Y|sqT=po}RjbWza>;Pc>cQmR2;wSXWa!%sUUq8%Rq#@;fS6hvu}SysNnzU`Fj=@9H|Q5vevH z+e)C;>(yf*+d`z8rmSgwFncJ_YzDP9Ijtv5d-If^@YVxXkB7BM$Lr5K%>V{FYa(s! 
zNbfxQ3@8iHYX^JRp<;5w{~86;$?p$hXQ1sE?`#|E>F8fT*qQEs0b=6EZSDWrS8b*L zu%|IkRW{gnc0wE9Zijd~hZziW=HE;t)3Dft+B)3e)U}oVgE?Mf|82;A@hK@fxafe4 zw_K9}iFtzh7f7aC27CLrGps${Xy{_%{n7pxLXZEpP4C}@0RKx{+yC1Pvfmcjo#O5G zpurOeUUy449W+ueu*Ba2QLsoHEEWXBr)zbIg3wTnBqTu*5+;!-CLj@PV|7ZV+d>?}71%K)MKjC|sG5`Po literal 0 HcmV?d00001 diff --git a/tests/gentropy/dataset/test_study_locus_overlaps.py b/tests/gentropy/dataset/test_study_locus_overlaps.py index 58dc95039..669af476f 100644 --- a/tests/gentropy/dataset/test_study_locus_overlaps.py +++ b/tests/gentropy/dataset/test_study_locus_overlaps.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any +import pyspark.sql.functions as f import pyspark.sql.types as t import pytest @@ -59,7 +60,12 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> False, # expected - output DataFrame with overlapping signals [ - {"leftStudyLocusId": "1", "rightStudyLocusId": "2", "chromosome": "1"}, + { + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", + "rightStudyType": "eqtl", + "chromosome": "1", + }, ], ), ( @@ -93,7 +99,14 @@ def test_study_locus_overlap_from_associations(mock_study_locus: StudyLocus) -> # intrastudy - bool of whether or not to use inter-study or intra-study logic True, # expected - output DataFrame with overlapping signals - [{"leftStudyLocusId": "2", "rightStudyLocusId": "1", "chromosome": "1"}], + [ + { + "leftStudyLocusId": "2", + "rightStudyLocusId": "1", + "rightStudyType": "gwas", + "chromosome": "1", + } + ], ), ], ) @@ -118,6 +131,7 @@ def test_overlapping_peaks( [ t.StructField("leftStudyLocusId", t.StringType()), t.StructField("rightStudyLocusId", t.StringType()), + t.StructField("rightStudyType", t.StringType()), t.StructField("chromosome", t.StringType()), ] ) @@ -125,3 +139,30 @@ def test_overlapping_peaks( result_df = StudyLocus._overlapping_peaks(observed_df, intrastudy) expected_df = spark.createDataFrame(expected, expected_schema) assert result_df.collect() == expected_df.collect() + + 
+class TestStudyLocusOverlap: + """Test the overlapping of StudyLocus dataset.""" + + @pytest.fixture(autouse=True) + def setup( + self: TestStudyLocusOverlap, study_locus_sample_for_colocalisation: StudyLocus + ) -> None: + """Get sample dataset.""" + # Store input dataset: + self.study_locus = study_locus_sample_for_colocalisation + + # Call locus overlap: + self.overlaps = study_locus_sample_for_colocalisation.find_overlaps() + + def test_coloc_return_type(self: TestStudyLocusOverlap) -> None: + """Test that find_overlaps returns a StudyLocusOverlap.""" + assert isinstance(self.overlaps, StudyLocusOverlap) + + def test_coloc_not_null(self: TestStudyLocusOverlap) -> None: + """Test that the overlap dataset is not empty.""" + assert self.overlaps.df.count() != 0 + + def test_coloc_study_type_not_null(self: TestStudyLocusOverlap) -> None: + """Test that rightStudyType is never null in the overlaps.""" + assert self.overlaps.filter(f.col("rightStudyType").isNull()).df.count() == 0 From c4520aa31a524034475c4630c311d1b0f4053088 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Sat, 12 Oct 2024 14:02:08 +0100 Subject: [PATCH 094/188] feat: enhance variant index partitioning (#834) --- src/gentropy/variant_index.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index f6d555f07..dba087c79 100644 --- a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -59,8 +59,9 @@ def __init__( col("variantId"), col("chromosome"), col("position") ), ) - .write.partitionBy("chromosome") - .mode(session.write_mode) + .repartitionByRange("chromosome", "position") + .sortWithinPartitions("chromosome", "position") + .write.mode(session.write_mode) .parquet(variant_index_path) ) From e77abf499bc4a4b9cdb77cbe962423563fcd5828 Mon Sep 17 00:00:00 2001 From: Yakov Date: Mon, 14 Oct 2024 13:44:52 +0100 Subject: [PATCH 095/188] feat: adding new LD interface (#759) * feat: adding new LD interface * fix: adding ld_index * fix: adding csa * fix: adding function in init * chore: temporary commit to
build the Docker image * chore: revert temporary GitHub Actions configuration * chore: committing some restructuring * chore: committing some changes * chore: tweaking variable names * fix: fixing locus index boundaries * docs: documenting where gnomad ht were sourced from * fix: adding afr ancestry to panUKBB interface * fix: fixing arguments for susie_finemapper * fix: removing unneeded code from panukbb ld * feat: ancestry mapping * fix: remove chr from chromosome column + some other bugs * fix: removing duplicated variants & resolving flipped indels * chore: committing changes * Update src/gentropy/datasource/pan_ukbb_ld/ld.py Co-authored-by: Daniel Suveges * Update src/gentropy/method/ld_matrix_interface.py Co-authored-by: Daniel Suveges * Update colocalisation.py * fix: rightStudyType nulls removed * test: improve testing of peak overlap function * fix: overlap test * fix: changes suggested by ds review * chore: removing unneeded variants.py --------- Co-authored-by: Kirill Tsukanov Co-authored-by: Daniel Considine Co-authored-by: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Co-authored-by: Daniel Suveges --- src/gentropy/config.py | 23 ++ .../datasource/pan_ukbb_ld/__init__.py | 3 + src/gentropy/datasource/pan_ukbb_ld/ld.py | 214 ++++++++++++++++ src/gentropy/method/ld_matrix_interface.py | 101 ++++++++ src/gentropy/susie_finemapper.py | 230 ++++++++++++++++++ 5 files changed, 571 insertions(+) create mode 100644 src/gentropy/datasource/pan_ukbb_ld/__init__.py create mode 100644 src/gentropy/datasource/pan_ukbb_ld/ld.py create mode 100644 src/gentropy/method/ld_matrix_interface.py diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 50eb4af72..464861803 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -403,6 +403,29 @@ class GnomadVariantConfig(StepConfig): _target_: str = "gentropy.gnomad_ingestion.GnomadVariantIndexStep" +@dataclass +class PanUKBBConfig(StepConfig): + """Pan UKB variant ingestion 
step configuration.""" + + session: Any = field( + default_factory=lambda: { + "start_hail": True, + } + ) + pan_ukbb_ht_path: str = "gs://panukbb-ld-matrixes/ukb-diverse-pops-public-build-38/UKBB.{POP}.ldadj.variant.b38" + pan_ukbb_bm_path: str = "gs://panukbb-ld-matrixes/UKBB.{POP}.ldadj" + ukbb_annotation_path: str = "gs://panukbb-ld-matrixes/UKBB.{POP}.aligned.parquet" + pan_ukbb_pops: list[str] = field( + default_factory=lambda: [ + "AFR", # African + "CSA", # Central/South Asian + "EUR", # European + ] + ) + use_version_from_input: bool = False + _target_: str = "gentropy.pan_ukb_ingestion.PanUKBBVariantIndexStep" + + @dataclass class VariantIndexConfig(StepConfig): """Variant index step configuration.""" diff --git a/src/gentropy/datasource/pan_ukbb_ld/__init__.py b/src/gentropy/datasource/pan_ukbb_ld/__init__.py new file mode 100644 index 000000000..dc57b8db8 --- /dev/null +++ b/src/gentropy/datasource/pan_ukbb_ld/__init__.py @@ -0,0 +1,3 @@ +"""Pan UKBB Data Source.""" + +from __future__ import annotations diff --git a/src/gentropy/datasource/pan_ukbb_ld/ld.py b/src/gentropy/datasource/pan_ukbb_ld/ld.py new file mode 100644 index 000000000..1926ddc9c --- /dev/null +++ b/src/gentropy/datasource/pan_ukbb_ld/ld.py @@ -0,0 +1,214 @@ +"""Step to import filtered version of a LD matrix (block matrix).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import hail as hl +import numpy as np +import pyspark.sql.functions as f +from hail.linalg import BlockMatrix +from pyspark.sql.window import Window + +from gentropy.common.session import Session +from gentropy.config import PanUKBBConfig + +if TYPE_CHECKING: + from pyspark.sql import DataFrame, Row + + +class PanUKBBLDMatrix: + """Toolset to work with Pan UKBB LD matrices.""" + + def __init__( + self, + pan_ukbb_ht_path: str = PanUKBBConfig().pan_ukbb_ht_path, + pan_ukbb_bm_path: str = PanUKBBConfig().pan_ukbb_bm_path, + ld_populations: list[str] = PanUKBBConfig().pan_ukbb_pops, + 
ukbb_annotation_path: str = PanUKBBConfig().ukbb_annotation_path, + ): + """Initialize. + + Datasets are in hail native format. + + Args: + pan_ukbb_ht_path (str): Path to hail table, source: gs://ukb-diverse-pops-public/ld_release + pan_ukbb_bm_path (str): Path to hail block matrix + ld_populations (list[str]): List of populations + ukbb_annotation_path (str): Path to pan-ukbb variant LD index with alleles flipped to match the order in OT variant annotation + Default values are set in PanUKBBConfig. + """ + self.pan_ukbb_ht_path = pan_ukbb_ht_path + self.pan_ukbb_bm_path = pan_ukbb_bm_path + self.ld_populations = ld_populations + self.ukbb_annotation_output_path = ukbb_annotation_path + + def align_ld_index_alleles( + self, + variant_annotation: DataFrame, + population: str, + hail_table_path: str = PanUKBBConfig.pan_ukbb_ht_path, + hail_table_output: str = PanUKBBConfig.ukbb_annotation_path, + ) -> None: + """Align Pan-UKBB variant LD index alleles with the Open Targets variant annotation. 
+ + Args: + variant_annotation (DataFrame): Open Targets variant annotation DataFrame + population (str): Population label + hail_table_path (str): Path to hail table with Pan-UKBB variant LD index + hail_table_output (str): Path to output the aligned Pan-UKBB variant LD index with alleles in the correct order + """ + ht = hl.read_table(hail_table_path.format(POP=population)) + ht = ( + ht.to_spark() + .select( + "`locus.contig`", + "`locus.position`", + "`alleles`", + "`idx`", + ) + .withColumns( + { + "chromosome": f.split("`locus.contig`", "chr")[1], + "position": f.col("`locus.position`"), + "referenceAllele": f.element_at("`alleles`", 1), + "alternateAllele": f.element_at("`alleles`", 2), + } + ) + .drop("locus.contig", "locus.position", "alleles") + .dropDuplicates( + ["chromosome", "position", "referenceAllele", "alternateAllele"] + ) + ) + ht_va = ( + ht.alias("ukbb") + .join( + variant_annotation.select( + "chromosome", + "position", + f.col("referenceAllele").alias("va_ref"), + f.col("alternateAllele").alias("va_alt"), + ).dropDuplicates(["chromosome", "position", "va_ref", "va_alt"]), + on=["chromosome", "position"], + how="left", + ) + .filter( + ( + (f.col("referenceAllele") == f.col("va_ref")) + & (f.col("alternateAllele") == f.col("va_alt")) + ) + | ( + (f.col("referenceAllele") == f.col("va_alt")) + & (f.col("alternateAllele") == f.col("va_ref")) + ) + | (f.col("va_ref").isNull() | f.col("va_alt").isNull()) + ) + .withColumns( + { + "alleleOrder": f.when( + (f.col("referenceAllele") == f.col("va_alt")) + & (f.col("alternateAllele") == f.col("va_ref")), + -1, + ).otherwise(1), + "new_referenceAllele": f.when( + (f.col("referenceAllele") == f.col("va_alt")) + & (f.col("alternateAllele") == f.col("va_ref")), + f.col("va_ref"), + ).otherwise(f.col("referenceAllele")), + "new_alternateAllele": f.when( + (f.col("alternateAllele") == f.col("va_ref")) + & (f.col("referenceAllele") == f.col("va_alt")), + f.col("va_alt"), + 
).otherwise(f.col("alternateAllele")), + } + ) + .select( + f.concat_ws( + "_", + "chromosome", + "position", + "new_referenceAllele", + "new_alternateAllele", + ).alias("variantId"), + "chromosome", + "position", + f.col("new_referenceAllele").alias("referenceAllele"), + f.col("new_alternateAllele").alias("alternateAllele"), + "alleleOrder", + "idx", + ) + ) + window_spec = Window.partitionBy("idx").orderBy(f.col("alleleOrder").desc()) + ht_va = ( + ht_va.withColumn("rank", f.rank().over(window_spec)) + .filter(f.col("rank") == 1) + .drop("rank") + ) + ht_va.write.mode("overwrite").parquet(hail_table_output.format(POP=population)) + + def get_numpy_matrix( + self: PanUKBBLDMatrix, + locus_index: DataFrame, + ancestry: str, + ) -> np.ndarray: + """Extract the LD block matrix for a locus. + + Args: + locus_index (DataFrame): hail matrix variant index table + ancestry (str): Ancestry label + + Returns: + np.ndarray: LD block matrix for the locus + """ + idx = [row["idx"] for row in locus_index.select("idx").collect()] + + half_matrix = ( + BlockMatrix.read(self.pan_ukbb_bm_path.format(POP=ancestry)) + .filter(idx, idx) + .to_numpy() + ) + + alleleOrder = [ + row["alleleOrder"] for row in locus_index.select("alleleOrder").collect() + ] + outer_allele_order = np.outer(alleleOrder, alleleOrder) + np.fill_diagonal(outer_allele_order, 1) + + ld_matrix = (half_matrix + half_matrix.T) - np.diag(np.diag(half_matrix)) + ld_matrix = ld_matrix * outer_allele_order + np.fill_diagonal(ld_matrix, 1) + + return ld_matrix + + def get_locus_index_boundaries( + self, + session: Session, + study_locus_row: Row, + ancestry: str = "EUR", + ) -> DataFrame: + """Extract hail matrix index from StudyLocus rows. 
+ + Args: + session (Session): Session object + study_locus_row (Row): Study-locus row + ancestry (str): Major population, default is "EUR" + + Returns: + DataFrame: Returns the index of the pan-ukbb matrix for the locus + + """ + chromosome = str(study_locus_row["chromosome"]) + start = int(study_locus_row["locusStart"]) + end = int(study_locus_row["locusEnd"]) + + index_file = session.spark.read.parquet( + self.ukbb_annotation_output_path.format(POP=ancestry) + ) + + index_file = index_file.filter( + (f.col("chromosome") == chromosome) + & (f.col("position") >= start) + & (f.col("position") <= end) + ).sort("idx") + + return index_file diff --git a/src/gentropy/method/ld_matrix_interface.py b/src/gentropy/method/ld_matrix_interface.py new file mode 100644 index 000000000..d0051101b --- /dev/null +++ b/src/gentropy/method/ld_matrix_interface.py @@ -0,0 +1,101 @@ +"""Step to import filtered version of a LD matrix (block matrix).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +import pyspark.sql.functions as f + +from gentropy.common.session import Session +from gentropy.datasource.gnomad.ld import GnomADLDMatrix +from gentropy.datasource.pan_ukbb_ld.ld import PanUKBBLDMatrix + +if TYPE_CHECKING: + from pyspark.sql import DataFrame, Row + + +class LDMatrixInterface: + """Toolset to interact with LD matrices.""" + + ancestry_map = { + "nfe": "EUR", + "csa": "CSA", + "afr": "AFR", + } + + @staticmethod + def get_locus_index_boundaries( + session: Session, + study_locus_row: Row, + ancestry: str = "nfe", + ) -> DataFrame: + """Extract hail matrix index from StudyLocus rows. 
+ + Args: + session (Session): Session object + study_locus_row (Row): Study-locus row + ancestry (str): Major population to extract from gnomad matrix, default is "nfe" + + Returns: + DataFrame: Returns the index of the gnomad matrix for the locus + + """ + if ancestry in ("nfe", "csa", "afr"): + joined_index = PanUKBBLDMatrix().get_locus_index_boundaries( + session=session, + study_locus_row=study_locus_row, + ancestry=LDMatrixInterface.ancestry_map.get(ancestry, ancestry), + ) + else: + joined_index = ( + GnomADLDMatrix() + .get_locus_index_boundaries( + study_locus_row=study_locus_row, + major_population=ancestry, + ) + .withColumn( + "variantId", + f.concat( + f.regexp_replace(f.col("`locus.contig`"), "chr", ""), + f.lit("_"), + f.col("`locus.position`"), + f.lit("_"), + f.col("alleles").getItem(0), + f.lit("_"), + f.col("alleles").getItem(1), + ).cast("string"), + ) + ) + + return joined_index + + @staticmethod + def get_numpy_matrix( + locus_index: DataFrame, + ancestry: str = "nfe", + ) -> np.ndarray: + """Extract the LD block matrix for a locus. + + Args: + locus_index (DataFrame): hail matrix variant index table + ancestry (str): major ancestry label eg. 
`nfe` + + Returns: + np.ndarray: LD block matrix for the locus + """ + if ancestry in ( + "afr", + "csa", + "nfe", + ): + block_matrix = PanUKBBLDMatrix().get_numpy_matrix( + locus_index=locus_index, + ancestry=LDMatrixInterface.ancestry_map.get(ancestry, ancestry), + ) + else: + block_matrix = GnomADLDMatrix.get_numpy_matrix( + locus_index=locus_index, gnomad_ancestry=ancestry + ) + + return block_matrix diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 38a621f96..7d04b5763 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -29,6 +29,7 @@ from gentropy.dataset.study_locus import StudyLocus from gentropy.datasource.gnomad.ld import GnomADLDMatrix from gentropy.method.carma import CARMA +from gentropy.method.ld_matrix_interface import LDMatrixInterface from gentropy.method.sumstat_imputation import SummaryStatisticsImputation from gentropy.method.susie_inf import SUSIE_inf @@ -855,3 +856,232 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( ) return out + + @staticmethod + def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries_ldinterface( # noqa: C901 + session: Session, + study_locus_row: Row, + study_index: StudyIndex, + max_causal_snps: int = 10, + susie_est_tausq: bool = False, + run_carma: bool = False, + run_sumstat_imputation: bool = False, + carma_time_limit: int = 600, + carma_tau: float = 0.04, + imputed_r2_threshold: float = 0.9, + ld_score_threshold: float = 5, + sum_pips: float = 0.99, + lead_pval_threshold: float = 1e-5, + purity_mean_r2_threshold: float = 0, + purity_min_r2_threshold: float = 0.25, + cs_lbf_thr: float = 2, + ) -> dict[str, Any] | None: + """Susie fine-mapper function that uses study-locus row with collected locus, chromosome and position as inputs. 
+ + Args: + session (Session): Spark session + study_locus_row (Row): StudyLocus row with collected locus + study_index (StudyIndex): StudyIndex object + max_causal_snps (int): maximum number of causal variants + susie_est_tausq (bool): estimate tau squared, default is False + run_carma (bool): run CARMA, default is False + run_sumstat_imputation (bool): run summary statistics imputation, default is False + carma_time_limit (int): CARMA time limit, default is 600 seconds + carma_tau (float): CARMA tau, shrinkage parameter + imputed_r2_threshold (float): imputed R2 threshold, default is 0.9 + ld_score_threshold (float): LD score threshold for imputation, default is 5 + sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) + lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5 + purity_mean_r2_threshold (float): threshold for purity mean r2 qc metrics for filtering credible sets + purity_min_r2_threshold (float): threshold for purity min r2 qc metrics for filtering credible sets + cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 + + Returns: + dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map, or None + """ + # PLEASE DO NOT REMOVE THIS LINE + pd.DataFrame.iteritems = pd.DataFrame.items + + chromosome = study_locus_row["chromosome"] + studyId = study_locus_row["studyId"] + locusStart = study_locus_row["locusStart"] + locusEnd = study_locus_row["locusEnd"] + + study_index_df = study_index._df + study_index_df = study_index_df.filter(f.col("studyId") == studyId) + major_population = study_index_df.select( + "studyId", + order_array_of_structs_by_field( + "ldPopulationStructure", "relativeSampleSize" + )[0]["ldPopulation"].alias("majorPopulation"), + 
).collect()[0]["majorPopulation"] + + region = chromosome + ":" + str(int(locusStart)) + "-" + str(int(locusEnd)) + + schema = StudyLocus.get_schema() + gwas_df = session.spark.createDataFrame([study_locus_row], schema=schema) + exploded_df = gwas_df.select(f.explode("locus").alias("locus")) + + result_df = exploded_df.select( + "locus.variantId", "locus.beta", "locus.standardError" + ) + gwas_df = ( + result_df.withColumn("z", f.col("beta") / f.col("standardError")) + .withColumn( + "chromosome", f.split(f.col("variantId"), "_")[0].cast("string") + ) + .withColumn("position", f.split(f.col("variantId"), "_")[1].cast("int")) + .filter(f.col("chromosome") == chromosome) + .filter(f.col("position") >= int(locusStart)) + .filter(f.col("position") <= int(locusEnd)) + .filter(f.col("z").isNotNull()) + ) + + # Remove ALL duplicated variants from GWAS DataFrame - we don't know which is correct + variant_counts = gwas_df.groupBy("variantId").count() + unique_variants = variant_counts.filter(f.col("count") == 1) + gwas_df = gwas_df.join(unique_variants, on="variantId", how="left_semi") + + ld_index = LDMatrixInterface.get_locus_index_boundaries( + study_locus_row=study_locus_row, ancestry=major_population, session=session + ) + + # Remove ALL duplicated variants from ld_index DataFrame - we don't know which is correct + variant_counts = ld_index.groupBy("variantId").count() + unique_variants = variant_counts.filter(f.col("count") == 1) + ld_index = ld_index.join(unique_variants, on="variantId", how="left_semi").sort( + "idx" + ) + if "alleleOrder" not in ld_index.columns: + ld_index = ld_index.withColumn("alleleOrder", f.lit(1)) + + if not run_sumstat_imputation: + # Filtering out the variants that are not in the LD matrix, we don't need them + gwas_index = gwas_df.join( + ld_index.select("variantId", "idx", "alleleOrder"), on="variantId" + ).sort("idx") + gwas_df = gwas_index.select( + "variantId", + "z", + "chromosome", + "position", + "beta", + "StandardError", + ) + 
gwas_index = gwas_index.drop( + "z", "chromosome", "position", "beta", "StandardError" + ) + if gwas_index.rdd.isEmpty(): + logging.warning("No overlapping variants in the LD Index") + return None + gnomad_ld = LDMatrixInterface.get_numpy_matrix( + gwas_index, ancestry=major_population + ) + + # Module to remove NANs from the LD matrix + if sum(sum(np.isnan(gnomad_ld))) > 0: + gwas_index = gwas_index.toPandas() + + # First round of filtering out the variants with NANs + nan_count = 1 - (sum(np.isnan(gnomad_ld)) / len(gnomad_ld)) + indices = np.where(nan_count >= 0.98) + indices = indices[0] + gnomad_ld = gnomad_ld[indices][:, indices] + + gwas_index = gwas_index.iloc[indices, :] + + if len(gwas_index) == 0: + logging.warning("No overlapping variants in the LD Index") + return None + + # Second round of filtering out the variants with NANs + nan_count = sum(np.isnan(gnomad_ld)) + indices = np.where(nan_count == 0) + indices = indices[0] + + gnomad_ld = gnomad_ld[indices][:, indices] + gwas_index = gwas_index.iloc[indices, :] + + if len(gwas_index) == 0: + logging.warning("No overlapping variants in the LD Index") + return None + + gwas_index = session.spark.createDataFrame(gwas_index) + + else: + gwas_index = gwas_df.join( + ld_index.select("variantId", "idx", "alleleOrder"), on="variantId" + ).sort("idx") + if gwas_index.rdd.isEmpty(): + logging.warning("No overlapping variants in the LD Index") + return None + gwas_index = ld_index + gnomad_ld = LDMatrixInterface.get_numpy_matrix( + gwas_index, ancestry=major_population + ) + + # Module to remove NANs from the LD matrix + if sum(sum(np.isnan(gnomad_ld))) > 0: + gwas_index = gwas_index.toPandas() + + # First round of filtering out the variants with NANs + nan_count = 1 - (sum(np.isnan(gnomad_ld)) / len(gnomad_ld)) + indices = np.where(nan_count >= 0.98) + indices = indices[0] + gnomad_ld = gnomad_ld[indices][:, indices] + + gwas_index = gwas_index.iloc[indices, :] + + if len(gwas_index) == 0: + logging.warning("No 
overlapping variants in the LD Index") + return None + + # Second round of filtering out the variants with NANs + nan_count = sum(np.isnan(gnomad_ld)) + indices = np.where(nan_count == 0) + indices = indices[0] + + gnomad_ld = gnomad_ld[indices][:, indices] + gwas_index = gwas_index.iloc[indices, :] + + if len(gwas_index) == 0: + logging.warning("No overlapping variants in the LD Index") + return None + + gwas_index = session.spark.createDataFrame(gwas_index) + + # sanity filters on LD matrix + np.fill_diagonal(gnomad_ld, 1) + gnomad_ld[gnomad_ld > 1] = 1 + gnomad_ld[gnomad_ld < -1] = -1 + upper_triangle = np.triu(gnomad_ld) + gnomad_ld = ( + upper_triangle + upper_triangle.T - np.diag(upper_triangle.diagonal()) + ) + np.fill_diagonal(gnomad_ld, 1) + + out = SusieFineMapperStep.susie_finemapper_from_prepared_dataframes( + GWAS_df=gwas_df, + ld_index=gwas_index, + gnomad_ld=gnomad_ld, + L=max_causal_snps, + session=session, + studyId=studyId, + region=region, + locusStart=locusStart, + locusEnd=locusEnd, + susie_est_tausq=susie_est_tausq, + run_carma=run_carma, + run_sumstat_imputation=run_sumstat_imputation, + carma_time_limit=carma_time_limit, + carma_tau=carma_tau, + imputed_r2_threshold=imputed_r2_threshold, + ld_score_threshold=ld_score_threshold, + sum_pips=sum_pips, + lead_pval_threshold=lead_pval_threshold, + purity_mean_r2_threshold=purity_mean_r2_threshold, + purity_min_r2_threshold=purity_min_r2_threshold, + cs_lbf_thr=cs_lbf_thr, + ) + + return out From 38d4cb5a07a32463102764c1991c28cd4391f563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:16:58 +0100 Subject: [PATCH 096/188] ci: configure java v8 (#840) --- .github/workflows/pr.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 5fb541572..f27ae499e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -17,6 +17,11 
@@ jobs: uses: actions/setup-python@v5 with: python-version: 3.10.8 + - name: Set up Java + uses: actions/setup-java@v4 + with: + java-version: "8" + distribution: "temurin" - name: Install and configure Poetry uses: snok/install-poetry@v1 with: From d461e38173d11250df0557d1d52285c4f0097d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:11:34 +0100 Subject: [PATCH 097/188] fix(trainer): drop `studyLocusId` from training sets (#837) --- src/gentropy/method/l2g/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gentropy/method/l2g/trainer.py b/src/gentropy/method/l2g/trainer.py index 69dfb24ff..1c4088462 100644 --- a/src/gentropy/method/l2g/trainer.py +++ b/src/gentropy/method/l2g/trainer.py @@ -155,14 +155,14 @@ def train( Returns: LocusToGeneModel: Fitted model """ - data_df = self.feature_matrix._df.drop("geneId").toPandas() + data_df = self.feature_matrix._df.drop("geneId", "studyLocusId").toPandas() # Encode labels in `goldStandardSet` to a numeric value data_df["goldStandardSet"] = data_df["goldStandardSet"].map( self.model.label_encoder ) - # Convert all columns to numeric and split + # Ensure all columns are numeric and split data_df = data_df.apply(pd.to_numeric) self.feature_cols = [ col From 6817aadcc0ffe2baf53909cba9a05a3f57a10d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:18:08 +0100 Subject: [PATCH 098/188] fix(l2g): remove custom session params + other fixes (#841) * chore: delete l2g custom spark session * fix: add neighbourhood features in `LocusToGeneFeatureMatrixConfig` * fix(feature_matrix): use right config for default values --- src/gentropy/config.py | 21 +++++++++------------ src/gentropy/l2g.py | 4 ++-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/gentropy/config.py 
b/src/gentropy/config.py index 464861803..83a578ab8 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -222,16 +222,7 @@ class LDBasedClumpingConfig(StepConfig): class LocusToGeneConfig(StepConfig): """Locus to gene step configuration.""" - session: Any = field( - default_factory=lambda: { - "extended_spark_conf": { - "spark.dynamicAllocation.enabled": "false", - "spark.driver.memory": "48g", - "spark.executor.memory": "48g", - "spark.sql.shuffle.partitions": "800", - } - } - ) + session: Any = field(default_factory=lambda: {"extended_spark_conf": None}) run_mode: str = MISSING predictions_path: str = MISSING credible_set_path: str = MISSING @@ -313,12 +304,18 @@ class LocusToGeneFeatureMatrixConfig(StepConfig): "eQtlColocClppMaximum", "pQtlColocClppMaximum", "sQtlColocClppMaximum", - "tuQtlColocClppMaximum", # max H4 for each (study, locus, gene) aggregating over a specific qtl type "eQtlColocH4Maximum", "pQtlColocH4Maximum", "sQtlColocH4Maximum", - "tuQtlColocH4Maximum", + # max CLPP for each (study, locus, gene) aggregating over a specific qtl type and in relation with the mean in the vicinity + "eQtlColocClppMaximumNeighbourhood", + "pQtlColocClppMaximumNeighbourhood", + "sQtlColocClppMaximumNeighbourhood", + # max H4 for each (study, locus, gene) aggregating over a specific qtl type and in relation with the mean in the vicinity + "eQtlColocH4MaximumNeighbourhood", + "pQtlColocH4MaximumNeighbourhood", + "sQtlColocH4MaximumNeighbourhood", # distance to gene footprint "distanceSentinelFootprint", "distanceSentinelFootprintNeighbourhood", diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 296aba3d2..8ff41d09b 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -10,7 +10,7 @@ from gentropy.common.session import Session from gentropy.common.utils import access_gcp_secret -from gentropy.config import LocusToGeneConfig +from gentropy.config import LocusToGeneConfig, LocusToGeneFeatureMatrixConfig from 
gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix @@ -31,7 +31,7 @@ def __init__( self, session: Session, *, - features_list: list[str] = LocusToGeneConfig().features_list, + features_list: list[str] = LocusToGeneFeatureMatrixConfig().features_list, credible_set_path: str, variant_index_path: str | None = None, colocalisation_path: str | None = None, From 44f05c438f16266f64a8f45889de875e31bd4a57 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:26:00 +0100 Subject: [PATCH 099/188] build(deps-dev): bump pymdown-extensions from 10.10.1 to 10.11.2 (#815) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [pymdown-extensions](https://github.com/facelessuser/pymdown-extensions) from 10.10.1 to 10.11.2. - [Release notes](https://github.com/facelessuser/pymdown-extensions/releases) - [Commits](https://github.com/facelessuser/pymdown-extensions/compare/10.10.1...10.11.2) --- updated-dependencies: - dependency-name: pymdown-extensions dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Irene López Santiago <45119610+ireneisdoomed@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9cdc7b88e..083f524f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3740,13 +3740,13 @@ files = [ [[package]] name = "pymdown-extensions" -version = "10.10.1" +version = "10.11.2" description = "Extension pack for Python Markdown." 
optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.10.1-py3-none-any.whl", hash = "sha256:6c74ea6c2e2285186a241417480fc2d3cc52941b3ec2dced4014c84dc78c5493"}, - {file = "pymdown_extensions-10.10.1.tar.gz", hash = "sha256:ad277ee4739ced051c3b6328d22ce782358a3bec39bc6ca52815ccbf44f7acdc"}, + {file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"}, + {file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"}, ] [package.dependencies] From 7fa85b22a0ec07137df56b0771fc3b9b84d3161d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:34:02 +0100 Subject: [PATCH 100/188] build(deps-dev): bump mkdocs-git-committers-plugin-2 from 2.3.0 to 2.4.1 (#818) Bumps [mkdocs-git-committers-plugin-2](https://github.com/ojacques/mkdocs-git-committers-plugin-2) from 2.3.0 to 2.4.1. - [Release notes](https://github.com/ojacques/mkdocs-git-committers-plugin-2/releases) - [Commits](https://github.com/ojacques/mkdocs-git-committers-plugin-2/compare/2.3.0...2.4.1) --- updated-dependencies: - dependency-name: mkdocs-git-committers-plugin-2 dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 083f524f9..2d49d84ac 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2472,13 +2472,13 @@ pyyaml = ">=5.1" [[package]] name = "mkdocs-git-committers-plugin-2" -version = "2.3.0" +version = "2.4.1" description = "An MkDocs plugin to create a list of contributors on the page. 
The git-committers plugin will seed the template context with a list of GitHub or GitLab committers and other useful GIT info such as last modified date" optional = false -python-versions = ">=3.8,<4" +python-versions = "<4,>=3.8" files = [ - {file = "mkdocs-git-committers-plugin-2-2.3.0.tar.gz", hash = "sha256:d6baca1ae04db8120640038eda8142f2d081c27b53f3b566c83c75717e4ed81a"}, - {file = "mkdocs_git_committers_plugin_2-2.3.0-py3-none-any.whl", hash = "sha256:7b3434af3be525c12858eb3b44b4c6b695b7c7b7760482ea8de1c6e292e84f0f"}, + {file = "mkdocs_git_committers_plugin_2-2.4.1-py3-none-any.whl", hash = "sha256:ec9c1d81445606c471337d1c4a1782c643b7377077b545279dc18b86b7362c6d"}, + {file = "mkdocs_git_committers_plugin_2-2.4.1.tar.gz", hash = "sha256:ea1f80a79cedc42289e0b8e973276df04fb94f56e0ae3efc5385fb28547cf5cb"}, ] [package.dependencies] From c50f660880d5ecc817a9cd9ca29f36e3b5ec28ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:46:26 +0100 Subject: [PATCH 101/188] build(deps-dev): bump pyparsing from 3.1.2 to 3.2.0 (#836) Bumps [pyparsing](https://github.com/pyparsing/pyparsing) from 3.1.2 to 3.2.0. - [Release notes](https://github.com/pyparsing/pyparsing/releases) - [Changelog](https://github.com/pyparsing/pyparsing/blob/master/CHANGES) - [Commits](https://github.com/pyparsing/pyparsing/compare/pyparsing_3.1.2...3.2.0) --- updated-dependencies: - dependency-name: pyparsing dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2d49d84ac..8c18cf5af 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3758,13 +3758,13 @@ extra = ["pygments (>=2.12)"] [[package]] name = "pyparsing" -version = "3.1.2" +version = "3.2.0" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false -python-versions = ">=3.6.8" +python-versions = ">=3.9" files = [ - {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, - {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, + {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, + {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, ] [package.extras] From 6da92cedd1714237f097b1a7d1d84096b025d9c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:56:02 +0100 Subject: [PATCH 102/188] build(deps-dev): bump mkdocstrings-python from 1.11.1 to 1.12.1 (#842) Bumps [mkdocstrings-python](https://github.com/mkdocstrings/python) from 1.11.1 to 1.12.1. - [Release notes](https://github.com/mkdocstrings/python/releases) - [Changelog](https://github.com/mkdocstrings/python/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/python/compare/1.11.1...1.12.1) --- updated-dependencies: - dependency-name: mkdocstrings-python dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8c18cf5af..254c05a75 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2585,13 +2585,13 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] [[package]] name = "mkdocstrings-python" -version = "1.11.1" +version = "1.12.1" description = "A Python handler for mkdocstrings." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "mkdocstrings_python-1.11.1-py3-none-any.whl", hash = "sha256:a21a1c05acef129a618517bb5aae3e33114f569b11588b1e7af3e9d4061a71af"}, - {file = "mkdocstrings_python-1.11.1.tar.gz", hash = "sha256:8824b115c5359304ab0b5378a91f6202324a849e1da907a3485b59208b797322"}, + {file = "mkdocstrings_python-1.12.1-py3-none-any.whl", hash = "sha256:205244488199c9aa2a39787ad6a0c862d39b74078ea9aa2be817bc972399563f"}, + {file = "mkdocstrings_python-1.12.1.tar.gz", hash = "sha256:60d6a5ca912c9af4ad431db6d0111ce9f79c6c48d33377dde6a05a8f5f48d792"}, ] [package.dependencies] From e34e0c978acdf69df382cb5cfa7c9d0a25292622 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 15 Oct 2024 10:47:53 +0100 Subject: [PATCH 103/188] chore: adding priors to coloc step (#830) * chore: adding priors to step * fix: making specifying the priors optional * fix: revert optional --------- Co-authored-by: Daniel Considine Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/colocalisation.py | 10 +++++++++- src/gentropy/config.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 0dcdff206..6c2aa3467 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -24,6 +24,9 @@ def __init__( credible_set_path: str, coloc_path: str, colocalisation_method: str, + priorc1: float = 1e-4, + priorc2: float = 1e-4, + priorc12: float = 1e-5, ) -> None: 
"""Run Colocalisation step. @@ -32,6 +35,9 @@ def __init__( credible_set_path (str): Input credible sets path. coloc_path (str): Output Colocalisation path. colocalisation_method (str): Colocalisation method. + priorc1 (float): Prior on variant being causal for trait 1. Defaults to 1e-4. + priorc2 (float): Prior on variant being causal for trait 2. Defaults to 1e-4. + priorc12 (float): Prior on variant being causal for both traits. Defaults to 1e-5. """ colocalisation_class = self._get_colocalisation_class(colocalisation_method) # Extract @@ -47,7 +53,9 @@ def __init__( # Transform overlaps = credible_set.find_overlaps() - colocalisation_results = colocalisation_class.colocalise(overlaps) # type: ignore + colocalisation_results = colocalisation_class.colocalise( # type: ignore + overlaps, priorc1=priorc1, priorc2=priorc2, priorc12=priorc12 + ) # Load colocalisation_results.df.write.mode(session.write_mode).parquet( diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 83a578ab8..d7d12df21 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -38,6 +38,9 @@ class ColocalisationConfig(StepConfig): credible_set_path: str = MISSING coloc_path: str = MISSING colocalisation_method: str = MISSING + priorc1: float = MISSING + priorc2: float = MISSING + priorc12: float = MISSING _target_: str = "gentropy.colocalisation.ColocalisationStep" From 0a6d57b1fcb59ffc9ca71f6c4ea2f7ceaa07df49 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 15 Oct 2024 13:14:47 +0100 Subject: [PATCH 104/188] fix: fix ukbppp studindex (#839) * fix: fix ukbppp studindex * fix: review --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- .../datasource/ukb_ppp_eur/study_index.py | 26 +++++++++++-------- .../ukb_ppp_eur_sumstat_preprocess.py | 26 ++++++++++++++----- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/gentropy/datasource/ukb_ppp_eur/study_index.py b/src/gentropy/datasource/ukb_ppp_eur/study_index.py 
index 8a3105f5d..890630e54 100644 --- a/src/gentropy/datasource/ukb_ppp_eur/study_index.py +++ b/src/gentropy/datasource/ukb_ppp_eur/study_index.py @@ -52,17 +52,21 @@ def from_source( .join(num_of_samples, "studyId", "inner") ) # Add population structure. - study_index_df = study_index_df.withColumn( - "discoverySamples", - f.array( - f.struct( - f.col("nSamples").cast("integer").alias("sampleSize"), - f.lit("European").alias("ancestry"), - ) - ), - ).withColumn( - "ldPopulationStructure", - cls.aggregate_and_map_ancestries(f.col("discoverySamples")), + study_index_df = ( + study_index_df.withColumn( + "discoverySamples", + f.array( + f.struct( + f.col("nSamples").cast("integer").alias("sampleSize"), + f.lit("European").alias("ancestry"), + ) + ), + ) + .withColumn( + "ldPopulationStructure", + cls.aggregate_and_map_ancestries(f.col("discoverySamples")), + ) + .withColumn("biosampleFromSourceId", f.lit("UBERON_0001969")) ) return StudyIndex( diff --git a/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py b/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py index 3cee45c6a..8f0f3eef2 100644 --- a/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py +++ b/src/gentropy/ukb_ppp_eur_sumstat_preprocess.py @@ -15,7 +15,14 @@ class UkbPppEurStep: """UKB PPP (EUR) data ingestion and harmonisation.""" def __init__( - self, session: Session, raw_study_index_path_from_tsv: str, raw_summary_stats_path: str, variant_annotation_path: str, tmp_variant_annotation_path: str, study_index_output_path: str, summary_stats_output_path: str + self, + session: Session, + raw_study_index_path_from_tsv: str, + raw_summary_stats_path: str, + variant_annotation_path: str, + tmp_variant_annotation_path: str, + study_index_output_path: str, + summary_stats_output_path: str, ) -> None: """Run UKB PPP (EUR) data ingestion and harmonisation step. @@ -28,7 +35,9 @@ def __init__( study_index_output_path (str): Study index output path. summary_stats_output_path (str): Summary stats output path. 
""" - session.logger.info("Pre-compute the direct and flipped variant annotation dataset.") + session.logger.info( + "Pre-compute the direct and flipped variant annotation dataset." + ) prepare_va(session, variant_annotation_path, tmp_variant_annotation_path) session.logger.info("Process study index.") @@ -38,11 +47,16 @@ def __init__( raw_study_index_path_from_tsv=raw_study_index_path_from_tsv, raw_summary_stats_path=raw_summary_stats_path, ) - .df - .write - .mode("overwrite") + .df.write.mode("overwrite") .parquet(study_index_output_path) ) session.logger.info("Process and harmonise summary stats.") - process_summary_stats_per_chromosome(session, UkbPppEurSummaryStats, raw_summary_stats_path, tmp_variant_annotation_path, summary_stats_output_path, study_index_output_path) + process_summary_stats_per_chromosome( + session, + UkbPppEurSummaryStats, + raw_summary_stats_path, + tmp_variant_annotation_path, + summary_stats_output_path, + study_index_output_path, + ) From 84a7a0d85a0652dbc895b3c3309cdff77ad23fe2 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 15 Oct 2024 16:26:21 +0100 Subject: [PATCH 105/188] fix: l2g fixes (#844) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: unnecessary session object * fix: circular dependency * revert: broadcast * fix: join * feat(l2g): persist feature matrix in l2g training plan (#843) * fix(gold_standard): remove extra columns in `build_feature_matrix` * fix: uncomment filter overlaps * feat(feature_matrix): `select_features` returns a new instance * fix(overlaps): add missing columns in `_convert_to_square_matrix` * fix(l2g): set `variant_index_path` as optional in step config * fix(l2g): set `feature_matrix_path` as mandatory in step config * fix(l2g): set `predictions_path` as optional in step config * chore(feature_matrix): remove `features_list` default from step * chore: fix `test_filter_unique_associations` toy data * chore: fix `test_filter_unique_associations` toy 
data --------- Co-authored-by: Irene López Santiago <45119610+ireneisdoomed@users.noreply.github.com> Co-authored-by: Irene López --- src/gentropy/config.py | 7 +-- src/gentropy/dataset/l2g_feature_matrix.py | 33 +++++++--- src/gentropy/dataset/l2g_gold_standard.py | 3 +- src/gentropy/dataset/study_locus_overlap.py | 3 + src/gentropy/l2g.py | 60 ++++++++++--------- tests/gentropy/conftest.py | 8 +-- tests/gentropy/dataset/test_l2g.py | 7 ++- .../dataset/test_l2g_feature_matrix.py | 20 ++++--- .../dataset/test_study_locus_overlap.py | 10 ++-- 9 files changed, 92 insertions(+), 59 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index d7d12df21..a05ba3258 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -225,13 +225,12 @@ class LDBasedClumpingConfig(StepConfig): class LocusToGeneConfig(StepConfig): """Locus to gene step configuration.""" - session: Any = field(default_factory=lambda: {"extended_spark_conf": None}) run_mode: str = MISSING - predictions_path: str = MISSING credible_set_path: str = MISSING - variant_index_path: str = MISSING + feature_matrix_path: str = MISSING + predictions_path: str | None = None + variant_index_path: str | None = None model_path: str | None = None - feature_matrix_path: str | None = None gold_standard_curation_path: str | None = None gene_interactions_path: str | None = None features_list: list[str] = field( diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index b4893a785..bb4942312 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -5,6 +5,8 @@ from functools import reduce from typing import TYPE_CHECKING, Type +from typing_extensions import Self + from gentropy.common.spark_helpers import convert_from_long_to_wide from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.method.l2g.feature_factory import FeatureFactory, L2GFeatureInputLoader @@ -31,8 +33,9 @@ def 
__init__( features_list (list[str] | None): List of features to use. If None, all possible features are used. with_gold_standard (bool): Whether to include the gold standard set in the feature matrix. """ + self.with_gold_standard = with_gold_standard self.fixed_cols = ["studyLocusId", "geneId"] - if with_gold_standard: + if self.with_gold_standard: self.fixed_cols.append("goldStandardSet") self.features_list = features_list or [ @@ -143,7 +146,7 @@ def select_features( self: L2GFeatureMatrix, features_list: list[str] | None, ) -> L2GFeatureMatrix: - """Select a subset of features from the feature matrix. + """Returns a new object with a subset of features from the original feature matrix. Args: features_list (list[str] | None): List of features to select @@ -156,12 +159,24 @@ def select_features( """ if features_list := features_list or self.features_list: # cast to float every feature in the features_list - self._df = self._df.selectExpr( - self.fixed_cols - + [ - f"CAST({feature} AS FLOAT) AS {feature}" - for feature in features_list - ] + return L2GFeatureMatrix( + _df=self._df.selectExpr( + self.fixed_cols + + [ + f"CAST({feature} AS FLOAT) AS {feature}" + for feature in features_list + ] + ), + features_list=features_list, + with_gold_standard=self.with_gold_standard, ) - return self raise ValueError("features_list cannot be None") + + def persist(self: Self) -> Self: + """Persist the feature matrix in memory. 
+ + Returns: + Self: Persisted Dataset + """ + self._df = self._df.persist() + return self diff --git a/src/gentropy/dataset/l2g_gold_standard.py b/src/gentropy/dataset/l2g_gold_standard.py index e1083fbf0..f1df3a700 100644 --- a/src/gentropy/dataset/l2g_gold_standard.py +++ b/src/gentropy/dataset/l2g_gold_standard.py @@ -59,7 +59,7 @@ def from_otg_curation( OpenTargetsL2GGoldStandard.as_l2g_gold_standard( gold_standard_curation, variant_index ) - # .filter_unique_associations(study_locus_overlap) + .filter_unique_associations(study_locus_overlap) .remove_false_negatives(interactions_df) ) @@ -132,6 +132,7 @@ def build_feature_matrix( on=["studyId", "variantId", "geneId"], how="inner", ) + .drop("studyId", "variantId") .distinct(), with_gold_standard=True, ) diff --git a/src/gentropy/dataset/study_locus_overlap.py b/src/gentropy/dataset/study_locus_overlap.py index d14a2da96..a6288a5e8 100644 --- a/src/gentropy/dataset/study_locus_overlap.py +++ b/src/gentropy/dataset/study_locus_overlap.py @@ -1,4 +1,5 @@ """Study locus overlap index dataset.""" + from __future__ import annotations from dataclasses import dataclass @@ -60,6 +61,8 @@ def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap: "rightStudyLocusId as leftStudyLocusId", "rightStudyType", "tagVariantId", + "chromosome", + "statistics", ) ).distinct(), _schema=self.get_schema(), diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 8ff41d09b..1a5037bb2 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -10,7 +10,6 @@ from gentropy.common.session import Session from gentropy.common.utils import access_gcp_secret -from gentropy.config import LocusToGeneConfig, LocusToGeneFeatureMatrixConfig from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix @@ -31,7 +30,7 @@ def __init__( self, session: Session, *, - features_list: list[str] = 
LocusToGeneFeatureMatrixConfig().features_list, + features_list: list[str], credible_set_path: str, variant_index_path: str | None = None, colocalisation_path: str | None = None, @@ -94,20 +93,20 @@ class LocusToGeneStep: def __init__( self, session: Session, - hyperparameters: dict[str, Any] = LocusToGeneConfig().hyperparameters, + hyperparameters: dict[str, Any], *, run_mode: str, - features_list: list[str] = LocusToGeneConfig().features_list, - download_from_hub: bool = LocusToGeneConfig().download_from_hub, + features_list: list[str], + download_from_hub: bool, wandb_run_name: str, - model_path: str | None = None, credible_set_path: str, feature_matrix_path: str, + model_path: str | None = None, gold_standard_curation_path: str | None = None, variant_index_path: str | None = None, gene_interactions_path: str | None = None, predictions_path: str | None = None, - hf_hub_repo_id: str | None = LocusToGeneConfig().hf_hub_repo_id, + hf_hub_repo_id: str | None, ) -> None: """Initialise the step and run the logic based on mode. @@ -118,9 +117,9 @@ def __init__( features_list (list[str]): List of features to use for the model download_from_hub (bool): Whether to download the model from Hugging Face Hub wandb_run_name (str): Name of the run to track model training in Weights and Biases - model_path (str | None): Path to the model. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). credible_set_path (str): Path to the credible set dataset necessary to build the feature matrix feature_matrix_path (str): Path to the L2G feature matrix input dataset + model_path (str | None): Path to the model. It can be either in the filesystem or the name on the Hugging Face Hub (in the form of username/repo_name). 
gold_standard_curation_path (str | None): Path to the gold standard curation file variant_index_path (str | None): Path to the variant index gene_interactions_path (str | None): Path to the gene interactions dataset @@ -174,7 +173,13 @@ def __init__( self.run_train() def run_predict(self) -> None: - """Run the prediction step.""" + """Run the prediction step. + + Raises: + ValueError: If predictions_path is not provided for prediction mode + """ + if not self.predictions_path: + raise ValueError("predictions_path must be provided for prediction mode") predictions = L2GPrediction.from_credible_set( self.session, self.credible_set, @@ -184,11 +189,10 @@ def run_predict(self) -> None: hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), download_from_hub=self.download_from_hub, ) - if self.predictions_path: - predictions.df.write.mode(self.session.write_mode).parquet( - self.predictions_path - ) - self.session.logger.info(self.predictions_path) + predictions.df.write.mode(self.session.write_mode).parquet( + self.predictions_path + ) + self.session.logger.info("L2G predictions saved successfully.") def run_train(self) -> None: """Run the training step.""" @@ -239,20 +243,21 @@ def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: if self.gs_curation and self.interactions and self.variant_index: study_locus_overlap = StudyLocus( _df=self.credible_set.df.join( - f.broadcast( - self.gs_curation.select( - f.concat_ws( - "_", - f.col("sentinel_variant.locus_GRCh38.chromosome"), - f.col("sentinel_variant.locus_GRCh38.position"), - f.col("sentinel_variant.alleles.reference"), - f.col("sentinel_variant.alleles.alternative"), - ).alias("variantId"), - f.col("association_info.otg_id").alias("studyId"), - ) + self.gs_curation.select( + f.concat_ws( + "_", + f.col("sentinel_variant.locus_GRCh38.chromosome"), + f.col("sentinel_variant.locus_GRCh38.position"), + f.col("sentinel_variant.alleles.reference"), + 
f.col("sentinel_variant.alleles.alternative"), + ).alias("variantId"), + f.col("association_info.otg_id").alias("studyId"), ), - ["studyId", "variantId"], - "inner", + on=[ + "studyId", + "variantId", + ], + how="inner", ), _schema=StudyLocus.get_schema(), ).find_overlaps() @@ -270,5 +275,6 @@ def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: ) .fill_na() .select_features(self.features_list) + .persist() ) raise ValueError("Dependencies for train mode not set.") diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 9ea31ae20..81a378ee2 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -600,12 +600,12 @@ def mock_l2g_feature_matrix(spark: SparkSession) -> L2GFeatureMatrix: return L2GFeatureMatrix( _df=spark.createDataFrame( [ - ("1", "gene1", 100.0, None), - ("2", "gene2", 1000.0, 0.0), + ("1", "gene1", 100.0, None, True), + ("2", "gene2", 1000.0, 0.0, False), ], - "studyLocusId STRING, geneId STRING, distanceTssMean FLOAT, distanceSentinelTssMinimum FLOAT", + "studyLocusId STRING, geneId STRING, distanceTssMean FLOAT, distanceSentinelTssMinimum FLOAT, goldStandardSet BOOLEAN", ), - with_gold_standard=False, + with_gold_standard=True, ) diff --git a/tests/gentropy/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py index f73b6f7c2..293735edd 100644 --- a/tests/gentropy/dataset/test_l2g.py +++ b/tests/gentropy/dataset/test_l2g.py @@ -70,8 +70,11 @@ def test_filter_unique_associations(spark: SparkSession) -> None: ) mock_sl_overlap_df = spark.createDataFrame( - [("1", "2", "eqtl", "variant2"), ("1", "4", "eqtl", "variant4")], - "leftStudyLocusId STRING, rightStudyLocusId STRING, rightStudyType STRING, tagVariantId STRING", + [ + ("1", "2", "eqtl", "CHROM1", "variant2", None), + ("1", "4", "eqtl", "CHROM1", "variant4", None), + ], + StudyLocusOverlap.get_schema(), ) expected_df = spark.createDataFrame( diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py 
b/tests/gentropy/dataset/test_l2g_feature_matrix.py index b74b6330a..f4859844a 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -5,13 +5,7 @@ from typing import TYPE_CHECKING import pytest -from pyspark.sql.types import ( - ArrayType, - DoubleType, - StringType, - StructField, - StructType, -) +from pyspark.sql.types import ArrayType, DoubleType, StringType, StructField, StructType from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix @@ -24,6 +18,18 @@ from pyspark.sql import SparkSession +def test_select_features_inheritance( + spark: SparkSession, mock_l2g_feature_matrix: L2GFeatureMatrix +) -> None: + """Test L2GFeatureMatrix.select_features method inherits the instance attributes in the new instance.""" + new_instance = mock_l2g_feature_matrix.select_features( + features_list=["distanceTssMean"] + ) + assert new_instance.features_list == ["distanceTssMean"] + # Because the feature matrix contains the gold standard flag information, the new fixed colums should be the same + assert "goldStandardSet" in new_instance.fixed_cols + + class TestFromFeaturesList: """Test L2GFeatureMatrix.from_features_list method. 
diff --git a/tests/gentropy/dataset/test_study_locus_overlap.py b/tests/gentropy/dataset/test_study_locus_overlap.py index 5dcba19c9..c517c023d 100644 --- a/tests/gentropy/dataset/test_study_locus_overlap.py +++ b/tests/gentropy/dataset/test_study_locus_overlap.py @@ -19,19 +19,19 @@ def test_convert_to_square_matrix(spark: SparkSession) -> None: mock_sl_overlap = StudyLocusOverlap( _df=spark.createDataFrame( [ - ("1", "2", "eqtl", "variant2"), + ("1", "2", "eqtl", "CHROM1", "variant2", None), ], - "leftStudyLocusId STRING, rightStudyLocusId STRING, rightStudyType STRING, tagVariantId STRING", + StudyLocusOverlap.get_schema(), ), _schema=StudyLocusOverlap.get_schema(), ) expected_df = spark.createDataFrame( [ - ("1", "2", "eqtl", "variant2"), - ("2", "1", "eqtl", "variant2"), + ("1", "2", "eqtl", "CHROM1", "variant2", None), + ("2", "1", "eqtl", "CHROM1", "variant2", None), ], - "leftStudyLocusId STRING, rightStudyLocusId STRING, rightStudyType STRING, tagVariantId STRING", + StudyLocusOverlap.get_schema(), ) observed_df = mock_sl_overlap._convert_to_square_matrix().df From 172cedfedfc336dbd6277286bae736eb2b9504f9 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 15 Oct 2024 16:47:41 +0100 Subject: [PATCH 106/188] chore: remove h4/h3 ratio (#829) * chore: remove h3/h4 ratiob * fix: remove it from schema * fix: conftest * fix: h3h4 --- src/gentropy/assets/schemas/colocalisation.json | 6 ------ src/gentropy/method/colocalisation.py | 17 ++++++++++++----- tests/gentropy/conftest.py | 1 - 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/gentropy/assets/schemas/colocalisation.json b/src/gentropy/assets/schemas/colocalisation.json index 7d05c849a..0bfb66816 100644 --- a/src/gentropy/assets/schemas/colocalisation.json +++ b/src/gentropy/assets/schemas/colocalisation.json @@ -67,12 +67,6 @@ "nullable": true, "metadata": {} }, - { - "name": "log2h4h3", - "type": "double", - "nullable": true, - "metadata": {} - }, { "name": "clpp", "type": "double", diff 
--git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py index 7a3a0d9c5..7d711d9a3 100644 --- a/src/gentropy/method/colocalisation.py +++ b/src/gentropy/method/colocalisation.py @@ -79,7 +79,12 @@ def colocalise( f.col("statistics.right_posteriorProbability"), ), ) - .groupBy("leftStudyLocusId", "rightStudyLocusId", "rightStudyType", "chromosome") + .groupBy( + "leftStudyLocusId", + "rightStudyLocusId", + "rightStudyType", + "chromosome", + ) .agg( f.count("*").alias("numberColocalisingVariants"), f.sum(f.col("clpp")).alias("clpp"), @@ -168,7 +173,12 @@ def colocalise( f.col("left_logBF") + f.col("right_logBF"), ) # Group by overlapping peak and generating dense vectors of log_BF: - .groupBy("chromosome", "leftStudyLocusId", "rightStudyLocusId", "rightStudyType") + .groupBy( + "chromosome", + "leftStudyLocusId", + "rightStudyLocusId", + "rightStudyType", + ) .agg( f.count("*").alias("numberColocalisingVariants"), fml.array_to_vector(f.collect_list(f.col("left_logBF"))).alias( @@ -245,13 +255,10 @@ def colocalise( .withColumn("h2", f.col("posteriors").getItem(2)) .withColumn("h3", f.col("posteriors").getItem(3)) .withColumn("h4", f.col("posteriors").getItem(4)) - .withColumn("h4h3", f.col("h4") / f.col("h3")) - .withColumn("log2h4h3", f.log2(f.col("h4h3"))) # clean up .drop( "posteriors", "allBF", - "h4h3", "lH0bf", "lH1bf", "lH2bf", diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 81a378ee2..b1fc5ef90 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -81,7 +81,6 @@ def mock_colocalisation(spark: SparkSession) -> Colocalisation: .withColumnSpec("h2", percentNulls=0.1) .withColumnSpec("h3", percentNulls=0.1) .withColumnSpec("h4", percentNulls=0.1) - .withColumnSpec("log2h4h3", percentNulls=0.1) .withColumnSpec("clpp", percentNulls=0.1) .withColumnSpec( "colocalisationMethod", From 97a8873a34621eb7ef3e74e34b7426bc54c0e501 Mon Sep 17 00:00:00 2001 From: Daniel-Considine 
<113430683+Daniel-Considine@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:11:15 +0100 Subject: [PATCH 107/188] fix: updating the susie_finemapper init (#846) * fix: updating the susie_finemapper init * fix: more fixes * fix: v2 * fix: v2 --------- Co-authored-by: Yakov Tsepilov --- src/gentropy/susie_finemapper.py | 276 +++---------------------------- 1 file changed, 25 insertions(+), 251 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 7d04b5763..25adbccbe 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -26,8 +26,7 @@ order_array_of_structs_by_field, ) from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus -from gentropy.datasource.gnomad.ld import GnomADLDMatrix +from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck from gentropy.method.carma import CARMA from gentropy.method.ld_matrix_interface import LDMatrixInterface from gentropy.method.sumstat_imputation import SummaryStatisticsImputation @@ -104,7 +103,7 @@ def __init__( study_index = StudyIndex.from_parquet(session, study_index_path) # Run fine-mapping - result_logging = self.susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( + result_logging = self.susie_finemapper_one_sl_row_gathered_boundaries( session=session, study_locus_row=study_locus, study_index=study_index, @@ -127,9 +126,19 @@ def __init__( if result_logging is not None: if result_logging["study_locus"] is not None: # Write result - result_logging["study_locus"].df.write.mode(session.write_mode).parquet( - study_locus_output + df = result_logging["study_locus"].df + + df = df.withColumn("qualityControls", f.lit(None)) + df = df.withColumn( + "qualityControls", + StudyLocus.update_quality_flag( + f.col("qualityControls"), + f.lit(True), + StudyLocusQualityCheck.OUT_OF_SAMPLE_LD, + ), ) + + df.write.mode(session.write_mode).parquet(study_locus_output) # Write log 
result_logging["log"].to_parquet( study_locus_output + ".log", @@ -426,6 +435,7 @@ def susie_finemapper_from_prepared_dataframes( purity_min_r2_threshold: float = 0.25, cs_lbf_thr: float = 2, ld_min_r2: float = 0.9, + N_total: int = 100_000, ) -> dict[str, Any] | None: """Susie fine-mapper function that uses LD, z-scores, variant info and other options for Fine-Mapping. @@ -452,6 +462,7 @@ def susie_finemapper_from_prepared_dataframes( purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 ld_min_r2 (float): Threshold to fillter CS by leads in high LD, default is 0.9 + N_total (int): total number of samples, default is 100_000 Returns: dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map @@ -550,7 +561,7 @@ def susie_finemapper_from_prepared_dataframes( N_imputed = 0 susie_output = SUSIE_inf.susie_inf( - z=z_to_fm, LD=ld_to_fm, L=L, est_tausq=susie_est_tausq + z=z_to_fm, LD=ld_to_fm, L=L, est_tausq=susie_est_tausq, n=N_total ) schema = StructType( @@ -613,7 +624,7 @@ def susie_finemapper_from_prepared_dataframes( } @staticmethod - def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( + def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 session: Session, study_locus_row: Row, study_index: StudyIndex, @@ -673,248 +684,9 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries( )[0]["ldPopulation"].alias("majorPopulation"), ).collect()[0]["majorPopulation"] - region = chromosome + ":" + str(int(locusStart)) + "-" + str(int(locusEnd)) - - schema = StudyLocus.get_schema() - gwas_df = session.spark.createDataFrame([study_locus_row], schema=schema) - exploded_df = gwas_df.select(f.explode("locus").alias("locus")) - - result_df = exploded_df.select( - 
"locus.variantId", "locus.beta", "locus.standardError" - ) - gwas_df = ( - result_df.withColumn("z", f.col("beta") / f.col("standardError")) - .withColumn( - "chromosome", f.split(f.col("variantId"), "_")[0].cast("string") - ) - .withColumn("position", f.split(f.col("variantId"), "_")[1].cast("int")) - .filter(f.col("chromosome") == chromosome) - .filter(f.col("position") >= int(locusStart)) - .filter(f.col("position") <= int(locusEnd)) - .filter(f.col("z").isNotNull()) - ) - - # Remove ALL duplicated variants from GWAS DataFrame - we don't know which is correct - variant_counts = gwas_df.groupBy("variantId").count() - unique_variants = variant_counts.filter(f.col("count") == 1) - gwas_df = gwas_df.join(unique_variants, on="variantId", how="left_semi") - - ld_index = ( - GnomADLDMatrix() - .get_locus_index_boundaries( - study_locus_row=study_locus_row, - major_population=major_population, - ) - .withColumn( - "variantId", - f.concat( - f.lit(chromosome), - f.lit("_"), - f.col("`locus.position`"), - f.lit("_"), - f.col("alleles").getItem(0), - f.lit("_"), - f.col("alleles").getItem(1), - ).cast("string"), - ) - ) - # Remove ALL duplicated variants from ld_index DataFrame - we don't know which is correct - variant_counts = ld_index.groupBy("variantId").count() - unique_variants = variant_counts.filter(f.col("count") == 1) - ld_index = ld_index.join(unique_variants, on="variantId", how="left_semi").sort( - "idx" - ) - - if not run_sumstat_imputation: - # Filtering out the variants that are not in the LD matrix, we don't need them - gwas_index = gwas_df.join( - ld_index.select("variantId", "alleles", "idx"), on="variantId" - ).sort("idx") - gwas_df = gwas_index.select( - "variantId", - "z", - "chromosome", - "position", - "beta", - "StandardError", - ) - gwas_index = gwas_index.drop( - "z", "chromosome", "position", "beta", "StandardError" - ) - if gwas_index.rdd.isEmpty(): - logging.warning("No overlapping variants in the LD Index") - return None - gnomad_ld = 
GnomADLDMatrix.get_numpy_matrix( - gwas_index, gnomad_ancestry=major_population - ) - - # Module to remove NANs from the LD matrix - if sum(sum(np.isnan(gnomad_ld))) > 0: - gwas_index = gwas_index.toPandas() - - # First round of filtering out the variants with NANs - nan_count = 1 - (sum(np.isnan(gnomad_ld)) / len(gnomad_ld)) - indices = np.where(nan_count >= 0.98) - indices = indices[0] - gnomad_ld = gnomad_ld[indices][:, indices] - - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - # Second round of filtering out the variants with NANs - nan_count = sum(np.isnan(gnomad_ld)) - indices = np.where(nan_count == 0) - indices = indices[0] - - gnomad_ld = gnomad_ld[indices][:, indices] - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - gwas_index = session.spark.createDataFrame(gwas_index) - - else: - gwas_index = gwas_df.join( - ld_index.select("variantId", "alleles", "idx"), on="variantId" - ).sort("idx") - if gwas_index.rdd.isEmpty(): - logging.warning("No overlapping variants in the LD Index") - return None - gwas_index = ld_index - gnomad_ld = GnomADLDMatrix.get_numpy_matrix( - gwas_index, gnomad_ancestry=major_population - ) - - # Module to remove NANs from the LD matrix - if sum(sum(np.isnan(gnomad_ld))) > 0: - gwas_index = gwas_index.toPandas() - - # First round of filtering out the variants with NANs - nan_count = 1 - (sum(np.isnan(gnomad_ld)) / len(gnomad_ld)) - indices = np.where(nan_count >= 0.98) - indices = indices[0] - gnomad_ld = gnomad_ld[indices][:, indices] - - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - # Second round of filtering out the variants with NANs - nan_count = sum(np.isnan(gnomad_ld)) - indices = np.where(nan_count == 0) - indices = 
indices[0] - - gnomad_ld = gnomad_ld[indices][:, indices] - gwas_index = gwas_index.iloc[indices, :] - - if len(gwas_index) == 0: - logging.warning("No overlapping variants in the LD Index") - return None - - gwas_index = session.spark.createDataFrame(gwas_index) - - # sanity filters on LD matrix - np.fill_diagonal(gnomad_ld, 1) - gnomad_ld[gnomad_ld > 1] = 1 - gnomad_ld[gnomad_ld < -1] = -1 - upper_triangle = np.triu(gnomad_ld) - gnomad_ld = ( - upper_triangle + upper_triangle.T - np.diag(upper_triangle.diagonal()) - ) - np.fill_diagonal(gnomad_ld, 1) - - out = SusieFineMapperStep.susie_finemapper_from_prepared_dataframes( - GWAS_df=gwas_df, - ld_index=gwas_index, - gnomad_ld=gnomad_ld, - L=max_causal_snps, - session=session, - studyId=studyId, - region=region, - locusStart=int(locusStart), - locusEnd=int(locusEnd), - susie_est_tausq=susie_est_tausq, - run_carma=run_carma, - run_sumstat_imputation=run_sumstat_imputation, - carma_time_limit=carma_time_limit, - carma_tau=carma_tau, - imputed_r2_threshold=imputed_r2_threshold, - ld_score_threshold=ld_score_threshold, - sum_pips=sum_pips, - lead_pval_threshold=lead_pval_threshold, - purity_mean_r2_threshold=purity_mean_r2_threshold, - purity_min_r2_threshold=purity_min_r2_threshold, - cs_lbf_thr=cs_lbf_thr, - ld_min_r2=ld_min_r2, - ) - - return out - - @staticmethod - def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries_ldinterface( # noqa: C901 - session: Session, - study_locus_row: Row, - study_index: StudyIndex, - max_causal_snps: int = 10, - susie_est_tausq: bool = False, - run_carma: bool = False, - run_sumstat_imputation: bool = False, - carma_time_limit: int = 600, - carma_tau: float = 0.04, - imputed_r2_threshold: float = 0.9, - ld_score_threshold: float = 5, - sum_pips: float = 0.99, - lead_pval_threshold: float = 1e-5, - purity_mean_r2_threshold: float = 0, - purity_min_r2_threshold: float = 0.25, - cs_lbf_thr: float = 2, - ) -> dict[str, Any] | None: - """Susie fine-mapper function that uses 
study-locus row with collected locus, chromosome and position as inputs. - - Args: - session (Session): Spark session - study_locus_row (Row): StudyLocus row with collected locus - study_index (StudyIndex): StudyIndex object - max_causal_snps (int): maximum number of causal variants - susie_est_tausq (bool): estimate tau squared, default is False - run_carma (bool): run CARMA, default is False - run_sumstat_imputation (bool): run summary statistics imputation, default is False - carma_time_limit (int): CARMA time limit, default is 600 seconds - carma_tau (float): CARMA tau, shrinkage parameter - imputed_r2_threshold (float): imputed R2 threshold, default is 0.8 - ld_score_threshold (float): LD score threshold ofr imputation, default is 4 - sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) - lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5 - purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets - purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets - cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 - - Returns: - dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map, or None - """ - # PLEASE DO NOT REMOVE THIS LINE - pd.DataFrame.iteritems = pd.DataFrame.items - - chromosome = study_locus_row["chromosome"] - studyId = study_locus_row["studyId"] - locusStart = study_locus_row["locusStart"] - locusEnd = study_locus_row["locusEnd"] - - study_index_df = study_index._df - study_index_df = study_index_df.filter(f.col("studyId") == studyId) - major_population = study_index_df.select( - "studyId", - order_array_of_structs_by_field( - "ldPopulationStructure", "relativeSampleSize" - 
)[0]["ldPopulation"].alias("majorPopulation"), - ).collect()[0]["majorPopulation"] + N_total = int(study_index_df.select("nSamples").collect()[0]["nSamples"]) + if N_total is None: + N_total = 100_000 region = chromosome + ":" + str(int(locusStart)) + "-" + str(int(locusEnd)) @@ -1068,8 +840,8 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries_ldinterface( # noqa: session=session, studyId=studyId, region=region, - locusStart=locusStart, - locusEnd=locusEnd, + locusStart=int(locusStart), + locusEnd=int(locusEnd), susie_est_tausq=susie_est_tausq, run_carma=run_carma, run_sumstat_imputation=run_sumstat_imputation, @@ -1082,6 +854,8 @@ def susie_finemapper_one_sl_row_v4_ss_gathered_boundaries_ldinterface( # noqa: purity_mean_r2_threshold=purity_mean_r2_threshold, purity_min_r2_threshold=purity_min_r2_threshold, cs_lbf_thr=cs_lbf_thr, + ld_min_r2=ld_min_r2, + N_total=N_total, ) return out From 6a059d0f4d25e3b6ed1fe506ddffd344dc77bd43 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:39:13 +0200 Subject: [PATCH 108/188] feat(coloc): step refactoring (#845) * feat: prior fix * refactor: coloc step * fix: subclasss colocalisation methods on interface * fix: failing coloc step * chore(coloc): step tests * chore: restore old script --------- Co-authored-by: Szymon Szyszkowski Co-authored-by: project-defiant --- src/gentropy/colocalisation.py | 73 +++-- src/gentropy/config.py | 4 +- src/gentropy/dataset/colocalisation.py | 2 +- src/gentropy/method/colocalisation.py | 59 +++- tests/gentropy/dataset/test_study_index.py | 1 - .../method/test_colocalisation_method.py | 6 + .../gentropy/step/test_colocalisation_step.py | 293 ++++++++++++++++++ 7 files changed, 392 insertions(+), 46 deletions(-) create mode 100644 tests/gentropy/step/test_colocalisation_step.py diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 6c2aa3467..a45a9a6a1 100644 --- 
a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -2,14 +2,14 @@ from __future__ import annotations -import inspect -from importlib import import_module +from functools import partial +from typing import Any, Type from pyspark.sql.functions import col from gentropy.common.session import Session from gentropy.dataset.study_locus import StudyLocus -from gentropy.method.colocalisation import Coloc +from gentropy.method.colocalisation import Coloc, ColocalisationMethodInterface class ColocalisationStep: @@ -18,59 +18,71 @@ class ColocalisationStep: This workflow runs colocalisation analyses that assess the degree to which independent signals of the association share the same causal variant in a region of the genome, typically limited by linkage disequilibrium (LD). """ + __coloc_methods__ = { + method.METHOD_NAME.lower(): method + for method in ColocalisationMethodInterface.__subclasses__() + } + def __init__( self, session: Session, credible_set_path: str, coloc_path: str, colocalisation_method: str, - priorc1: float = 1e-4, - priorc2: float = 1e-4, - priorc12: float = 1e-5, + colocalisation_method_params: dict[str, Any] | None = None, ) -> None: """Run Colocalisation step. + This step allows for running two colocalisation methods: ecaviar and coloc. + Args: session (Session): Session object. credible_set_path (str): Input credible sets path. coloc_path (str): Output Colocalisation path. colocalisation_method (str): Colocalisation method. - priorc1 (float): Prior on variant being causal for trait 1. Defaults to 1e-4. - priorc2 (float): Prior on variant being causal for trait 2. Defaults to 1e-4. - priorc12 (float): Prior on variant being causal for both traits. Defaults to 1e-5. + colocalisation_method_params (dict[str, Any] | None): Keyword arguments passed to the colocalise method of Colocalisation class. Defaults to None + + Keyword Args: + priorc1 (float): Prior on variant being causal for trait 1. Defaults to 1e-4. For coloc method only. 
+ priorc2 (float): Prior on variant being causal for trait 2. Defaults to 1e-4. For coloc method only. + priorc12 (float): Prior on variant being causal for both traits. Defaults to 1e-5. For coloc method only. """ + colocalisation_method = colocalisation_method.lower() colocalisation_class = self._get_colocalisation_class(colocalisation_method) + # Extract - credible_set = ( - StudyLocus.from_parquet( - session, credible_set_path, recursiveFileLookup=True - ).filter(col("finemappingMethod").isin("SuSie", "SuSiE-inf")) - if colocalisation_class is Coloc - else StudyLocus.from_parquet( - session, credible_set_path, recursiveFileLookup=True - ) + credible_set = StudyLocus.from_parquet( + session, credible_set_path, recusiveFileLookup=True ) + if colocalisation_method == Coloc.METHOD_NAME.lower(): + credible_set = credible_set.filter( + col("finemappingMethod").isin("SuSie", "SuSiE-inf") + ) # Transform overlaps = credible_set.find_overlaps() - colocalisation_results = colocalisation_class.colocalise( # type: ignore - overlaps, priorc1=priorc1, priorc2=priorc2, priorc12=priorc12 - ) + # Make a partial caller to ensure that colocalisation_method_params are added to the call only when dict is not empty + coloc = colocalisation_class.colocalise + if colocalisation_method_params: + coloc = partial(coloc, **colocalisation_method_params) + colocalisation_results = coloc(overlaps) # Load colocalisation_results.df.write.mode(session.write_mode).parquet( f"{coloc_path}/{colocalisation_method.lower()}" ) @classmethod - def _get_colocalisation_class(cls: type[ColocalisationStep], method: str) -> type: + def _get_colocalisation_class( + cls, method: str + ) -> Type[ColocalisationMethodInterface]: """Get colocalisation class. Args: method (str): Colocalisation method. Returns: - type: Colocalisation class. + Type[ColocalisationMethodInterface]: Class that implements the ColocalisationMethodInterface. Raises: ValueError: if method not available. 
@@ -79,15 +91,8 @@ def _get_colocalisation_class(cls: type[ColocalisationStep], method: str) -> typ >>> ColocalisationStep._get_colocalisation_class("ECaviar") """ - module_name = "gentropy.method.colocalisation" - module = import_module(module_name) - - available_methods = [] - for class_name, class_obj in inspect.getmembers(module, inspect.isclass): - if class_obj.__module__ == module_name: - available_methods.append(class_name) - if class_name == method: - return class_obj - raise ValueError( - f"Method {method} is not supported. Available: {(', ').join(available_methods)}" - ) + method = method.lower() + if method not in cls.__coloc_methods__: + raise ValueError(f"Colocalisation method {method} not available.") + coloc_method = cls.__coloc_methods__[method] + return coloc_method diff --git a/src/gentropy/config.py b/src/gentropy/config.py index a05ba3258..ad941f5e0 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -38,9 +38,7 @@ class ColocalisationConfig(StepConfig): credible_set_path: str = MISSING coloc_path: str = MISSING colocalisation_method: str = MISSING - priorc1: float = MISSING - priorc2: float = MISSING - priorc12: float = MISSING + colocalisation_method_params: dict[str, Any] = field(default_factory=dict[str, Any]) _target_: str = "gentropy.colocalisation.ColocalisationStep" diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index 4b85b68d6..568b46007 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -83,7 +83,7 @@ def extract_maximum_coloc_probability_per_region_and_gene( method_colocalisation_metric = ColocalisationStep._get_colocalisation_class( filter_by_colocalisation_method - ).METHOD_METRIC # type: ignore + ).METHOD_METRIC coloc_filtering_expr = [ f.col("rightGeneId").isNotNull(), diff --git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py index 7d711d9a3..37ca7b0d7 100644 --- 
a/src/gentropy/method/colocalisation.py +++ b/src/gentropy/method/colocalisation.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Protocol import numpy as np import pyspark.ml.functions as fml @@ -14,13 +14,41 @@ from gentropy.dataset.colocalisation import Colocalisation if TYPE_CHECKING: + from typing import Any + from numpy.typing import NDArray from pyspark.sql import Column from gentropy.dataset.study_locus_overlap import StudyLocusOverlap -class ECaviar: +class ColocalisationMethodInterface(Protocol): + """Colocalisation method interface.""" + + METHOD_NAME: str + METHOD_METRIC: str + + @classmethod + def colocalise( + cls, overlapping_signals: StudyLocusOverlap, **kwargs: Any + ) -> Colocalisation: + """Method to generate the colocalisation. + + Args: + overlapping_signals (StudyLocusOverlap): Overlapping study loci. + **kwargs (Any): Additional keyword arguments to the colocalise method. + + + Returns: + Colocalisation: loci colocalisation + + Raises: + NotImplementedError: Implement in derivative classes. + """ + raise NotImplementedError("Implement in derivative classes.") + + +class ECaviar(ColocalisationMethodInterface): """ECaviar-based colocalisation analysis. It extends [CAVIAR](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5142122/#bib18) framework to explicitly estimate the posterior probability that the same variant is causal in 2 studies while accounting for the uncertainty of LD. eCAVIAR computes the colocalization posterior probability (**CLPP**) by utilizing the marginal posterior probabilities. This framework allows for **multiple variants to be causal** in a single locus. 
@@ -60,12 +88,15 @@ def _get_clpp(left_pp: Column, right_pp: Column) -> Column: @classmethod def colocalise( - cls: type[ECaviar], overlapping_signals: StudyLocusOverlap + cls: type[ECaviar], + overlapping_signals: StudyLocusOverlap, + **kwargs: Any, ) -> Colocalisation: """Calculate bayesian colocalisation based on overlapping signals. Args: overlapping_signals (StudyLocusOverlap): overlapping signals. + **kwargs (Any): Additional parameters passed to the colocalise method. Returns: Colocalisation: colocalisation results based on eCAVIAR. @@ -95,7 +126,7 @@ def colocalise( ) -class Coloc: +class Coloc(ColocalisationMethodInterface): """Calculate bayesian colocalisation based on overlapping signals from credible sets. Based on the [R COLOC package](https://github.com/chr1swallace/coloc/blob/main/R/claudia.R), which uses the Bayes factors from the credible set to estimate the posterior probability of colocalisation. This method makes the simplifying assumption that **only one single causal variant** exists for any given trait in any genomic region. @@ -143,22 +174,36 @@ def _get_posteriors(all_bfs: NDArray[np.float64]) -> DenseVector: def colocalise( cls: type[Coloc], overlapping_signals: StudyLocusOverlap, - priorc1: float = 1e-4, - priorc2: float = 1e-4, - priorc12: float = 1e-5, + **kwargs: float, ) -> Colocalisation: """Calculate bayesian colocalisation based on overlapping signals. Args: overlapping_signals (StudyLocusOverlap): overlapping peaks + **kwargs (float): Additional parameters passed to the colocalise method. + Keyword Args: priorc1 (float): Prior on variant being causal for trait 1. Defaults to 1e-4. priorc2 (float): Prior on variant being causal for trait 2. Defaults to 1e-4. priorc12 (float): Prior on variant being causal for traits 1 and 2. Defaults to 1e-5. Returns: Colocalisation: Colocalisation results + + Raises: + TypeError: When passed incorrect prior argument types. 
""" + # Ensure priors are always present, even if not passed + priorc1 = kwargs.get("priorc1") or 1e-4 + priorc2 = kwargs.get("priorc2") or 1e-4 + priorc12 = kwargs.get("priorc12") or 1e-5 + priors = [priorc1, priorc2, priorc12] + if any(not isinstance(prior, float) for prior in priors): + raise TypeError( + "Passed incorrect type(s) for prior parameters. got %s", + {type(p): p for p in priors}, + ) + # register udfs logsum = f.udf(get_logsum, DoubleType()) posteriors = f.udf(Coloc._get_posteriors, VectorUDT()) diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index 303642d5e..4bfede7d9 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -444,7 +444,6 @@ def _setup(self: TestDiseaseValidation, spark: SparkSession) -> None: "backgroundTraitFromSourceMappedIds", f.array().cast("array") ) ) - study_df.show() # Mock study index: self.study_index = StudyIndex( _df=study_df, diff --git a/tests/gentropy/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py index 1d788eb1f..c9a99d16f 100644 --- a/tests/gentropy/method/test_colocalisation_method.py +++ b/tests/gentropy/method/test_colocalisation_method.py @@ -17,6 +17,12 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: """Test coloc.""" assert isinstance(Coloc.colocalise(mock_study_locus_overlap), Colocalisation) + assert isinstance( + Coloc.colocalise( + mock_study_locus_overlap, priorc1=1e-4, priorc2=1e-4, priorc12=1e-5 + ), + Colocalisation, + ) @pytest.mark.parametrize( diff --git a/tests/gentropy/step/test_colocalisation_step.py b/tests/gentropy/step/test_colocalisation_step.py new file mode 100644 index 000000000..e74dee234 --- /dev/null +++ b/tests/gentropy/step/test_colocalisation_step.py @@ -0,0 +1,293 @@ +"""Test colocalisation step.""" + +from pathlib import Path +from typing import Type + +import pytest + +from gentropy.colocalisation import 
ColocalisationStep +from gentropy.common.session import Session +from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.study_locus import StudyLocus +from gentropy.method.colocalisation import Coloc, ColocalisationMethodInterface, ECaviar + + +@pytest.mark.step_test +class TestColocalisationStep: + """Test colocalisation steps.""" + + @pytest.fixture(autouse=True) + def _setup(self, session: Session, tmp_path: Path) -> None: + """Setup StudyLocus for testing.""" + credible_set_data = [ + ( + "-1299941111165481046", + "gwas", + "1_62634374_G_GA", + "1", + 62634374, + "1:62116600-63176657", + "GCST90269661", + None, + -18.026562155233105, + 8.294741, + -72, + None, + None, + None, + [ + "Variant not found in LD reference, Study locus finemapped without in-sample LD reference" + ], + "SuSiE-inf", + 2, + 128.08235878972883, + 1.0, + 1.0, + 62116600, + 63176657, + None, + [("1_62634374_G_GA", 1.0)], + [ + ( + True, + True, + 303.2017476882394, + 1.0, + "1_62634374_G_GA", + None, + None, + -0.07779708137213309, + None, + None, + ) + ], + "SuSiE fine-mapped credible set with out-of-sample LD", + ), + ( + "-1245591334543437941", + "gwas", + "1_62725906_C_A", + "1", + 62725906, + "1:62275115-62861709", + "GCST90024601", + None, + 6.818181818181818, + 1.0845997, + -12, + None, + None, + None, + [ + "Variant not found in LD reference, Study locus finemapped without in-sample LD reference" + ], + "SuSiE-inf", + 3, + 903.4374513916813, + 1.0, + 1.0, + 62275115, + 62861709, + None, + [("1_62725906_C_A", 1.0)], + [ + ( + True, + True, + 2087.4457573345685, + 0.9999999999381545, + "1_62725906_C_A", + None, + None, + 0.20241232721094407, + None, + None, + ) + ], + "SuSiE fine-mapped credible set with out-of-sample LD", + ), + ( + "-0.20241232721094407", + "gwas", + "1_62725906_C_A", + "1", + 62725906, + "1:62335572-62883302", + "GCST90025461", + None, + 6.363636363636364, + 5.0753098, + -10, + None, + None, + None, + [ + "Variant not found in LD 
reference, Study locus finemapped without in-sample LD reference" + ], + "SuSiE-inf", + 2, + 912.1598183692258, + 1.0, + 1.0, + 62335572, + 62883302, + None, + [("1_62725906_C_A", 1.0)], + [ + ( + True, + True, + 2107.38950418228, + 0.9999999999454303, + "1_62725906_C_A", + None, + None, + 0.20330391077149534, + None, + None, + ) + ], + "SuSiE fine-mapped credible set with out-of-sample LD", + ), + ( + "-2271857845883525223", + "gwas", + "1_62634374_G_GA", + "1", + 62634374, + "1:62192511-63034021", + "GCST90269580", + None, + -15.43232373355239, + 1.0077391, + -54, + None, + None, + None, + [ + "Variant not found in LD reference, Study locus finemapped without in-sample LD reference" + ], + "SuSiE-inf", + 2, + 104.77639852123883, + 1.0, + 1.0, + 62192511, + 63034021, + None, + [("1_62634374_G_GA", 1.0)], + [ + ( + True, + True, + 249.20354469210795, + 1.0, + "1_62634374_G_GA", + None, + None, + -0.07071263272378725, + None, + None, + ) + ], + "SuSiE fine-mapped credible set with out-of-sample LD", + ), + ] + self.credible_set_path = str(tmp_path / "credible_set_datasets") + session.spark.createDataFrame( + credible_set_data, schema=StudyLocus.get_schema() + ).write.parquet(self.credible_set_path) + self.coloc_path = str(tmp_path / "colocalisation") + + @pytest.mark.parametrize( + ["label", "expected_method"], + [ + pytest.param("coloc", Coloc, id="coloc method"), + pytest.param("ecaviar", ECaviar, id="ecaviar method"), + pytest.param("ECaviar", ECaviar, id="uppercase label"), + ], + ) + def test_get_colocalisation_class( + self, label: str, expected_method: Type[ColocalisationMethodInterface] + ) -> None: + """Test _get_colocalisation_class method on ColocalisationStep.""" + method = ColocalisationStep._get_colocalisation_class(label) + assert ( + method is expected_method + ), "Incorrect colocalisation class returned by ColocalisationStep._get_colocalisation_class(label)" + + def test_label_with_invalid_method(self) -> None: + """Test what happens when invalid 
method_label is passed to the _get_colocalisation_class.""" + with pytest.raises(ValueError): + ColocalisationStep._get_colocalisation_class("NewMethod") + + @pytest.mark.parametrize( + ["coloc_method", "expected_data"], + [ + pytest.param( + "ecaviar", + { + "clpp": [1.0, 1.0], + "colocalisationMethod": ["eCAVIAR", "eCAVIAR"], + "leftStudyLocusId": [ + "-1245591334543437941", + "-2271857845883525223", + ], + "rightStudyLocusId": [ + "-0.20241232721094407", + "-1299941111165481046", + ], + }, + id="ecaviar", + ), + pytest.param( + "coloc", + { + "h4": [1.0, 1.0], + "h3": [0.0, 0.0], + "h2": [0.0, 0.0], + "h1": [0.0, 0.0], + "h0": [0.0, 0.0], + "colocalisationMethod": ["COLOC", "COLOC"], + "leftStudyLocusId": [ + "-1245591334543437941", + "-2271857845883525223", + ], + "rightStudyLocusId": [ + "-0.20241232721094407", + "-1299941111165481046", + ], + }, + id="coloc", + ), + ], + ) + def test_colocalise( + self, + coloc_method: str, + expected_data: dict[str, list[float] | list[str]], + session: Session, + ) -> None: + """Test colocalise method.""" + ColocalisationStep( + session=session, + credible_set_path=self.credible_set_path, + coloc_path=self.coloc_path, + colocalisation_method=coloc_method, + ) + + coloc_dataset = Colocalisation.from_parquet( + session, self.coloc_path, recursiveFileLookup=True + ) + for column in expected_data: + values = [c[column] for c in coloc_dataset.df.collect()] + expected_values = expected_data[column] + for v, e in zip(values, expected_values): + if isinstance(e, float): + assert ( + e == pytest.approx(v, 1e-1) + ), f"Incorrect value {v} at {column} found in {coloc_method}, expected {e}" + else: + assert ( + e == v + ), f"Incorrect value {v} at {column} found in {coloc_method}, expected {e}" From 9bbbc02681ae4ac2b137522aff29e4d946f047d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:32:01 +0100 Subject: [PATCH 109/188] test: skip 
`fetch_coordinates_from_rsids` (#850) --- src/gentropy/datasource/ensembl/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gentropy/datasource/ensembl/api.py b/src/gentropy/datasource/ensembl/api.py index 2a767610f..eb42b3166 100644 --- a/src/gentropy/datasource/ensembl/api.py +++ b/src/gentropy/datasource/ensembl/api.py @@ -20,7 +20,7 @@ def fetch_coordinates_from_rsids( Exception: If an error occurs while processing the batches. Example: - >>> fetch_coordinates_from_rsids(["rs75493593"]) + >>> fetch_coordinates_from_rsids(["rs75493593"]) # doctest: +SKIP {'rs75493593': ['17_7041768_G_C', '17_7041768_G_T']} """ From 5cbf5ed793d3a131495cdf70330ee720102544ae Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Thu, 17 Oct 2024 11:41:02 +0100 Subject: [PATCH 110/188] fix(eqtl): deduplicating credible set loci (#849) * fix(eqtl): deduplicating credible set loci * fix: removing --- .../datasource/eqtl_catalogue/finemapping.py | 60 ++-- tests/gentropy/conftest.py | 18 +- .../data_samples/QTD000584.lbf_variable.txt | 292 ++++++------------ .../eqtl_catalogue/test_eqtl_catalogue.py | 28 +- 4 files changed, 156 insertions(+), 242 deletions(-) diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index 0808b7016..ea46359df 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -136,22 +136,8 @@ def parse_susie_results( """ ss_ftp_path_template = "https://ftp.ebi.ac.uk/pub/databases/spot/eQTL/sumstats" return ( - lbf.withColumn( - "dataset_id", - cls._extract_dataset_id_from_file_path(f.input_file_name()), - ) - .join( - ( - credible_sets.withColumn( - "dataset_id", - cls._extract_dataset_id_from_file_path(f.input_file_name()), - ) - .withColumn( - "credibleSetIndex", - cls._extract_credible_set_index(f.col("cs_id")), - ) - .join(f.broadcast(studies_metadata), on="dataset_id") - ), + lbf.join( + 
credible_sets.join(f.broadcast(studies_metadata), on="dataset_id"), on=["molecular_trait_id", "region", "variant", "dataset_id"], how="inner", ) @@ -285,11 +271,26 @@ def read_credible_set_from_source( Returns: DataFrame: Credible sets DataFrame. """ - return session.spark.read.csv( - credible_set_path, - sep="\t", - header=True, - schema=cls.raw_credible_set_schema, + return ( + session.spark.read.csv( + credible_set_path, + sep="\t", + header=True, + schema=cls.raw_credible_set_schema, + ) + .withColumns( + { + # Adding dataset id based on the input file name: + "dataset_id": cls._extract_dataset_id_from_file_path( + f.input_file_name() + ), + # Parsing credible set index from the cs_id: + "credibleSetIndex": cls._extract_credible_set_index(f.col("cs_id")), + } + ) + # Remove duplicates caused by explosion of single variants to multiple rsid-s: + .drop("rsid") + .distinct() ) @classmethod @@ -307,9 +308,16 @@ def read_lbf_from_source( Returns: DataFrame: Log Bayes Factors DataFrame. """ - return session.spark.read.csv( - lbf_path, - sep="\t", - header=True, - schema=cls.raw_lbf_schema, + return ( + session.spark.read.csv( + lbf_path, + sep="\t", + header=True, + schema=cls.raw_lbf_schema, + ) + .withColumn( + "dataset_id", + cls._extract_dataset_id_from_file_path(f.input_file_name()), + ) + .distinct() ) diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index b1fc5ef90..10298205d 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -459,22 +459,20 @@ def sample_finngen_studies(spark: SparkSession) -> DataFrame: @pytest.fixture() -def sample_eqtl_catalogue_finemapping_credible_sets(spark: SparkSession) -> DataFrame: +def sample_eqtl_catalogue_finemapping_credible_sets(session: Session) -> DataFrame: """Sample raw eQTL Catalogue credible sets outputted by SuSIE.""" - return spark.read.option("delimiter", "\t").csv( - "tests/gentropy/data_samples/QTD000584.credible_sets.tsv", - header=True, - 
schema=EqtlCatalogueFinemapping.raw_credible_set_schema, + return EqtlCatalogueFinemapping.read_credible_set_from_source( + session, + credible_set_path=["tests/gentropy/data_samples/QTD000584.credible_sets.tsv"], ) @pytest.fixture() -def sample_eqtl_catalogue_finemapping_lbf(spark: SparkSession) -> DataFrame: +def sample_eqtl_catalogue_finemapping_lbf(session: Session) -> DataFrame: """Sample raw eQTL Catalogue table with logBayesFactors outputted by SuSIE.""" - return spark.read.option("delimiter", "\t").csv( - "tests/gentropy/data_samples/QTD000584.lbf_variable.txt", - header=True, - schema=EqtlCatalogueFinemapping.raw_lbf_schema, + return EqtlCatalogueFinemapping.read_lbf_from_source( + session, + lbf_path=["tests/gentropy/data_samples/QTD000584.lbf_variable.txt"], ) diff --git a/tests/gentropy/data_samples/QTD000584.lbf_variable.txt b/tests/gentropy/data_samples/QTD000584.lbf_variable.txt index 47023f5b6..2e5f080b7 100644 --- a/tests/gentropy/data_samples/QTD000584.lbf_variable.txt +++ b/tests/gentropy/data_samples/QTD000584.lbf_variable.txt @@ -1,201 +1,93 @@ molecular_trait_id region variant chromosome position lbf_variable1 lbf_variable2 lbf_variable3 lbf_variable4 lbf_variable5 lbf_variable6 lbf_variable7 lbf_variable8 lbf_variable9 lbf_variable10 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124823679_A_C 10 124823679 -1.26935233091278 0.00552916857251828 0.00670686749504501 0.00684844231246418 0.00642940186674279 0.00573212028335401 0.0049294931404007 0.00412586958588079 0.00338040036796583 0.00272239661658613 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124823817_T_C 10 124823817 -2.25582222123641 -0.00208125858550989 -0.00256052488318437 -0.00261904135127056 -0.00244640743271951 -0.00216292043253841 -0.00184233048896054 -0.00152734817415645 -0.00124041502049366 -0.000991265768420568 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124824245_G_A 10 124824245 -2.25249260789328 -0.00197882128489812 -0.00243573380317663 -0.00249154951399166 
-0.00232690101650856 -0.00205664557252794 -0.00175120086684633 -0.00145129303789115 -0.00117826513141051 -0.00094132823193549 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124824364_C_A 10 124824364 -2.25582222123641 -0.00208125858550989 -0.00256052488318437 -0.00261904135127056 -0.00244640743271951 -0.00216292043253841 -0.00184233048896054 -0.00152734817415645 -0.00124041502049366 -0.000991265768420568 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124824534_C_CA 10 124824534 -2.26318575514626 -0.00759055742912418 -0.00927290986259788 -0.00947680517976357 -0.00887433723066655 -0.00787871618900615 -0.00674313127256188 -0.00561710571236462 -0.00458216509091836 -0.00367617563083877 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124825317_C_CA 10 124825317 -0.577170243131913 0.0161425873788299 0.0196348326829869 0.0200560085606836 0.0188102366798635 0.0167429348752397 0.0143719021033193 0.0120069483938527 0.00982103212513996 0.00789777641993306 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124825317_C_CAA 10 124825317 -1.93134671459752 0.00808255585983675 0.00982087088974337 0.0100302701012764 0.00941074157608668 0.00838160756220585 0.00719967534544841 0.0060190946946368 0.00492641412078276 0.00396386932238579 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124825317_CA_C 10 124825317 -0.532610050446922 0.0206967338320014 0.0251845551687073 0.0257260629026321 0.0241245358917253 0.0214679440558103 0.0184226737957691 0.015386928044514 0.0125825075687986 0.0101162494818747 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124825356_A_G 10 124825356 -0.698810919271326 0.0188588353573147 0.0229460465266103 0.0234391645719034 0.0219807191027961 0.0195612559074494 0.0167874827544021 0.0140220418245414 0.0114670266520394 0.00921986192434243 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124826915_CA_C 10 124826915 -2.31321458683551 -0.00840871648295538 -0.0102690109215051 -0.0104943894955198 -0.00982839321766171 -0.00872744617829202 
-0.00747120691421399 -0.00622499048851344 -0.00507909365762194 -0.00407558751563553 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124826959_TG_T 10 124826959 -0.724597943609362 0.0182366191848016 0.0221881319406165 0.0226648572108372 0.021254882010378 0.0189157406366314 0.0162339191520751 0.0135600139674978 0.0110894466434504 0.00891645807503716 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124827162_AT_A 10 124827162 -2.21496848857903 -0.000625904251086329 -0.000787481469924689 -0.00080761257734574 -0.000748472679356293 -0.000653030232775187 -0.000547665274479137 -0.000446885348123871 -0.000357525256686309 -0.000281883923476833 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124827532_A_G 10 124827532 -2.4239637525357 -0.0102023448081918 -0.0124549417313204 -0.0127277408296163 -0.0119215488382096 -0.0105883972157388 -0.00906651295314331 -0.00755604216127681 -0.00616651788850042 -0.00494914623184917 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124827750_CA_C 10 124827750 -2.19705303485449 -0.00537724863697608 -0.00657770507775179 -0.00672340680485517 -0.00629302312399416 -0.00558265410110481 -0.00477375066817576 -0.00397306111913664 -0.003238379074328 -0.00259621529903864 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124828361_A_G 10 124828361 -2.25588888031751 -0.00208224488191977 -0.00256172645601493 -0.00262026893494349 -0.00244755811013109 -0.00216394368366934 -0.00184320789165371 -0.00152808041788477 -0.00124101337164051 -0.000991746533507865 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124828519_C_T 10 124828519 -2.25588888031751 -0.00208224488191977 -0.00256172645601493 -0.00262026893494349 -0.00244755811013109 -0.00216394368366934 -0.00184320789165371 -0.00152808041788477 -0.00124101337164051 -0.000991746533507865 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124828674_C_CT 10 124828674 -2.2667244433029 -0.00233277556838196 -0.00286698520973738 -0.0029321414787975 -0.00273987673260034 -0.00242386800603445 
-0.00206606181755742 -0.00171404672221032 -0.00139296162534341 -0.00111382485843814 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124829420_G_A 10 124829420 -0.773523247901995 0.015053028496554 0.0183075326479205 0.0186999848913336 0.0175391396825431 0.0156125614034157 0.0134026087571608 0.0111979862234701 0.00915996807566311 0.00736660676212342 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124829573_A_G 10 124829573 -2.25249260789328 -0.00197882128489812 -0.00243573380317663 -0.00249154951399166 -0.00232690101650856 -0.00205664557252794 -0.00175120086684633 -0.00145129303789115 -0.00117826513141051 -0.00094132823193549 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124830088_A_G 10 124830088 -2.25249260789328 -0.00197882128489812 -0.00243573380317663 -0.00249154951399166 -0.00232690101650856 -0.00205664557252794 -0.00175120086684633 -0.00145129303789115 -0.00117826513141051 -0.00094132823193549 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124831282_G_T 10 124831282 -2.4051241801424 -0.010194429812052 -0.0124453571870049 -0.0127179559473882 -0.0119123569954724 -0.0105801942745289 -0.00905945119390417 -0.0075501258083901 -0.00616166635223259 -0.00494523626616372 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124831283_C_T 10 124831283 -2.25249260789328 -0.00197882128489812 -0.00243573380317663 -0.00249154951399166 -0.00232690101650856 -0.00205664557252794 -0.00175120086684633 -0.00145129303789115 -0.00117826513141051 -0.00094132823193549 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124831840_GTC_G 10 124831840 -1.20159579152222 0.0109045402639332 0.01325692516841 0.0135404689032392 0.012701693318911 0.0113090986301061 0.00971086161782742 0.00811562554315026 0.0066401903543869 0.00534128287561808 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124831844_T_A 10 124831844 -1.34658243357735 0.00835723769649288 0.0101540220853651 0.0103704459305436 0.00973012977614873 0.00866640295231225 0.00744464090993979 0.00622416647313395 
0.00509446481102227 0.004099230894131 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124831872_T_A 10 124831872 -2.34625212779857 -0.00968803683158015 -0.0118285437848731 -0.0120878037141861 -0.0113216450523219 -0.0100548425007685 -0.00860892613410691 -0.00717409193702956 -0.00585435801725565 -0.00469829595754456 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124832027_A_AT 10 124832027 -2.25588888031751 -0.00208224488191977 -0.00256172645601493 -0.00262026893494349 -0.00244755811013109 -0.00216394368366934 -0.00184320789165371 -0.00152808041788477 -0.00124101337164051 -0.000991746533507865 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124833145_C_CA 10 124833145 -2.28283780570513 -0.00698556273488071 -0.00853682247273824 -0.00872490334208464 -0.00816921247013314 -0.00725119755438497 -0.00620459080659508 -0.00516728131637656 -0.00421430872952477 -0.00338041150034485 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124833253_G_A 10 124833253 -2.40657832584665 -0.010203516950968 -0.0124564268860818 -0.0127292651718496 -0.0119229580124944 -0.0105896217621688 -0.00906753535325366 -0.00755687284647166 -0.00616717992570104 -0.00494966650181938 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124833306_C_CA 10 124833306 -0.718119334208785 0.0204411095129298 0.0248729799428951 0.0254077221671682 0.0238261920838805 0.0212027184080621 0.0181953299954114 0.0151972610715623 0.0124275702368206 0.00999179406236905 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124833440_G_A 10 124833440 -2.41814339171232 -0.0098327631078603 -0.0120047960981764 -0.0122678640335487 -0.0114904461724898 -0.0102049807281643 -0.00873769306515593 -0.00728157837374699 -0.00594220704109105 -0.00476889267550984 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124835036_G_T 10 124835036 -2.25249260789328 -0.00197882128489812 -0.00243573380317663 -0.00249154951399166 -0.00232690101650856 -0.00205664557252794 -0.00175120086684633 -0.00145129303789115 -0.00117826513141051 
-0.00094132823193549 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124835477_A_G 10 124835477 -2.26357089726545 -0.00222271066466995 -0.00273289892174455 -0.00279515289447785 -0.00261146945343693 -0.00230967927981629 -0.00196814756905628 -0.00163233041593891 -0.0013261864531442 -0.00106017148459969 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124837161_C_T 10 124837161 -2.25249260789328 -0.00197882128489812 -0.00243573380317663 -0.00249154951399166 -0.00232690101650856 -0.00205664557252794 -0.00175120086684633 -0.00145129303789115 -0.00117826513141051 -0.00094132823193549 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124838416_A_G 10 124838416 -2.25101211104246 -0.0019007294594231 -0.0023406051271837 -0.00239436257681014 -0.00223579989727307 -0.00197562888849667 -0.00168172781245923 -0.00139331039268331 -0.00113088226397506 -0.000903255146583959 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124838524_C_CA 10 124838524 -2.19483311051505 -0.00073611609891211 -0.000923663761242199 -0.000946982114754746 -0.000878450889030269 -0.000767649297590989 -0.000645009776721217 -0.000527355321968237 -0.000422707491994689 -0.000333856132527011 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124839146_G_A 10 124839146 -2.41082375945296 -0.0099006577745171 -0.0120875311249189 -0.0123523927681961 -0.0115696724236227 -0.0102754235108065 -0.00879808585381836 -0.00733197202772651 -0.00598338047337421 -0.00480197096266988 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124839225_GT_G 10 124839225 -2.2547600983747 -0.00180979521534308 -0.00222990403800383 -0.00228127539590872 -0.00212976945205723 -0.00188129907636458 -0.00160080305748656 -0.00132574085524739 -0.00107564323783382 -0.00085885419282361 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124839264_G_A 10 124839264 -2.25101211104246 -0.0019007294594231 -0.0023406051271837 -0.00239436257681014 -0.00223579989727307 -0.00197562888849667 -0.00168172781245923 -0.00139331039268331 
-0.00113088226397506 -0.000903255146583959 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124840199_T_C 10 124840199 -2.39506056143101 -0.0100551588919027 -0.0122757056966614 -0.0125446341421811 -0.0117498873604074 -0.0104357075330044 -0.00893554999762491 -0.00744671581526868 -0.00607715974785661 -0.00487733286532199 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124840467_G_C 10 124840467 -2.1955186516612 -0.00526044141737181 -0.00643541330403563 -0.00657803613453112 -0.00615675597566279 -0.00546147170434175 -0.00466983553727074 -0.00388633342336675 -0.00316750640982066 -0.00253926795723158 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124840650_A_T 10 124840650 -0.374919234825047 0.0248189202763704 0.0302058642972165 0.0308559915029942 0.0289332976481971 0.0257445014718507 0.0220899879909635 0.0184477751553693 0.0150838660999502 0.0121261860282771 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124841158_G_A 10 124841158 -2.25719025489475 -0.00373076326931709 -0.00456968145801628 -0.00467164556865285 -0.0043705450008793 -0.00387415552450276 -0.0033098215585956 -0.00275218677675948 -0.00224138417213471 -0.00179559257569517 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124841267_C_CA 10 124841267 -0.65796727446411 0.0189073654131953 0.0230050128069865 0.0234993877730809 0.0220372234211639 0.0196115820169309 0.0168307125669158 0.0140581828413753 0.0114966063532012 0.00924366168201773 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124841437_A_T 10 124841437 -2.29495240097472 -0.00291412074261821 -0.00357511775813135 -0.00365559041324737 -0.00341803791169371 -0.0030269813358732 -0.00258326144869336 -0.00214572179889183 -0.00174573513882059 -0.00139729438053715 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124841469_C_T 10 124841469 -2.25101211104246 -0.0019007294594231 -0.0023406051271837 -0.00239436257681014 -0.00223579989727307 -0.00197562888849667 -0.00168172781245923 -0.00139331039268331 -0.00113088226397506 
-0.000903255146583959 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124841584_A_ATT 10 124841584 -2.25101211104246 -0.0019007294594231 -0.0023406051271837 -0.00239436257681014 -0.00223579989727307 -0.00197562888849667 -0.00168172781245923 -0.00139331039268331 -0.00113088226397506 -0.000903255146583959 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124842626_A_G 10 124842626 -2.41287094557869 -0.0100150766337816 -0.0122271638344906 -0.0124950780441044 -0.0117033357201128 -0.0103941634812945 -0.00889978325271024 -0.00741674739941001 -0.0060525820624826 -0.00485752266582073 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124843625_T_C 10 124843625 -0.391278428517821 0.024561707548254 0.0298925376811181 0.0305358856678355 0.0286332362714949 0.0254776545317208 0.0218611616603241 0.0182567943647096 0.0149277976644338 0.0120007813442671 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124843786_G_T 10 124843786 -1.33271204361954 0.00441986373065539 0.00535558969393302 0.00546793225278286 0.00513532497097469 0.00458127002409814 0.00394260008157588 0.0033021845256247 0.00270727904679813 0.00218152035234276 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124843824_A_T 10 124843824 -0.808644938623694 0.016697723696264 0.0203135002655714 0.0207496644093359 0.019459618318773 0.017319203250421 0.014864862658424 0.0124173893985442 0.0101557032279578 0.00816617525578556 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124844385_G_T 10 124844385 -2.02426977742932 -0.00213689943378714 -0.0026291399548195 -0.00268924500523449 -0.00251192446234683 -0.00222075929825616 -0.00189151365654316 -0.00156805551923922 -0.00127342581869971 -0.00101761227868069 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124844711_C_CT 10 124844711 -2.01503820789279 -0.0031535461822334 -0.00386680125593264 -0.00395359058961686 -0.00369735935604076 -0.00327534683315944 -0.00279619439061829 -0.00232339410815152 -0.00189089116750285 -0.00151390251550865 -UROS.11248.43.3..1 
chr10:124823285-126823285 chr10_124844711_CT_C 10 124844711 -1.25199277177365 0.00564134730161303 0.00684346544685388 0.00698798899544517 0.00656022898481989 0.00584849311572233 0.00502931156001063 0.00420920084715881 0.00344851446769701 0.00277713927604362 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124844888_T_A 10 124844888 -0.375732324632437 0.0248049403934418 0.0301888353436373 0.0308385942069993 0.0289169894669477 0.0257299980795658 0.0220775506700184 0.0184373945242706 0.015075382871137 0.0121193693891644 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124845303_A_G 10 124845303 -0.556302344385712 0.0212121522157274 0.0258124212515813 0.02636751383189 0.0247258183214765 0.0220026662019812 0.0188812049933484 0.015769620000925 0.0128952390237536 0.0103675349386072 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124845846_A_C 10 124845846 -0.884014411763964 0.0151661909799317 0.0184479377895981 0.0188437496976177 0.0176730174456483 0.0157303195339376 0.0135023205298919 0.0112801617152996 0.00922634025881797 0.00741939137745495 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124846286_G_A 10 124846286 -1.37561849919552 0.00360869729913871 0.0043675560701204 0.00445853364713455 0.00418909939642553 0.00373973471157552 0.00322091938858371 0.0026998228571804 0.00221500290375154 0.0017859441489021 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124846582_A_T 10 124846582 -2.26188014201847 -0.00224770286182485 -0.00276329511473161 -0.00282620056778216 -0.00264058969506387 -0.00233560082750373 -0.00199039993220795 -0.00165092240380105 -0.00134139465236771 -0.00107240210186754 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124847850_CA_C 10 124847850 -2.28281655421035 -0.00654849040547711 -0.00800445581757403 -0.00818102594238379 -0.00765937146285545 -0.00679776151501077 -0.00581573061622098 -0.00484270944668097 -0.00394905137158963 -0.00316725747826574 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124847866_A_C 10 124847866 
-2.26188014201847 -0.00224770286182485 -0.00276329511473161 -0.00282620056778216 -0.00264058969506387 -0.00233560082750373 -0.00199039993220795 -0.00165092240380105 -0.00134139465236771 -0.00107240210186754 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124848040_G_C 10 124848040 -0.481683170797529 0.0227445034177043 0.0276790136989811 0.0282744849293763 0.0265133981797634 0.0235924041188884 0.0202444637162862 0.0169074327419207 0.0138250704192542 0.0111146884616717 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124848431_G_T 10 124848431 -2.38994467621248 -0.0102487196804901 -0.0125114761704976 -0.0127855037145341 -0.011975680154201 -0.010636515414411 -0.00910775469932679 -0.00759044613753002 -0.00619462013039795 -0.00497171833825583 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124849256_C_T 10 124849256 -1.38512306922568 0.00339803602645494 0.00411095135359352 0.00419637882777657 0.00394335521202338 0.00352118509595067 0.00303350175896622 0.00254339629742217 0.00208716759899241 0.00168322238442897 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124850331_AT_A 10 124850331 -1.14789963673752 0.00996659050391013 0.012113906167416 0.0123726636984429 0.0116071677113001 0.010335953372294 0.00887658967490967 0.00741951332525836 0.00607146531596081 0.00488439246519556 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124850332_T_A 10 124850332 -1.01054912426359 0.0121087994910529 0.0147233008992642 0.0150384958619871 0.0141061253366908 0.012558376242287 0.0107824370924039 0.00901021637326727 0.00737142598287832 0.00592897437797379 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124850559_T_G 10 124850559 -1.13119431454808 0.0077316812330781 0.00938960202083639 0.00958918572989953 0.0089986189143505 0.00801708156622638 0.00688902193433982 0.00576141878737957 0.00471704068319001 0.0037964732754765 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124850734_G_C 10 124850734 -2.37281216720148 -0.00849915332668516 -0.0103805678343534 
-0.0106085343587341 -0.00993490934488417 -0.00882146614210955 -0.00755114392486167 -0.00629114155199195 -0.00513272999375847 -0.00411839045022067 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124850745_T_G 10 124850745 -2.34013210285488 -0.00969765122941624 -0.0118403812180796 -0.0120999132441537 -0.0113329522913554 -0.0100648328471036 -0.00861742889999473 -0.00718113521053709 -0.00586007372556985 -0.00470286051065205 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124851010_CA_C 10 124851010 -2.24645897725556 -0.00235531432211511 -0.00289433135303696 -0.00296006531160753 -0.00276608972863635 -0.00244723489208853 -0.00208615341038509 -0.00173085986078103 -0.00140673445268824 -0.00112491489425048 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124851270_A_G 10 124851270 -1.61315974418337 -0.000695769921655209 -0.000875665552248162 -0.000898092069163692 -0.000832212088537787 -0.000725915764403062 -0.000608615379035893 -0.000496473023891664 -0.000397087102147253 -0.000312998377002049 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124851579_G_GTA 10 124851579 -1.59727070433974 -0.00061750007360839 -0.000780342350705343 -0.000800709184291648 -0.000740919496504233 -0.000644717347988877 -0.000538975306584533 -0.00043834176227886 -0.000349575886498421 -0.00027481732563972 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124851849_C_G 10 124851849 -2.20172963397072 0.000344256327158732 0.00039415271008636 0.000399565593268481 0.00038317481379968 0.000353440956338602 0.00031548803148862 0.000273580693262776 0.000231287096364774 0.000191276585144617 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124852104_G_GT 10 124852104 -2.2142673807444 -4.62757645354195e-05 -8.15929161559481e-05 -8.64752302738303e-05 -7.24257501500958e-05 -5.1720342387096e-05 -3.19376801352078e-05 -1.63778023014594e-05 -5.66040100657972e-06 8.87177957320517e-07 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124852520_C_T 10 124852520 -2.20172963397072 0.000344256327158732 
0.00039415271008636 0.000399565593268481 0.00038317481379968 0.000353440956338602 0.00031548803148862 0.000273580693262776 0.000231287096364774 0.000191276585144617 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124853121_T_C 10 124853121 -2.08205539936493 -0.00420301069672924 -0.00514722500754417 -0.00526196779568089 -0.00492312010183538 -0.00436442249759983 -0.00372912664544867 -0.00310123401735352 -0.00252594669318995 -0.00202377282313959 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124853579_T_TAC 10 124853579 -1.61585361789647 -0.000345258762839418 -0.000448720538141156 -0.000461913934652891 -0.000423335086342291 -0.000362281274423282 -0.000296775425316476 -0.000236194720025562 -0.000184379079970931 -0.000142075434924838 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124853751_G_GTAAA 10 124853751 -2.20977800042084 3.70890256058409e-05 1.99281102375437e-05 1.72385022576549e-05 2.48041614621108e-05 3.47623586063328e-05 4.22377673410246e-05 4.55419703824234e-05 4.49491512908295e-05 4.1559466096075e-05 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124853754_A_AATAC 10 124853754 -2.42310750391642 -0.00816288430775636 -0.00997028315492132 -0.0101892914486599 -0.00954214613878657 -0.0084725115638733 -0.00725223239856909 -0.00604193309195722 -0.00492927684208899 -0.00395504989832229 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124853754_AATAC_A 10 124853754 -2.23742763746063 -0.00309302891731633 -0.00379548670270724 -0.0038810337245434 -0.00362852200688257 -0.00321292741721368 -0.00274150469795131 -0.00227679631973965 -0.00185210290683546 -0.00148224109781347 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124854823_C_CT 10 124854823 -1.86484506952777 0.00383919706239855 0.00464862332681681 0.00474571721880057 0.00445820147131881 0.00397890418358582 0.003425869817268 0.00287075923259517 0.00235460317835168 0.00189805451313596 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124855424_T_C 10 124855424 -1.0689563395791 
0.0141172757206536 0.0171696302035032 0.0175377135128474 0.016448956230227 0.0146420365699691 0.0125693744018558 0.010501743789741 0.00859039221974855 0.00690851161051054 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124855907_G_C 10 124855907 -2.20172963397072 0.000344256327158732 0.00039415271008636 0.000399565593268481 0.00038317481379968 0.000353440956338602 0.00031548803148862 0.000273580693262776 0.000231287096364774 0.000191276585144617 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124855998_G_A 10 124855998 -2.37932547905698 -0.00861996285577771 -0.0105277396098318 -0.0107588911543872 -0.0100758489045782 -0.00894680165723827 -0.00765861847467741 -0.00638083841970261 -0.00520602792366454 -0.0041772857839284 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124856077_A_T 10 124856077 -1.16017327403508 0.00801496109916755 0.0097347198991069 0.00994177515210781 0.00932911690801985 0.00831097738559849 0.00714102523529814 0.00597172866494144 0.0048888938827627 0.00393455384169439 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124856078_A_T 10 124856078 -2.14546465344517 -0.00264064646411999 -0.0032489631421706 -0.0033232551150908 -0.00310409717180971 -0.0027442463684717 -0.00233735805750968 -0.00193764577601785 -0.00157357336004704 -0.00125747255615627 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124856242_A_T 10 124856242 -2.18709076006587 -0.00538453181998921 -0.0065864650294607 -0.00673234219202712 -0.00630143767333413 -0.0055901939405385 -0.00478027140120796 -0.0039785487658075 -0.0032428974093639 -0.00259986956338309 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124856370_C_A 10 124856370 -1.11015451493385 0.0118067214251947 0.0143554458513271 0.0146626971213943 0.0137538153112589 0.0122450014223734 0.0105136514942097 0.00878583544447276 0.00718802616565428 0.00578158250360916 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124856452_T_G 10 124856452 -2.20172963397072 0.000344256327158732 0.00039415271008636 
0.000399565593268481 0.00038317481379968 0.000353440956338602 0.00031548803148862 0.000273580693262776 0.000231287096364774 0.000191276585144617 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124856781_C_A 10 124856781 -2.36227386231629 -0.00816558116262867 -0.00997388799706478 -0.010193013635666 -0.00954552323386615 -0.00847535078454831 -0.00725450724893095 -0.00604370003366839 -0.00493062235971653 -0.00395606214243127 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124857129_C_CT 10 124857129 -0.628128352211606 0.0177862952187477 0.021636776894546 0.0221012213611553 0.0207275049713487 0.0184481586819056 0.0158343421387213 0.0132276629929318 0.0108187035753233 0.00869950642747908 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124857129_C_CTT 10 124857129 -2.42446482948922 -0.00926366137701251 -0.0113108456281656 -0.0115588116243739 -0.0108260353360889 -0.00961447205302912 -0.00823167021258353 -0.00685953560296149 -0.00559753276883068 -0.00449209086004476 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124857210_G_C 10 124857210 -0.306771195812222 0.0286652508914989 0.0348902182491391 0.0356415588328018 0.033419575533935 0.0297347307371907 0.0255122158064474 0.0213044359671573 0.0174186366462514 0.0140024549121831 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124857357_T_TC 10 124857357 -0.74774913506455 0.0214291612881903 0.0260762148754976 0.0266369431252684 0.024978567685662 0.0222277202792651 0.0190744684515596 0.0159311466000784 0.013027407337197 0.0104738541329716 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124858183_C_CA 10 124858183 -2.36528455141954 -0.00827051004109691 -0.010102092818042 -0.0103240404132974 -0.00966821288590625 -0.00858426591431449 -0.00734771591222039 -0.00612133846652974 -0.00499395294071414 -0.00400686936803973 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124858338_C_CA 10 124858338 -0.773298686997732 0.0194937265888591 0.023718843168909 0.0242286059761128 0.0227209306438221 0.0202198272743854 
0.0173525046840579 0.0144938466078139 0.0118527543759659 0.00952992235737105 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124858870_A_G 10 124858870 -1.99595508079809 -0.0030426223995299 -0.00373368189427925 -0.00381784076000002 -0.00356942264728266 -0.00316056964121536 -0.00269680051078636 -0.00223964438309876 -0.00182186054527822 -0.00145802291673025 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124859522_G_A 10 124859522 -2.365157793514 -0.00827230048391048 -0.0101042706382657 -0.0103262649462352 -0.009670299249668 -0.00858612299682049 -0.00734931002308947 -0.00612267027291802 -0.00499504229370284 -0.00400774539649795 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124859964_G_C 10 124859964 -1.97405365708197 -0.000543187217329155 -0.000689424284475049 -0.000707774278422058 -0.000653946936808758 -0.000567586561615485 -0.000473047903800872 -0.000383497516525999 -0.000304893758784175 -0.000239010868388512 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124860024_G_A 10 124860024 -2.36198869204334 -0.00818237968679414 -0.00999474923050592 -0.0102143760096149 -0.00956541085758689 -0.00849283648263688 -0.00726930683032956 -0.00605589201100054 -0.00494046640365386 -0.00396388882906162 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861026_G_A 10 124861026 -0.29067413452153 0.0292743585012767 0.0356323821717037 0.0363998018739244 0.0341302799222793 0.0303666802517433 0.0260540420335156 0.021756582282018 0.0177880781062152 0.0142992751009148 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861051_C_CA 10 124861051 -2.03795344804845 0.00555943061560837 0.00674735090256284 0.00689025720990388 0.00646733396331989 0.00576398883266815 0.00495499615532724 0.00414564596221378 0.00339543051932356 0.00273367987436712 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861242_G_A 10 124861242 -2.20538965904691 0.000288868708465007 0.000326679151345655 0.000330631833028505 0.000318558453802353 0.000295978460458812 0.000266214181689595 
0.000232457354235382 0.000197682182137449 0.000164274842282541 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861453_TAA_T 10 124861453 -2.00910553493323 -0.00182619854521437 -0.00225222279478343 -0.00230437118373716 -0.0021506075147375 -0.0018986367507523 -0.0016145052431189 -0.00133621578900422 -0.0010834843115779 -0.000864650898003028 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861546_CT_C 10 124861546 -0.299575679675067 0.0293182633765765 0.0356858057176668 0.0364543738184406 0.0341814550444037 0.0304122204168675 0.026093122370122 0.0217892226905265 0.0178147691794286 0.0143207342160045 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861678_T_C 10 124861678 -2.20538965904691 0.000288868708465007 0.000326679151345655 0.000330631833028505 0.000318558453802353 0.000295978460458812 0.000266214181689595 0.000232457354235382 0.000197682182137449 0.000164274842282541 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861767_AAC_A 10 124861767 -2.365157793514 -0.00827230048391048 -0.0101042706382657 -0.0103262649462352 -0.009670299249668 -0.00858612299682049 -0.00734931002308947 -0.00612267027291802 -0.00499504229370284 -0.00400774539649795 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861896_C_CT 10 124861896 -2.34361530890309 -0.00980634181046591 -0.011972669645862 -0.0122350494284547 -0.0114596665769731 -0.0101775802247421 -0.0087141702195348 -0.00726192484030941 -0.00592613088375993 -0.00475596454737515 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124861896_CT_C 10 124861896 -2.11354144863348 -0.00409914225762442 -0.00502058249042037 -0.00513257112179355 -0.00480186548615391 -0.00425664741908793 -0.00363676336165764 -0.00302419255784248 -0.0024630230271776 -0.00197323604012922 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124862342_G_GTTTC 10 124862342 -0.864289568967809 0.0117991261534693 0.0143437545655223 0.0146504461140693 0.01374317670312 0.0122367825312009 0.0105078147886846 0.00878196212628257 
0.00718560700752313 0.00578016109837343 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124862342_GTTTC_G 10 124862342 -2.07217144588303 -0.00258142782033044 -0.00317205002266618 -0.00324407967452078 -0.00303152917706973 -0.00268212882431484 -0.00228644336901063 -0.00189708658195631 -0.00154187258018723 -0.00123300999567011 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124862659_C_T 10 124862659 -2.00640662063191 -0.0015945570639424 -0.00197002543164304 -0.00201606566951762 -0.00188036208133902 -0.0016583151324463 -0.0014084341022178 -0.00116423409372279 -0.000942947211313605 -0.000751729697384906 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124863331_C_A 10 124863331 -1.22981344082261 0.00648215511408479 0.00786783716180706 0.00803454041488383 0.00754119935295261 0.00672080692922439 0.00577726742646778 0.00483339430462681 0.00395855802165279 0.00318694041292122 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124863667_C_T 10 124863667 -2.3114541036934 -0.00829396163175478 -0.0101306759535689 -0.0103532432628235 -0.00969558317750518 -0.008608602562032 -0.00736858267875284 -0.00613875319919099 -0.0050081841819436 -0.0040183049156246 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124863792_G_A 10 124863792 -2.41338949335586 -0.0102158448242644 -0.0124713467902331 -0.0127444957394616 -0.0119372685587149 -0.0106023974915321 -0.00907853859404728 -0.00756609550762599 -0.00617474587064493 -0.00495576633282546 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124863826_G_A 10 124863826 -2.41855974376777 -0.0101440851973633 -0.0123838778534062 -0.0126551275127089 -0.0118535151938319 -0.0105279425574754 -0.00901471897582562 -0.00751285317703587 -0.00613125311405538 -0.00492083048354353 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124863850_G_GC 10 124863850 -2.41352642132538 -0.0102144621698033 -0.012469660943899 -0.0127427732232159 -0.0119356544404736 -0.0106009628320707 -0.00907730911471605 -0.00756507000458795 -0.00617390830827391 
-0.00495509366272406 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124863879_T_C 10 124863879 -2.41268773268103 -0.0102220363045333 -0.0124788964814622 -0.0127522097145749 -0.0119444969101368 -0.0106088219217635 -0.00908404396312212 -0.00757068729584676 -0.0061784959761586 -0.0049587780367748 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124864887_G_C 10 124864887 -2.41381085358942 -0.0102111816273278 -0.0124656612846996 -0.0127386865962063 -0.0119318248998526 -0.0105975589377953 -0.00907439191290926 -0.00756263668078105 -0.00617192085803975 -0.00495349743346196 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124864962_G_A 10 124864962 -2.36296427035108 -0.00948256910029599 -0.0115783613765692 -0.0118322222259506 -0.0110820300565244 -0.00984169616350661 -0.00842609600830224 -0.00702145659546582 -0.00572959297396114 -0.00459802178713353 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124864997_C_T 10 124864997 -2.14306511331013 -0.000260582271575327 -0.000344678931059939 -0.000355510086136768 -0.000323907281513769 -0.000274320602060119 -0.000221798724961975 -0.000173991441501986 -0.000133826557028982 -0.00010165157100106 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124865002_G_A 10 124865002 -2.41251023227331 -0.0102177237552032 -0.0124736416121158 -0.0127468409641547 -0.0119394648622468 -0.0106043476515545 -0.00908020794689834 -0.00756748634465332 -0.00617588064852326 -0.00495667689130519 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124865704_C_T 10 124865704 -2.356457849256 -0.00810041343117263 -0.00989494605377494 -0.0101124190637973 -0.00946982283804632 -0.00840780679078579 -0.00719637044055998 -0.00599500068627545 -0.00489069306086476 -0.00392388550016687 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124866001_C_T 10 124866001 -2.41724916412078 -0.0101958677186325 -0.0124469888190628 -0.012719607906591 -0.011913947000719 -0.0105816689704175 -0.00906077472772004 -0.00755127886086404 -0.0061626447093448 -0.00494604760820128 
-UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124866099_A_G 10 124866099 -2.35790509730176 -0.00943119364994427 -0.0115157563517401 -0.0117682599888602 -0.0110220803751888 -0.0097883931689875 -0.00838039806867608 -0.00698332514382205 -0.00569843849875928 -0.00457299284562085 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124866319_G_A 10 124866319 -2.18814687053492 0.00103374299003889 0.00123404983497233 0.00125763382490041 0.00118751492896596 0.00106875046180654 0.000928882685671262 0.000785528494525511 0.000649649603641667 0.000527441820001417 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124867389_CA_C 10 124867389 -2.42439460706178 -0.0101842635723424 -0.0124330062519746 -0.0127053421280041 -0.0119005213782186 -0.0105696512953779 -0.00905039308598443 -0.00754255137385229 -0.00615546565041747 -0.00494024614327948 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124867437_G_A 10 124867437 -2.42181542402192 -0.0101066826092229 -0.0123382838178938 -0.0126085430100131 -0.0118098586950279 -0.0104891346011668 -0.00898145612081969 -0.00748510459873053 -0.00610858680202764 -0.00490262433592559 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124867446_C_G 10 124867446 -1.85215232914845 -0.00219644256812179 -0.00270409053415355 -0.00276612269390064 -0.00258314719750308 -0.0022828582301293 -0.00194355207155761 -0.00161048553124532 -0.00130734889056683 -0.00104434379659546 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124867617_A_G 10 124867617 0.0880066890301177 0.0386184419415754 0.0470141016771235 0.0480276683821961 0.0450303335243802 0.0400606280284359 0.0343672187321142 0.0286951992821192 0.0234585417857001 0.018855811854865 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124868004_G_T 10 124868004 -2.19059206540259 0.000953096955157218 0.00113580654021028 0.00115726448532039 0.00109343176676324 0.000985083395587338 0.00085713839380297 0.000725651487846868 0.000600719590937437 0.000488126203770367 -UROS.11248.43.3..1 
chr10:124823285-126823285 chr10_124868077_G_GTGA 10 124868077 -2.41899105915463 -0.0101780617338392 -0.0124252775963898 -0.0126974243405238 -0.011893159682852 -0.0105631931415644 -0.00904494157493074 -0.00753807276433704 -0.00615185904515902 -0.00493738546844558 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124868108_C_T 10 124868108 -2.14137376350746 0.00264440227928109 0.00319616521047505 0.00326221109855407 0.00306654318791511 0.00273974650405817 0.00236175554929829 0.0019813871070169 0.00162687466035072 0.00131264841277945 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124868637_C_T 10 124868637 -2.35102908566065 -0.00780264455196455 -0.00953175666351491 -0.0097413143052516 -0.00912211407945041 -0.00809881776910881 -0.00693163082493786 -0.00577423046153225 -0.00471041807656247 -0.00377912609346032 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124868925_C_T 10 124868925 -2.42306586227239 -0.0100929881505887 -0.0123215899667151 -0.0125914864714836 -0.0117938742724766 -0.0104749255068342 -0.00896927737872755 -0.00747494489504685 -0.00610028793409745 -0.00489595850814961 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124869257_C_A 10 124869257 -2.40674804533862 -0.0101603128281558 -0.0124038473720414 -0.0126755543259103 -0.0118725933059327 -0.0105448069896554 -0.00902908155322146 -0.00752475900861693 -0.00614092186447524 -0.00492855719484586 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124870301_A_T 10 124870301 -2.42454937630958 -0.00974110134836925 -0.0118928028542493 -0.0121534054533319 -0.0113832711043136 -0.0101098404746653 -0.0086562759604818 -0.0072137650952655 -0.00588689393417718 -0.00472451987113187 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124870342_G_A 10 124870342 -2.24254240705457 -0.00155228805833918 -0.001918805622525 -0.0019637739393592 -0.0018312471997417 -0.0016144878414015 -0.00137070144089391 -0.00113261520680918 -0.000917011551710978 -0.000730820734208049 -UROS.11248.43.3..1 chr10:124823285-126823285 
chr10_124870593_C_T 10 124870593 -2.28037853045056 -0.00134568315480665 -0.00166453583354098 -0.00170367426888918 -0.00158833865386798 -0.00139980328914646 -0.00118791410601071 -0.000981141917813844 -0.000794040732649215 -0.000632581205984195 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124870765_G_A 10 124870765 -2.34195939181553 -0.00902961921251144 -0.0110268481587883 -0.0112688079747527 -0.0105538087631345 -0.00937181748354243 -0.00802304244203489 -0.00668496244615779 -0.00545453617141289 -0.00437695428784401 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124870937_A_AAG 10 124870937 -2.37230685613492 -0.0093384519906885 -0.0114030468423576 -0.0116531449753792 -0.0109140826757277 -0.00969221702959722 -0.00829779624754634 -0.00691427904751718 -0.0056419356567381 -0.00452753650781235 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124871088_G_A 10 124871088 -2.26562276538336 -0.00537848873693392 -0.00657950314815325 -0.00672528021667285 -0.00629467842352716 -0.0055839768997612 -0.00477473843442233 -0.00397376401807215 -0.00323886218870451 -0.0025965392986782 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124871508_C_A 10 124871508 -2.41297959802709 -0.0101947873312165 -0.0124456400135347 -0.0127182258369429 -0.011912662783883 -0.0105805434293291 -0.00905982558303986 -0.00755049984142708 -0.00616201788623894 -0.00494555077041792 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124871651_C_T 10 124871651 -2.3623966394335 -0.00830651776258673 -0.0101460758460079 -0.0103689899794577 -0.00971030666917239 -0.00862164030402823 -0.00737970712410219 -0.00614799112696929 -0.0050156980025875 -0.00402431740164788 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124871717_A_G 10 124871717 -2.4120830893663 -0.0102036150574341 -0.0124564046810671 -0.0127292247990596 -0.0119229691746092 -0.0105897033553912 -0.00906767491220384 -0.00755704645855548 -0.00616736437406873 -0.0049498444409326 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124872065_T_A 
10 124872065 -2.30465233503976 -0.00365486539906623 -0.00447767922271014 -0.00457770666273749 -0.00428233826081614 -0.00379550118031746 -0.00324217557751449 -0.00269557085906413 -0.00219500349589508 -0.00175824653571421 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124872284_G_T 10 124872284 -2.30348823457678 -0.00361587902324656 -0.0044301847211381 -0.0045291842032591 -0.00423685518766437 -0.00375505420705258 -0.00320749298886014 -0.0026666256327994 -0.00217135053572592 -0.00173924146888726 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124872559_C_T 10 124872559 -2.35818431673202 -0.00824558293686994 -0.0100718622175235 -0.0102931724982511 -0.00963923160842528 -0.0085584251654911 -0.00732549161127682 -0.00610273627658842 -0.00497871148928031 -0.00399459471276487 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124872986_C_A 10 124872986 -0.923706737356496 0.0113315790353465 0.0137774642370032 0.0140723123671753 0.0132001198497949 0.0117521885172498 0.010090661306521 0.00843248146293307 0.0068990280585175 0.00554919908147644 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124872994_G_A 10 124872994 -2.02341254613387 -0.00305117962125623 -0.0037442032562236 -0.0038286019425362 -0.00357947548896753 -0.00316945864580731 -0.00270437280105584 -0.00224592262039458 -0.0018269597359315 -0.00146209816974752 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874052_A_T 10 124874052 -2.38693357409243 -0.00857135685344668 -0.0104677043984567 -0.0106974530085249 -0.0100185440549705 -0.00889626085821815 -0.00761568940437174 -0.00634534793821828 -0.00517727850068006 -0.00415436230611821 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874155_T_G 10 124874155 -2.39775412364336 -0.0095381309483078 -0.0116458920824067 -0.0119011954574866 -0.0111467353237535 -0.00989931199233807 -0.00847557202956661 -0.00706280566807305 -0.00576342417034059 -0.00462523416929539 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874233_C_T 10 124874233 -2.3657502195825 
-0.00819925708227709 -0.0100143852244652 -0.0102343204824731 -0.0095844263876832 -0.00851021757446402 -0.00728467031510993 -0.0060690927664635 -0.00495153679656823 -0.00397298222001563 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874407_G_A 10 124874407 -2.26168534408016 -0.00422404575473223 -0.00517125252840644 -0.00528631338163388 -0.00494649756284593 -0.0043860331397263 -0.0037484614709502 -0.00311803508838837 -0.00254017415289898 -0.0020355542196957 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874580_T_C 10 124874580 -1.19481654392163 0.0101655306157933 0.0123577032463165 0.0126219163375412 0.0118403142047154 0.0105425674085526 0.00905306449880472 0.00756622814239627 0.00619093445407781 0.0049800925341712 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874628_C_CT 10 124874628 -2.21762065241773 -0.0068649175779858 -0.00838864786078775 -0.00857336997435709 -0.00802758682496441 -0.00712586538941729 -0.00609771797206493 -0.00507858055624455 -0.00414219430208052 -0.00332272602368633 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874628_C_CTT 10 124874628 -2.41714408502127 -0.00800738758039721 -0.00978066315042758 -0.00999554192893104 -0.00936060032146635 -0.00831117033048256 -0.0071139884713225 -0.00592664550866528 -0.00483513478250375 -0.00387945455836292 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124874628_CT_C 10 124874628 -2.32866712978018 -0.00982315867274641 -0.0119934445874148 -0.0122563103102897 -0.0114794955938891 -0.0101950662573653 -0.00872902015657351 -0.00727419931742856 -0.00593607209606306 -0.00476388984716758 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124875024_TG_T 10 124875024 -2.42452927878212 -0.00995743974258767 -0.0121564186176411 -0.0124227350109321 -0.0116357077968465 -0.0103342934182642 -0.00884870870390708 -0.00737433855345104 -0.00601808911237178 -0.0048299211207814 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124875025_G_T 10 124875025 -2.4074340910167 -0.0100566566558609 
-0.0122776043978226 -0.0125465832484069 -0.0117516889421418 -0.0104372724088253 -0.00893685570121816 -0.00744777584257594 -0.00607800384572599 -0.00487799564911295 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124875780_A_C 10 124875780 -2.38071407300441 -0.00964567364203406 -0.0117770634939354 -0.0120352252895088 -0.0112723161652171 -0.0100109112755642 -0.00857119345630197 -0.00714254995264962 -0.00582854491132201 -0.00467752856522763 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124875816_T_G 10 124875816 -2.27825848629036 -0.00467085688263058 -0.00571556438176835 -0.00584240515522705 -0.00546775928303234 -0.00484958240206401 -0.00414595096883952 -0.00344977312729711 -0.00281126046625824 -0.00225337270792991 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124875879_A_G 10 124875879 -2.28786347712826 -0.00487631012383982 -0.00596584354113627 -0.00609809996602451 -0.00570744142188451 -0.00506273121830825 -0.00432872766074865 -0.00360231827681012 -0.00293591784287139 -0.00235353665601723 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124875900_C_T 10 124875900 -2.27825848629036 -0.00467085688263058 -0.00571556438176835 -0.00584240515522705 -0.00546775928303234 -0.00484958240206401 -0.00414595096883952 -0.00344977312729711 -0.00281126046625824 -0.00225337270792991 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876023_T_C 10 124876023 -2.27825848629036 -0.00467085688263058 -0.00571556438176835 -0.00584240515522705 -0.00546775928303234 -0.00484958240206401 -0.00414595096883952 -0.00344977312729711 -0.00281126046625824 -0.00225337270792991 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876126_G_C 10 124876126 -2.42442534698811 -0.00996620261597592 -0.0121671208762901 -0.0124336722754026 -0.0116459506476527 -0.0103433885272244 -0.00885649441035641 -0.00738082549002339 -0.00602338193984098 -0.00483416826723149 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876310_G_A 10 124876310 -2.27825848629036 -0.00467085688263058 
-0.00571556438176835 -0.00584240515522705 -0.00546775928303234 -0.00484958240206401 -0.00414595096883952 -0.00344977312729711 -0.00281126046625824 -0.00225337270792991 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876408_T_A 10 124876408 -2.27825848629036 -0.00467085688263058 -0.00571556438176835 -0.00584240515522705 -0.00546775928303234 -0.00484958240206401 -0.00414595096883952 -0.00344977312729711 -0.00281126046625824 -0.00225337270792991 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876535_G_C 10 124876535 -2.27829632805935 -0.0046719314900967 -0.00571687347664041 -0.00584374258032572 -0.00546901294301305 -0.00485069726480258 -0.00414690695804953 -0.00345057098314872 -0.00281191245252588 -0.00225389658222452 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876564_C_CAAATAAATTAAATAAAT 10 124876564 -2.12942475684804 -0.00158894226318473 -0.00196172235023306 -0.00200739705782738 -0.00187274792744985 -0.00165230120068705 -0.00140402024302455 -0.00116116606062144 -0.000940902542695632 -0.000750411885188207 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876568_C_CAAAT 10 124876568 -2.36215716918498 -0.00930963241113991 -0.0113680331574431 -0.0116173857781154 -0.0108805285028986 -0.00966232519673138 -0.00827211136925454 -0.00689279817253707 -0.00562434798885292 -0.00451338044114014 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876568_C_CAAATAAAT 10 124876568 -0.648929701322621 0.0308435023885512 0.037538910758864 0.0383469638093343 0.0359572174258802 0.0319938803026778 0.0274518144402669 0.0229251617514246 0.0187445132080155 0.0150688257769223 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876568_C_T 10 124876568 -2.18052202855304 -0.0025014134292598 -0.00307281178678309 -0.00314247042041149 -0.00293689828122723 -0.00259888188556445 -0.00221594741345088 -0.00183898574125596 -0.00149494453347643 -0.00119568940126769 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876568_CAAATAAATAAAT_C 10 124876568 
-2.39512978852103 -0.00966641362165488 -0.0118020856746277 -0.0120607589826016 -0.0112963335514102 -0.0100323896310339 -0.00858972654791046 -0.00715811151822665 -0.00584133104862161 -0.00468785065644051 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876675_A_G 10 124876675 -2.18052202855304 -0.0025014134292598 -0.00307281178678309 -0.00314247042041149 -0.00293689828122723 -0.00259888188556445 -0.00221594741345088 -0.00183898574125596 -0.00149494453347643 -0.00119568940126769 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876737_A_G 10 124876737 -2.18052202855304 -0.0025014134292598 -0.00307281178678309 -0.00314247042041149 -0.00293689828122723 -0.00259888188556445 -0.00221594741345088 -0.00183898574125596 -0.00149494453347643 -0.00119568940126769 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876844_A_AT 10 124876844 -2.18052202855304 -0.0025014134292598 -0.00307281178678309 -0.00314247042041149 -0.00293689828122723 -0.00259888188556445 -0.00221594741345088 -0.00183898574125596 -0.00149494453347643 -0.00119568940126769 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124876970_G_T 10 124876970 -2.19095558457118 -0.0026607600886992 -0.0032669188210912 -0.00334077688664092 -0.00312278806469335 -0.00276419634652525 -0.0023577087879838 -0.00195730204120537 -0.0015916325090366 -0.00127338083983242 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124877181_G_A 10 124877181 -2.14942874363679 -0.00173198752737358 -0.00213550753494474 -0.0021848838874825 -0.00203928133960396 -0.00180063515418505 -0.00153144685282891 -0.00126770412651389 -0.00102810371357176 -0.00082057698983995 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124877742_C_A 10 124877742 -2.06789359669105 0.00262987934722991 0.00317820519682677 0.00324382912345111 0.00304940571873802 0.00272464288181862 0.00234893708859962 0.00197079826345536 0.00161830332058033 0.00130581835814692 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124877843_A_G 10 124877843 -2.11433783846441 
-0.00499598240766241 -0.00611346990748007 -0.00624915308189644 -0.00584839244132107 -0.00518713173971674 -0.00443447616285741 -0.00368981097672227 -0.00300684247595751 -0.00241012368287175 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124879093_T_C 10 124879093 -2.14290127114632 -0.00384406929353576 -0.0047133166240183 -0.00481908880636928 -0.00450682075191633 -0.00399251706057679 -0.00340857100185765 -0.00283234773866159 -0.00230520965746672 -0.0018457033228696 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124879923_G_C 10 124879923 -2.40574668912287 -0.0102621202589086 -0.0125278272947424 -0.0128022120185198 -0.0119913327898047 -0.0106504215446259 -0.00911966590142299 -0.00760037614135145 -0.00620272648010101 -0.00497822609501108 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880248_C_T 10 124880248 -1.75985774107497 -0.000877998601555241 -0.00109773845343986 -0.00112497966076397 -0.00104486327676945 -0.000914988452652477 -0.000770712572622845 -0.000631732887391045 -0.000507600807614583 -0.000401785470919513 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880294_G_A 10 124880294 -1.9302822333851 -0.00663373426827363 -0.00810982892571577 -0.0082888702952415 -0.00775992850902441 -0.00688640253167883 -0.00589097568227137 -0.00490487765349767 -0.00399938193617988 -0.00320736784692865 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880336_T_C 10 124880336 -2.3971867783626 -0.00536784219154596 -0.00656538312311383 -0.00671071122189026 -0.006281419100306 -0.0055727663806806 -0.00476568956506807 -0.00396667520303096 -0.00323341469103466 -0.00259240340282307 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880404_C_CA 10 124880404 -1.12033853837012 0.0244027604505059 0.0296972495055932 0.0303361632276444 0.0284465890882575 0.0253124981487161 0.0217203410918638 0.0181399226250618 0.0148327803451616 0.0119247739464381 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880404_CA_C 10 124880404 -2.1183672853477 -0.00473362071415639 
-0.00579071933489494 -0.00591902499045105 -0.00554002302072565 -0.00491448643477943 -0.00420222218601563 -0.00349723797950663 -0.00285041846016343 -0.00228509379823727 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880634_A_G 10 124880634 -2.42263994346138 -0.00980890127822365 -0.0119749417421993 -0.0122372647243512 -0.011462035241383 -0.0101801143860145 -0.00871675898083257 -0.0072644269715183 -0.00592843026074208 -0.00475799007795086 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880658_C_G 10 124880658 -1.54338394539326 0.0054732181143331 0.006636598101601 0.00677638902011513 0.00636258344555207 0.00567375159401484 0.00488046004011222 0.0040857800176437 0.00334825903324143 0.00269700196257672 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880660_C_G 10 124880660 -2.42416829065956 -0.00990841040868284 -0.0120961267053583 -0.0123610679168187 -0.0115780975040773 -0.0102833454460893 -0.00880529657188056 -0.0073383332879815 -0.00598883475236445 -0.00480653243049112 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124880710_T_C 10 124880710 -2.40511428331389 -0.0101195205057487 -0.0123543389114853 -0.01262499780101 -0.0118251391319895 -0.0105025123221472 -0.00899272201284296 -0.00749433786698495 -0.00611600569828985 -0.00490849710312347 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124881424_C_T 10 124881424 -1.36919701492274 0.00999673883925034 0.0121489779011763 0.0124082824073661 0.0116411348917662 0.0103670205340629 0.00890407623996703 0.00744315622964464 0.00609131454102307 0.00490071372014 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124882052_C_T 10 124882052 -1.82420838934042 0.0022928177461039 0.0027633036003798 0.00281940697305805 0.00265306005708954 0.00237438141671209 0.00205074471555555 0.00172371164138019 0.00141772511562177 0.00114558890483929 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883060_G_A 10 124883060 -1.90172214488359 0.000577834630465457 0.000674749179790268 0.000685735811911226 
0.000652797970321473 0.000595242745631719 0.000524817245715514 0.00044992479634498 0.000376620248644155 0.000308915212811733 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883112_C_G 10 124883112 -1.89887216225045 0.000537991937034743 0.000626233357059025 0.000636172694886472 0.000606331851326747 0.000553910421083437 0.000489364593859509 0.000420327911155205 0.000352428055699683 0.00028947216448838 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883394_T_TATTC 10 124883394 -2.38825389806601 -0.00878270817768545 -0.0107237598647796 -0.0109588724170124 -0.0102640787576536 -0.00911532698727369 -0.00780423307672162 -0.00650327561321129 -0.00530675905511302 -0.00425869852260474 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883535_T_A 10 124883535 -2.38883035182704 -0.00876769840310532 -0.0107054703526681 -0.01094018654477 -0.0102465647594068 -0.009099754282758 -0.00779088174651577 -0.00649213456308928 -0.00529765622368883 -0.00425138528198943 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883669_A_G 10 124883669 -2.02483811858609 0.00281448798610162 0.00340305047823719 0.0034735314263088 0.00326474527569998 0.00291618593853871 0.00251323571720308 0.00210796785899481 0.00173043429351782 0.00139594610687688 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883741_G_T 10 124883741 -1.90172214488359 0.000577834630465457 0.000674749179790268 0.000685735811911226 0.000652797970321473 0.000595242745631719 0.000524817245715514 0.00044992479634498 0.000376620248644155 0.000308915212811733 -UROS.11248.43.3..1 chr10:124823285-126823285 chr10_124883757_G_T 10 124883757 -2.23798452352228 -0.00323497440754217 -0.00396937981280354 -0.00405881135783215 -0.00379482448051105 -0.00336031726431107 -0.00286740186100642 -0.00238146493585711 -0.00193733324513268 -0.00155051237063519 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1004625_A_G 1 1004625 31.6611029091783 -0.202959075620786 -0.202130730314703 -0.20961965656411 -0.220935552186467 -0.231637998079872 
-0.238725148211663 -0.24082771183179 -0.237793637394013 -0.230147752064775 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1005429_C_CA 1 1005429 32.221487936929 -0.195965362215776 -0.195159905703761 -0.202442615288331 -0.213450908310367 -0.223867149857143 -0.230767346101171 -0.232814731479065 -0.229859524607447 -0.222413891866702 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1005429_C_CA 1 1005429 32.221487936929 -0.195965362215776 -0.195159905703761 -0.202442615288331 -0.213450908310367 -0.223867149857143 -0.230767346101171 -0.232814731479065 -0.229859524607447 -0.222413891866702 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1005904_C_T 1 1005904 32.2201611534391 -0.195991762843284 -0.195186220371416 -0.202469703004364 -0.213479148809414 -0.223896462401234 -0.230797358533138 -0.232844950282998 -0.229889448070568 -0.222443065660339 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1005954_G_A 1 1005954 32.2307888818185 -0.19578105619256 -0.19497623026446 -0.202253251284341 -0.213253021383546 -0.223661291151955 -0.230556257657266 -0.232602096369102 -0.229649101642647 -0.222209070783182 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1006159_C_T 1 1006159 31.136467292701 -0.209990149528141 -0.209138719976353 -0.216835315041259 -0.228461477008492 -0.239453018312253 -0.246729173853613 -0.248887512499901 -0.245773726161243 -0.237925750736744 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1007746_CTTTTTTTTTTTTTT_C 1 1007746 32.9559850984322 -0.038316648883864 -0.0380238494960996 -0.0406893282630594 -0.0448305028253735 -0.04887519730207 -0.051621545891178 -0.0524445636406954 -0.0512453110009252 -0.0482663766774842 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1008088_T_C 1 1008088 1.37412874657265 0.327702121948042 0.326632473445201 0.336381513669191 0.350847368073308 0.364259715641688 0.373012957292964 0.375606404726796 0.37190257498043 0.362433775328293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1008307_G_C 1 1008307 32.5456111578158 -0.194700204398019 -0.193898250606131 -0.201149852637882 
-0.212113006036389 -0.222488569395536 -0.229362885636536 -0.231402755433439 -0.228458430823835 -0.22104090567139 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1008307_G_C 1 1008307 32.5456111578158 -0.194700204398019 -0.193898250606131 -0.201149852637882 -0.212113006036389 -0.222488569395536 -0.229362885636536 -0.231402755433439 -0.228458430823835 -0.22104090567139 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1009184_T_C 1 1009184 32.5611388775887 -0.194290897321716 -0.193490230314105 -0.200730245427149 -0.211676195244074 -0.222035837874369 -0.228899799643638 -0.230936619539856 -0.227996653323558 -0.220590227413268 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1009184_T_C 1 1009184 32.5611388775887 -0.194290897321716 -0.193490230314105 -0.200730245427149 -0.211676195244074 -0.222035837874369 -0.228899799643638 -0.230936619539856 -0.227996653323558 -0.220590227413268 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1009716_C_T 1 1009716 2.71104079064173 0.494816635254447 0.493192703993635 0.50794034053314 0.529835423625354 0.550140638470176 0.563389361772642 0.567308216212787 0.561697932338878 0.547375550942794 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1009731_C_T 1 1009731 2.82150405521786 0.540342129323484 0.538562825668397 0.554719332306166 0.57871213290032 0.600969016749107 0.615494215722806 0.619790737162891 0.61363890191046 0.597937184159163 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010094_C_T 1 1010094 2.24493920482519 0.411968335337582 0.41061718608088 0.422907017935993 0.441149233643318 0.458066284407125 0.469106147268475 0.472374152232941 0.467700270944111 0.455760977707615 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010232_C_T 1 1010232 1.91202916188143 0.359803709417789 0.358629772331635 0.36932105260348 0.385180619164498 0.399879856468536 0.409469520222412 0.41230946464606 0.408251380242343 0.397876921424855 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010481_ATTAT_A 1 1010481 1.77173807821859 0.32367057938181 0.322617627724349 0.332215602605901 0.34644932972347 
0.359638760397212 0.368242813192753 0.370791808350112 0.367151750023736 0.357841775774211 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010481_ATTAT_A 1 1010481 1.77173807821859 0.32367057938181 0.322617627724349 0.332215602605901 0.34644932972347 0.359638760397212 0.368242813192753 0.370791808350112 0.367151750023736 0.357841775774211 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010481_ATTAT_A 1 1010481 1.77173807821859 0.32367057938181 0.322617627724349 0.332215602605901 0.34644932972347 0.359638760397212 0.368242813192753 0.370791808350112 0.367151750023736 0.357841775774211 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010747_GT_G 1 1010747 1.75810715157238 0.324689439194826 0.323633006588353 0.33326218961169 0.347542511100306 0.360775365411779 0.369407817534277 0.371965166683577 0.368313013217345 0.358972409164632 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1010755_T_G 1 1010755 1.75810715157238 0.324689439194826 0.323633006588353 0.33326218961169 0.347542511100306 0.360775365411779 0.369407817534277 0.371965166683577 0.368313013217345 0.358972409164632 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1011654_G_A 1 1011654 2.12169251532154 0.366460361798816 0.365265937516033 0.376141030220329 0.392271491754132 0.407219493507684 0.416970000694678 0.419857141457765 0.415731154253511 0.40518294886129 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1013041_A_AAAC 1 1013041 2.00781021970105 0.362130484724756 0.360948644878316 0.371710067829663 0.387674855396944 0.402472485475907 0.412126585625301 0.414985420016895 0.410899869364181 0.400456289240032 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1013312_G_A 1 1013312 2.389285291571 0.421166743866767 0.419791984834859 0.432290918596263 0.450834183229199 0.468019640467544 0.479228539567116 0.482545253802808 0.477800748778892 0.465678374403175 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1013490_C_G 1 1013490 1.89538630594497 0.353868820883007 0.352714933468769 0.363225203506876 0.378815233728964 0.393263825335052 0.402689707855315 
0.405481310490667 0.40149274816844 0.39129510202527 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1013541_T_C 1 1013541 1.89945161357021 0.354469519574711 0.353313593450461 0.363842259202563 0.379459698800289 0.393933795126526 0.40337635110558 0.40617287510971 0.402177232445623 0.391961588903811 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1013855_G_A 1 1013855 1.89454057580979 0.354052298424014 0.352897840814967 0.363413213754561 0.379010797399775 0.393466353945608 0.402896750555113 0.405689674590678 0.401699201839202 0.391496694616208 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1014228_G_A 1 1014228 32.6418654124405 -0.180802804248279 -0.180043852226177 -0.186910406121919 -0.197302938239478 -0.207151747100776 -0.213684283615382 -0.215623798363078 -0.212823520223575 -0.205772783096275 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1014228_G_A 1 1014228 32.6418654124405 -0.180802804248279 -0.180043852226177 -0.186910406121919 -0.197302938239478 -0.207151747100776 -0.213684283615382 -0.215623798363078 -0.212823520223575 -0.205772783096275 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1014545_C_T 1 1014545 1.20281022405445 0.267916639443903 0.267054936386071 0.274920572041875 0.286572348338058 0.297357277091691 0.304387754403386 0.306471311531062 0.303499065561979 0.295888238410321 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1014863_A_C 1 1014863 30.7102047260281 -0.214696165616663 -0.213828713720751 -0.221669131092052 -0.233511900561953 -0.244706884587611 -0.252116790069818 -0.254314638616173 -0.251144400637229 -0.243154229162276 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1014863_A_C 1 1014863 30.7102047260281 -0.214696165616663 -0.213828713720751 -0.221669131092052 -0.233511900561953 -0.244706884587611 -0.252116790069818 -0.254314638616173 -0.251144400637229 -0.243154229162276 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015336_A_T 1 1015336 1.73768783536195 0.324567751473565 0.323512736755249 0.33312889658469 0.347388308919715 0.360600087747298 0.369217920528146 
0.371770851149316 0.368125094883084 0.3588001019182 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 31.1469863801803 -0.208655966377409 -0.207808108361277 -0.215472257639861 -0.227051979795868 -0.238002207875466 -0.245252278704403 -0.247402975301871 -0.244300066522567 -0.236480736750293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 31.1469863801803 -0.208655966377409 -0.207808108361277 -0.215472257639861 -0.227051979795868 -0.238002207875466 -0.245252278704403 -0.247402975301871 -0.244300066522567 -0.236480736750293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 31.1469863801803 -0.208655966377409 -0.207808108361277 -0.215472257639861 -0.227051979795868 -0.238002207875466 -0.245252278704403 -0.247402975301871 -0.244300066522567 -0.236480736750293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 31.1469863801803 -0.208655966377409 -0.207808108361277 -0.215472257639861 -0.227051979795868 -0.238002207875466 -0.245252278704403 -0.247402975301871 -0.244300066522567 -0.236480736750293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 31.1469863801803 -0.208655966377409 -0.207808108361277 -0.215472257639861 -0.227051979795868 -0.238002207875466 -0.245252278704403 -0.247402975301871 -0.244300066522567 -0.236480736750293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 31.1469863801803 -0.208655966377409 -0.207808108361277 -0.215472257639861 -0.227051979795868 -0.238002207875466 -0.245252278704403 -0.247402975301871 -0.244300066522567 -0.236480736750293 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1015950_G_A 1 1015950 0.884176683134713 0.195840705162238 0.195221010473166 0.200903976121869 0.209307445943998 0.217074019195252 0.222133581488668 0.223635775784243 0.221499942951625 0.216016641813618 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1016623_G_A 1 1016623 30.9861128754779 -0.210464865129048 -0.209610975855757 -0.217329439548726 -0.22899041237872 
-0.240016470987136 -0.247316190638715 -0.249481554483427 -0.246357691513044 -0.23848526219243 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 31.024532551894 -0.210241815843817 -0.209388737406676 -0.217099878090532 -0.228749800882685 -0.239765441432763 -0.247058283430011 -0.249221606899604 -0.246100659800412 -0.238235582934101 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 31.024532551894 -0.210241815843817 -0.209388737406676 -0.217099878090532 -0.228749800882685 -0.239765441432763 -0.247058283430011 -0.249221606899604 -0.246100659800412 -0.238235582934101 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 31.024532551894 -0.210241815843817 -0.209388737406676 -0.217099878090532 -0.228749800882685 -0.239765441432763 -0.247058283430011 -0.249221606899604 -0.246100659800412 -0.238235582934101 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 31.024532551894 -0.210241815843817 -0.209388737406676 -0.217099878090532 -0.228749800882685 -0.239765441432763 -0.247058283430011 -0.249221606899604 -0.246100659800412 -0.238235582934101 +ISG15.14148.2.3..1 chr1:1138-2001138 chr1_1022518_G_T 1 1022518 31.2281969352636 0.0634418252095963 0.0633989654806943 0.0637694407831844 0.0641436052558637 0.0642910202108671 0.0642800157751431 0.0642644363690663 0.0643069267573702 0.0643407268154315 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1004625_A_G 1 1004625 60.3575665524951 -1.78394536462074 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1005429_C_CA 1 1005429 61.024165250281 -1.73525144796326 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1005429_C_CA 1 1005429 61.024165250281 -1.73525144796326 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1005904_C_T 1 1005904 61.0221011561896 -1.73539907793572 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1005954_G_A 1 1005954 60.9716665050324 -1.73683060605725 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1006159_C_T 1 1006159 59.9874687452069 -1.78614534920697 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1007746_CTTTTTTTTTTTTTT_C 1 1007746 56.5256228788874 -1.19720405745599 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1008088_T_C 1 1008088 -0.907129234395213 25.243875269357 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1008307_G_C 1 1008307 65.6918304267104 -1.45884813824405 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1008307_G_C 1 1008307 65.6918304267104 -1.45884813824405 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1009184_T_C 1 1009184 65.771960163951 -1.44456525071005 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1009184_T_C 1 1009184 65.771960163951 -1.44456525071005 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1009716_C_T 1 1009716 -0.447856745307585 25.2017785595101 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1009731_C_T 1 1009731 -0.445841857837749 25.4269874827134 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010094_C_T 1 1010094 -0.649054852993949 25.4428175271184 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010232_C_T 1 1010232 -0.829276587086672 25.6799899493842 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010481_ATTAT_A 1 1010481 -0.836262770837368 25.4389216459073 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010481_ATTAT_A 1 1010481 -0.836262770837368 25.4389216459073 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010481_ATTAT_A 1 1010481 -0.836262770837368 25.4389216459073 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010747_GT_G 1 1010747 -0.826170274979466 
25.5495904572018 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1010755_T_G 1 1010755 -0.826170274979466 25.5495904572018 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1011654_G_A 1 1011654 -0.721925093708164 25.2119185314773 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1013041_A_AAAC 1 1013041 -0.683178587820276 25.4495400510799 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1013312_G_A 1 1013312 -0.646682543118569 25.7383508815934 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1013490_C_G 1 1013490 -0.830608796417208 25.621658716628 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1013541_T_C 1 1013541 -0.836649880908032 25.5935368859696 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1013855_G_A 1 1013855 -0.833887346945684 25.6126978286785 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1014228_G_A 1 1014228 68.87357824629 -1.44285040965178 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1014228_G_A 1 1014228 68.87357824629 -1.44285040965178 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1014545_C_T 1 1014545 -1.01063070074985 26.0676629268844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1014863_A_C 1 1014863 65.3967593369157 -1.45885206221497 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1014863_A_C 1 1014863 65.3967593369157 -1.45885206221497 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015336_A_T 1 1015336 -0.739349906651994 25.7895461533593 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 65.2749061618345 -1.41461869181183 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 65.2749061618345 
-1.41461869181183 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 65.2749061618345 -1.41461869181183 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 65.2749061618345 -1.41461869181183 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 65.2749061618345 -1.41461869181183 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015925_A_ATT 1 1015925 65.2749061618345 -1.41461869181183 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1015950_G_A 1 1015950 -1.01506118885836 25.5809772743873 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1016623_G_A 1 1016623 65.0485663530216 -1.4274791103387 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 65.3771920394664 -1.39957248113441 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 65.3771920394664 -1.39957248113441 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 65.3771920394664 -1.39957248113441 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1017114_A_AT 1 1017114 65.3771920394664 -1.39957248113441 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +ISG15.14151.4.3..1 chr1:1138-2001138 chr1_1022518_G_T 1 1022518 59.6277992377165 -0.0390342701330888 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 diff --git a/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py b/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py index ce892128a..4e7a87cc4 100644 --- a/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py +++ b/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue.py @@ -4,6 +4,7 @@ import pytest from pyspark.sql import DataFrame +from pyspark.sql import functions as f from gentropy.dataset.study_index import StudyIndex from 
gentropy.dataset.study_locus import StudyLocus @@ -41,12 +42,27 @@ def test_parse_susie_results( ) -def test_credsets_from_susie_results(processed_finemapping_df: DataFrame) -> None: - """Test creating a study locus from SuSIE results.""" - assert isinstance( - EqtlCatalogueFinemapping.from_susie_results(processed_finemapping_df), - StudyLocus, - ) +class TestEqtlCatalogueStudyLocus: + """Test the correctness of the study locus dataset from eQTL Catalogue.""" + + @pytest.fixture(autouse=True) + def _setup(self, processed_finemapping_df: DataFrame) -> DataFrame: + """Set up the test.""" + self.study_locus = EqtlCatalogueFinemapping.from_susie_results( + processed_finemapping_df + ) + + def test_credsets_from_susie_results(self: TestEqtlCatalogueStudyLocus) -> None: + """Test creating a study locus from SuSIE results.""" + assert isinstance(self.study_locus, StudyLocus) + + def test_locus_uniqueness(self: TestEqtlCatalogueStudyLocus) -> None: + """Test the uniqueness of the locus.""" + find_discrepancies = self.study_locus.df.select( + f.size("locus").alias("locus_size"), + f.size(f.array_distinct("locus")).alias("locus_distinct_size"), + ).filter(f.col("locus_size") != f.col("locus_distinct_size")) + assert find_discrepancies.count() == 0 def test_studies_from_susie_results(processed_finemapping_df: DataFrame) -> None: From 3ef43a9998534b0656af64cf4f5e560a40799346 Mon Sep 17 00:00:00 2001 From: Yakov Date: Thu, 17 Oct 2024 12:27:10 +0100 Subject: [PATCH 111/188] chore: adding logging even when no CS in locus (#848) * chore: adding logging even when no CS in locus * fix: addin CS count to log --- src/gentropy/susie_finemapper.py | 52 +++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 25adbccbe..5ec347e6a 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -127,7 +127,6 @@ def __init__( if 
result_logging["study_locus"] is not None: # Write result df = result_logging["study_locus"].df - df = df.withColumn("qualityControls", f.lit(None)) df = df.withColumn( "qualityControls", @@ -137,8 +136,8 @@ def __init__( StudyLocusQualityCheck.OUT_OF_SAMPLE_LD, ), ) - df.write.mode(session.write_mode).parquet(study_locus_output) + if result_logging["log"] is not None: # Write log result_logging["log"].to_parquet( study_locus_output + ".log", @@ -600,21 +599,40 @@ def susie_finemapper_from_prepared_dataframes( end_time = time.time() - log_df = pd.DataFrame( - { - "studyId": studyId, - "region": region, - "N_gwas_before_dedupl": N_gwas_before_dedupl, - "N_gwas": N_gwas, - "N_ld": N_ld, - "N_overlap": N_after_merge, - "N_outliers": N_outliers, - "N_imputed": N_imputed, - "N_final_to_fm": len(ld_to_fm), - "elapsed_time": end_time - start_time, - }, - index=[0], - ) + if study_locus is not None: + log_df = pd.DataFrame( + { + "studyId": studyId, + "region": region, + "N_gwas_before_dedupl": N_gwas_before_dedupl, + "N_gwas": N_gwas, + "N_ld": N_ld, + "N_overlap": N_after_merge, + "N_outliers": N_outliers, + "N_imputed": N_imputed, + "N_final_to_fm": len(ld_to_fm), + "elapsed_time": end_time - start_time, + "number_of_CS": study_locus.df.count(), + }, + index=[0], + ) + else: + log_df = pd.DataFrame( + { + "studyId": studyId, + "region": region, + "N_gwas_before_dedupl": N_gwas_before_dedupl, + "N_gwas": N_gwas, + "N_ld": N_ld, + "N_overlap": N_after_merge, + "N_outliers": N_outliers, + "N_imputed": N_imputed, + "N_final_to_fm": len(ld_to_fm), + "elapsed_time": end_time - start_time, + "number_of_CS": 0, + }, + index=[0], + ) return { "study_locus": study_locus, From d33f66a91a950b029078d15de905b770db07042d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:52:04 +0100 Subject: [PATCH 112/188] feat(l2g): limit colocalisation neighbourhood to protein coding genes (#847) * 
feat(l2g): wip - limit colocalisation neighbourhood to protein coding genes * feat: adjust logic in `common_neighbourhood_colocalisation_feature_logic` * fix: correct logic in `common_neighbourhood_vep_feature_logic` * chore: adjust tests --- .../dataset/l2g_features/colocalisation.py | 34 +++++--- src/gentropy/dataset/l2g_features/vep.py | 36 ++++---- tests/gentropy/dataset/test_l2g_feature.py | 82 +++++++++---------- 3 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py index c61daa909..319a128da 100644 --- a/src/gentropy/dataset/l2g_features/colocalisation.py +++ b/src/gentropy/dataset/l2g_features/colocalisation.py @@ -5,10 +5,10 @@ from typing import TYPE_CHECKING, Any import pyspark.sql.functions as f -from pyspark.sql import Window from gentropy.common.spark_helpers import convert_from_wide_to_long from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_features.l2g_feature import L2GFeature from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_index import StudyIndex @@ -77,6 +77,7 @@ def common_neighbourhood_colocalisation_feature_logic( *, colocalisation: Colocalisation, study_index: StudyIndex, + gene_index: GeneIndex, study_locus: StudyLocus, ) -> DataFrame: """Wrapper to call the logic that creates a type of colocalisation features. 
@@ -89,6 +90,7 @@ def common_neighbourhood_colocalisation_feature_logic( qtl_types (list[str] | str): The types of QTL to filter the data by colocalisation (Colocalisation): Dataset with the colocalisation results study_index (StudyIndex): Study index to fetch study type and gene + gene_index (GeneIndex): Gene index to add gene type study_locus (StudyLocus): Study locus to traverse between colocalisation and study index Returns: @@ -105,15 +107,21 @@ def common_neighbourhood_colocalisation_feature_logic( colocalisation=colocalisation, study_index=study_index, study_locus=study_locus, + ).join(gene_index.df.select("geneId", "biotype"), "geneId", "left") + # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) + # (non protein coding genes in the vicinity are excluded see #3552) + regional_mean_per_study_locus = ( + local_max.filter(f.col("biotype") == "protein_coding") + .groupBy("studyLocusId") + .agg(f.mean(local_feature_name).alias("regional_mean")) ) return ( - # Then compute maximum score in the vicinity (feature will be the same for any gene associated with a studyLocus) - local_max.withColumn( - "regional_maximum", - f.max(local_feature_name).over(Window.partitionBy("studyLocusId")), + local_max.join(regional_mean_per_study_locus, "studyLocusId", "left") + .withColumn( + feature_name, + f.col(local_feature_name) - f.coalesce(f.col("regional_mean"), f.lit(0.0)), ) - .withColumn(feature_name, f.col("regional_maximum") - f.col(local_feature_name)) - .drop("regional_maximum", local_feature_name) + .drop("regional_mean", local_feature_name, "biotype") ) @@ -163,7 +171,7 @@ def compute( class EQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): """Max CLPP for each (study, locus) aggregating over all eQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] feature_name = "eQtlColocClppMaximumNeighbourhood" 
@classmethod @@ -248,7 +256,7 @@ def compute( class PQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] feature_name = "pQtlColocClppMaximumNeighbourhood" @classmethod @@ -332,7 +340,7 @@ def compute( class SQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] feature_name = "sQtlColocClppMaximumNeighbourhood" @classmethod @@ -416,7 +424,7 @@ def compute( class EQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): """Max H4 for each (study, locus) aggregating over all eQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] feature_name = "eQtlColocH4MaximumNeighbourhood" @classmethod @@ -500,7 +508,7 @@ def compute( class PQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): """Max H4 for each (study, locus) aggregating over all pQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] feature_name = "pQtlColocH4MaximumNeighbourhood" @classmethod @@ -584,7 +592,7 @@ def compute( class SQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): """Max H4 for each (study, locus) aggregating over all sQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, StudyLocus] + feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] feature_name = "sQtlColocH4MaximumNeighbourhood" @classmethod diff --git a/src/gentropy/dataset/l2g_features/vep.py b/src/gentropy/dataset/l2g_features/vep.py index 13ac05f91..557d9a509 
100644 --- a/src/gentropy/dataset/l2g_features/vep.py +++ b/src/gentropy/dataset/l2g_features/vep.py @@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Any import pyspark.sql.functions as f -from pyspark.sql import Window from gentropy.common.spark_helpers import convert_from_wide_to_long from gentropy.dataset.gene_index import GeneIndex @@ -92,34 +91,29 @@ def common_neighbourhood_vep_feature_logic( DataFrame: Feature dataset """ local_feature_name = feature_name.replace("Neighbourhood", "") - # First compute mean distances to a gene local_metric = common_vep_feature_logic( study_loci_to_annotate, feature_name=local_feature_name, variant_index=variant_index, + ).join( + # Bring gene classification + gene_index.df.select("geneId", "biotype"), + "geneId", + "inner", + ) + # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) + # (non protein coding genes in the vicinity are excluded see #3552) + regional_mean_per_study_locus = ( + local_metric.filter(f.col("biotype") == "protein_coding") + .groupBy("studyLocusId") + .agg(f.mean(local_feature_name).alias("regional_mean")) ) return ( - # Then compute mean distance in the vicinity (feature will be the same for any gene associated with a studyLocus) - local_metric.join( - # Bring gene classification - gene_index.df.select("geneId", "biotype"), - "geneId", - "inner", - ) + local_metric.join(regional_mean_per_study_locus, "studyLocusId", "left") .withColumn( - "regional_metric", - f.coalesce( - # Calculate mean based on protein coding genes - f.mean( - f.when( - f.col("biotype") == "protein_coding", f.col(local_feature_name) - ) - ).over(Window.partitionBy("studyLocusId")), - # Default to 0 if there are no protein coding genes - f.lit(0), - ), + feature_name, + f.col(local_feature_name) - f.coalesce(f.col("regional_mean"), f.lit(0.0)), ) - .withColumn(feature_name, f.col(local_feature_name) - f.col("regional_metric")) .drop("regional_metric", local_feature_name, 
"biotype") ) diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 6d3b1b3af..bd6fca97c 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -119,6 +119,29 @@ def test_feature_factory_return_type( assert isinstance(feature_dataset, L2GFeature) +@pytest.fixture(scope="module") +def sample_gene_index(spark: SparkSession) -> GeneIndex: + """Create a sample gene index for testing.""" + return GeneIndex( + _df=spark.createDataFrame( + [ + { + "geneId": "gene1", + "biotype": "protein_coding", + "chromosome": "1", + }, + { + "geneId": "gene2", + "biotype": "lncRNA", + "chromosome": "1", + }, + ], + GeneIndex.get_schema(), + ), + _schema=GeneIndex.get_schema(), + ) + + class TestCommonColocalisationFeatureLogic: """Test the common logic of the colocalisation features.""" @@ -161,7 +184,9 @@ def test__common_colocalisation_feature_logic( ), "The feature values are not as expected." def test__common_neighbourhood_colocalisation_feature_logic( - self: TestCommonColocalisationFeatureLogic, spark: SparkSession + self: TestCommonColocalisationFeatureLogic, + spark: SparkSession, + sample_gene_index: GeneIndex, ) -> None: """Test the common logic of the neighbourhood colocalisation features.""" feature_name = "eQtlColocH4MaximumNeighbourhood" @@ -174,18 +199,20 @@ def test__common_neighbourhood_colocalisation_feature_logic( colocalisation=self.sample_colocalisation, study_index=self.sample_studies, study_locus=self.sample_study_locus, - ) + gene_index=sample_gene_index, + ).withColumn(feature_name, f.round(f.col(feature_name), 2)) + # expected average is (0.81)/1 = 0.81 expected_df = spark.createDataFrame( [ { "studyLocusId": "1", "geneId": "gene1", - "eQtlColocH4MaximumNeighbourhood": 0.08999999999999997, + "eQtlColocH4MaximumNeighbourhood": 0.0, # 0.81 - 0.81 }, { "studyLocusId": "1", "geneId": "gene2", - "eQtlColocH4MaximumNeighbourhood": 0.0, + 
"eQtlColocH4MaximumNeighbourhood": 0.09, # 0.9 - 0.81 }, ], ).select("studyLocusId", "geneId", "eQtlColocH4MaximumNeighbourhood") @@ -213,6 +240,7 @@ def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> N ), _schema=StudyLocus.get_schema(), ) + self.sample_colocalisation = Colocalisation( _df=spark.createDataFrame( [ @@ -378,7 +406,7 @@ def test_common_distance_feature_logic( observed_df.collect() == expected_df.collect() ), f"Expected and observed dataframes are not equal for feature {feature_name}." - def test_common_neighbourhood_colocalisation_feature_logic( + def test_common_neighbourhood_distance_feature_logic( self: TestCommonDistanceFeatureLogic, spark: SparkSession, ) -> None: @@ -564,30 +592,13 @@ def test_common_vep_feature_logic( def test_common_neighbourhood_vep_feature_logic_no_protein_coding( self: TestCommonVepFeatureLogic, spark: SparkSession, + sample_gene_index: GeneIndex, ) -> None: """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity. Because the genes in the vicinity are all non coding, the neighbourhood features should equal the local ones. 
""" feature_name = "vepMaximumNeighbourhood" - sample_gene_index = GeneIndex( - _df=spark.createDataFrame( - [ - { - "geneId": "gene1", - "biotype": "lncRNA", - "chromosome": "1", - }, - { - "geneId": "gene2", - "biotype": "lncRNA", - "chromosome": "1", - }, - ], - GeneIndex.get_schema(), - ), - _schema=GeneIndex.get_schema(), - ) observed_df = ( common_neighbourhood_vep_feature_logic( self.sample_study_locus, @@ -601,7 +612,11 @@ def test_common_neighbourhood_vep_feature_logic_no_protein_coding( ) expected_df = ( spark.createDataFrame( - (["1", "gene1", 0.66], ["1", "gene2", 1.0]), + # regional mean is 0.66 + ( + ["1", "gene1", 0.0], + ["1", "gene2", 0.34], + ), # (0.66-0.66) and (1.0 -0.66) ["studyLocusId", "geneId", feature_name], ) .orderBy(feature_name) @@ -614,27 +629,10 @@ def test_common_neighbourhood_vep_feature_logic_no_protein_coding( def test_common_neighbourhood_vep_feature_logic( self: TestCommonVepFeatureLogic, spark: SparkSession, + sample_gene_index: GeneIndex, ) -> None: """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity.""" feature_name = "vepMaximumNeighbourhood" - sample_gene_index = GeneIndex( - _df=spark.createDataFrame( - [ - { - "geneId": "gene1", - "biotype": "protein_coding", - "chromosome": "1", - }, - { - "geneId": "gene2", - "biotype": "lncRNA", - "chromosome": "1", - }, - ], - GeneIndex.get_schema(), - ), - _schema=GeneIndex.get_schema(), - ) observed_df = ( common_neighbourhood_vep_feature_logic( self.sample_study_locus, From 9c523972ad374e2ed990ace5fc197062099b8095 Mon Sep 17 00:00:00 2001 From: Yakov Date: Thu, 17 Oct 2024 15:02:45 +0100 Subject: [PATCH 113/188] fix: filter nan in CSs (#855) * fix: filter nan in CSs * fix: fix v1 --- src/gentropy/susie_finemapper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 
5ec347e6a..a5087e0d2 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -324,8 +324,8 @@ def susie_inf_to_studylocus( # noqa: C901 df = pd.DataFrame( { "credibleSetIndex": cred_set_index, - "purityMeanR2": purity_mean_r2, - "purityMinR2": purity_min_r2, + "purityMeanR2": list_purity_mean_r2, + "purityMinR2": list_purity_min_r2, "zScore": z_values, "neglogpval": neglogpval, } @@ -357,6 +357,7 @@ def susie_inf_to_studylocus( # noqa: C901 cred_sets = cred_sets.filter( (f.col("neglogpval") >= -np.log10(lead_pval_threshold)) & (f.col("credibleSetlog10BF") >= cs_lbf_thr * 0.4342944819) + & (~f.isnan(f.col("credibleSetlog10BF"))) & (f.col("purityMinR2") >= purity_min_r2_threshold) & (f.col("purityMeanR2") >= purity_mean_r2_threshold) ) From c68a1444a4e739d6e689f11e0aeb3976b97d641b Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:27:44 +0200 Subject: [PATCH 114/188] revert(finngen): restore the studyId prefix in finngen cs and si (#856) --- src/gentropy/config.py | 5 +- .../datasource/finngen/finemapping.py | 11 ++- .../datasource/finngen/study_index.py | 76 ++++++++++++++---- .../finngen_ukb_meta/study_index.py | 62 ++++++++------- src/gentropy/finngen_finemapping_ingestion.py | 8 +- src/gentropy/finngen_studies.py | 24 +++--- .../finngen/test_finngen_finemapping.py | 4 + .../finngen/test_finngen_study_index.py | 77 ++++++++++++++++--- 8 files changed, 194 insertions(+), 73 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index ad941f5e0..39ed62f9e 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -169,13 +169,14 @@ class FinngenFinemappingConfig(StepConfig): "gs://finngen-public-data-r11/finemap/full/susie/*.snp.bgz" ) finngen_susie_finemapping_cs_summary_files: str = ( - "gs://finngen-public-data-r11/finemap/summary/*.cred.summary.tsv" + "gs://finngen-public-data-r11/finemap/summary/*SUSIE.cred.summary.tsv" ) 
finngen_finemapping_out: str = MISSING + finngen_finemapping_lead_pvalue_threshold: float = 1e-5 + finngen_release_prefix: str = "FINNGEN_R11" _target_: str = ( "gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep" ) - finngen_finemapping_lead_pvalue_threshold: float = 1e-5 @dataclass diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 3c83ba8ff..5b8d21864 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -206,6 +206,7 @@ def from_finngen_susie_finemapping( spark: SparkSession, finngen_susie_finemapping_snp_files: (str | list[str]), finngen_susie_finemapping_cs_summary_files: (str | list[str]), + finngen_release_prefix: str, credset_lbf_threshold: float = 0.8685889638065036, ) -> StudyLocus: """Process the SuSIE finemapping output for FinnGen studies. @@ -261,6 +262,7 @@ def from_finngen_susie_finemapping( spark (SparkSession): SparkSession object. finngen_susie_finemapping_snp_files (str | list[str]): SuSIE finemapping output filename(s). finngen_susie_finemapping_cs_summary_files (str | list[str]): filename of SuSIE finemapping credible set summaries. + finngen_release_prefix (str): Finngen project release prefix. Should look like FINNGEN_R*. credset_lbf_threshold (float, optional): Filter out credible sets below, Default 0.8685889638065036 == np.log10(np.exp(2)), this is threshold from publication. Returns: @@ -295,7 +297,9 @@ def from_finngen_susie_finemapping( .filter(f.col("cs").cast(t.IntegerType()) > 0) .select( # Add study idenfitier. - f.col("trait").cast(t.StringType()).alias("studyId"), + f.concat_ws("_", f.lit(finngen_release_prefix), f.col("trait")) + .cast(t.StringType()) + .alias("studyId"), f.col("region"), # Add variant information. 
f.regexp_replace(f.col("v"), ":", "_").alias("variantId"), @@ -408,7 +412,10 @@ def from_finngen_susie_finemapping( (f.col("credibleSetlog10BF") > credset_lbf_threshold) | (f.col("credibleSetIndex") == 1) ) - .withColumn("studyId", f.col("trait")) + .withColumn( + "studyId", + f.concat_ws("_", f.lit(finngen_release_prefix), f.col("trait")), + ) ) processed_finngen_finemapping_df = processed_finngen_finemapping_df.join( diff --git a/src/gentropy/datasource/finngen/study_index.py b/src/gentropy/datasource/finngen/study_index.py index 3946323f4..dc0389aa1 100644 --- a/src/gentropy/datasource/finngen/study_index.py +++ b/src/gentropy/datasource/finngen/study_index.py @@ -3,6 +3,7 @@ from __future__ import annotations import re +from typing import TypedDict from urllib.request import urlopen import pyspark.sql.functions as f @@ -11,6 +12,13 @@ from gentropy.dataset.study_index import StudyIndex +class FinngenPrefixMatch(TypedDict): + """Class to store the output of the validate_release_prefix.""" + + prefix: str + release: str + + class FinnGenStudyIndex: """Study index dataset from FinnGen. @@ -25,11 +33,57 @@ class FinnGenStudyIndex: Some fields are also populated as constants, such as study type and the initial sample size. """ + @staticmethod + def validate_release_prefix(release_prefix: str) -> FinngenPrefixMatch: + """Validate release prefix passed to finngen StudyIndex. + + Args: + release_prefix (str): Finngen release prefix, should be a string like FINNGEN_R*. + + Returns: + FinngenPrefixMatch: Object containing valid prefix and release strings. + + Raises: + ValueError: when incorrect release prefix is provided. + + This method ensures that the trailing underscore is removed from prefix. 
+ """ + pattern = re.compile(r"FINNGEN_(?P<release>R\d+){1}_?") + pattern_match = pattern.match(release_prefix) + if not pattern_match: + raise ValueError( + f"Invalid FinnGen release prefix: {release_prefix}, use the format FINNGEN_R*" + ) + release = pattern_match.group("release").upper() + if release_prefix.endswith("_"): + release_prefix = release_prefix[:-1] + return FinngenPrefixMatch(prefix=release_prefix, release=release) + + @staticmethod + def read_efo_curation(session: SparkSession, url: str) -> DataFrame: + """Read efo curation from provided url. + + Args: + session (SparkSession): Session to use when reading the mapping file. + url (str): Url to the mapping file. The file provided should be a tsv file. + + Returns: + DataFrame: DataFrame with EFO mappings. + + Example of the file can be found in https://raw.githubusercontent.com/opentargets/curation/refs/heads/master/mappings/disease/manual_string.tsv. + """ + csv_data = urlopen(url).readlines() + csv_rows = [row.decode("utf8") for row in csv_data] + rdd = session.sparkContext.parallelize(csv_rows) + # NOTE: type annotations for spark.read.csv miss the fact that the first param can be [RDD[str]] + efo_curation_mapping_df = session.read.csv(rdd, header=True, sep="\t") + return efo_curation_mapping_df + @staticmethod def join_efo_mapping( study_index: StudyIndex, efo_curation_mapping: DataFrame, - finngen_release_prefix: str, + finngen_release: str, ) -> StudyIndex: """Add EFO mapping to the Finngen study index table. @@ -44,24 +98,11 @@ def join_efo_mapping( Args: study_index (StudyIndex): Study index table. efo_curation_mapping (DataFrame): Dataframe with EFO mappings. - finngen_release_prefix (str): FinnGen release prefix. + finngen_release (str): FinnGen release. Returns: StudyIndex: Study index table with added EFO mappings. - - Raises: - ValueError: when incorrect release prefix is provided. 
""" - finngen_release_prefix_regex = re.compile(r"FINNGEN_(?P<release>R\d+){1}_?") - finngen_release_prefix_match = finngen_release_prefix_regex.match( - finngen_release_prefix - ) - if not finngen_release_prefix_match: - raise ValueError( - f"Invalid FinnGen release prefix: {finngen_release_prefix}, use the format FINNGEN_R*" - ) - finngen_release = finngen_release_prefix_match.group("release").upper() - efo_mappings = ( efo_curation_mapping.withColumn("STUDY", f.upper(f.col("STUDY"))) .filter(f.col("STUDY").contains("FINNGEN")) @@ -109,9 +150,12 @@ def from_source( json_data = urlopen(finngen_phenotype_table_url).read().decode("utf-8") rdd = spark.sparkContext.parallelize([json_data]) raw_df = spark.read.json(rdd) + return StudyIndex( _df=raw_df.select( - f.concat(f.col("phenocode")).alias("studyId"), + f.concat( + f.concat_ws("_", f.lit(finngen_release_prefix), f.col("phenocode")) + ).alias("studyId"), f.col("phenostring").alias("traitFromSource"), f.col("num_cases").cast("integer").alias("nCases"), f.col("num_controls").cast("integer").alias("nControls"), diff --git a/src/gentropy/datasource/finngen_ukb_meta/study_index.py b/src/gentropy/datasource/finngen_ukb_meta/study_index.py index fe6c74beb..eefb2da7c 100644 --- a/src/gentropy/datasource/finngen_ukb_meta/study_index.py +++ b/src/gentropy/datasource/finngen_ukb_meta/study_index.py @@ -1,4 +1,5 @@ """Study Index for Finngen data source.""" + from __future__ import annotations from urllib.request import urlopen @@ -32,35 +33,42 @@ def from_source( StudyIndex: Parsed and annotated FinnGen UKB meta-analysis study table. """ # Read the raw study index and process. 
- study_index_df = ( - spark.read.csv(raw_study_index_path_from_tsv, sep="\t", header=True) - .select( - f.lit("gwas").alias("studyType"), - f.lit("FINNGEN_R11_UKB_META").alias("projectId"), - f.col("_gentropy_study_id").alias("studyId"), - f.col("name").alias("traitFromSource"), - f.lit(True).alias("hasSumstats"), - f.col("_gentropy_summary_stats_link").alias("summarystatsLocation"), - (f.col("fg_n_cases") + f.col("ukbb_n_cases") + f.col("fg_n_controls") + f.col("ukbb_n_controls")).cast("integer").alias("nSamples"), - f.array( - f.struct( - (f.col("fg_n_cases") + f.col("fg_n_controls")).cast("integer").alias("sampleSize"), - f.lit("Finnish").alias("ancestry"), - ), - f.struct( - (f.col("ukbb_n_cases") + f.col("ukbb_n_controls")).cast("integer").alias("sampleSize"), - f.lit("European").alias("ancestry"), - ), - ).alias("discoverySamples"), + study_index_df = spark.read.csv( + raw_study_index_path_from_tsv, sep="\t", header=True + ).select( + f.lit("gwas").alias("studyType"), + f.lit("FINNGEN_R11_UKB_META").alias("projectId"), + f.col("_gentropy_study_id").alias("studyId"), + f.col("name").alias("traitFromSource"), + f.lit(True).alias("hasSumstats"), + f.col("_gentropy_summary_stats_link").alias("summarystatsLocation"), + ( + f.col("fg_n_cases") + + f.col("ukbb_n_cases") + + f.col("fg_n_controls") + + f.col("ukbb_n_controls") ) + .cast("integer") + .alias("nSamples"), + f.array( + f.struct( + (f.col("fg_n_cases") + f.col("fg_n_controls")) + .cast("integer") + .alias("sampleSize"), + f.lit("Finnish").alias("ancestry"), + ), + f.struct( + (f.col("ukbb_n_cases") + f.col("ukbb_n_controls")) + .cast("integer") + .alias("sampleSize"), + f.lit("European").alias("ancestry"), + ), + ).alias("discoverySamples"), ) # Add population structure. 
- study_index_df = ( - study_index_df - .withColumn( - "ldPopulationStructure", - cls.aggregate_and_map_ancestries(f.col("discoverySamples")), - ) + study_index_df = study_index_df.withColumn( + "ldPopulationStructure", + cls.aggregate_and_map_ancestries(f.col("discoverySamples")), ) # Create study index. study_index = StudyIndex( @@ -75,6 +83,6 @@ def from_source( study_index = FinnGenStudyIndex.join_efo_mapping( study_index, efo_curation_mapping, - finngen_release_prefix="FINNGEN_R11", + finngen_release="R11", ) return study_index diff --git a/src/gentropy/finngen_finemapping_ingestion.py b/src/gentropy/finngen_finemapping_ingestion.py index ca5ca1656..c093bdb3d 100644 --- a/src/gentropy/finngen_finemapping_ingestion.py +++ b/src/gentropy/finngen_finemapping_ingestion.py @@ -8,6 +8,7 @@ from gentropy.common.session import Session from gentropy.config import FinngenFinemappingConfig from gentropy.datasource.finngen.finemapping import FinnGenFinemapping +from gentropy.datasource.finngen.study_index import FinnGenStudyIndex @dataclass @@ -21,6 +22,7 @@ def __init__( finngen_susie_finemapping_snp_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files: str = FinngenFinemappingConfig().finngen_susie_finemapping_cs_summary_files, finngen_finemapping_lead_pvalue_threshold: float = FinngenFinemappingConfig().finngen_finemapping_lead_pvalue_threshold, + finngen_release_prefix: str = FinngenFinemappingConfig().finngen_release_prefix, ) -> None: """Run FinnGen finemapping ingestion step. @@ -30,14 +32,18 @@ def __init__( finngen_susie_finemapping_snp_files(str): Path to the FinnGen SuSIE finemapping results. finngen_susie_finemapping_cs_summary_files (str): FinnGen SuSIE summaries for CS filters(LBF>2). finngen_finemapping_lead_pvalue_threshold (float): Lead p-value threshold. + finngen_release_prefix (str): Finngen project release prefix. Should look like FINNGEN_R*. 
""" # Read finemapping outputs from the input paths. - + finngen_release_prefix = FinnGenStudyIndex.validate_release_prefix( + finngen_release_prefix + )["prefix"] ( FinnGenFinemapping.from_finngen_susie_finemapping( spark=session.spark, finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, + finngen_release_prefix=finngen_release_prefix, ) # Flagging sub-significnat loci: .validate_lead_pvalue( diff --git a/src/gentropy/finngen_studies.py b/src/gentropy/finngen_studies.py index 80866ac99..d7fd558ef 100644 --- a/src/gentropy/finngen_studies.py +++ b/src/gentropy/finngen_studies.py @@ -2,8 +2,6 @@ from __future__ import annotations -from urllib.request import urlopen - from gentropy.common.session import Session from gentropy.config import FinngenStudiesConfig from gentropy.datasource.finngen.study_index import FinnGenStudyIndex @@ -35,26 +33,26 @@ def __init__( efo_curation_mapping_url (str): URL to the EFO curation mapping file sample_size (int): Number of individuals that participated in sample collection, derived from finngen release metadata. """ + _match = FinnGenStudyIndex.validate_release_prefix(finngen_release_prefix) + release_prefix = _match["prefix"] + release = _match["release"] + + efo_curation_df = FinnGenStudyIndex.read_efo_curation( + session.spark, + efo_curation_mapping_url, + ) study_index = FinnGenStudyIndex.from_source( session.spark, finngen_phenotype_table_url, - finngen_release_prefix, + release_prefix, finngen_summary_stats_url_prefix, finngen_summary_stats_url_suffix, sample_size, ) - - # NOTE: hack to allow spark to read directly from the URL. 
- csv_data = urlopen(efo_curation_mapping_url).readlines() - csv_rows = [row.decode("utf8") for row in csv_data] - rdd = session.spark.sparkContext.parallelize(csv_rows) - # NOTE: type annotations for spark.read.csv miss the fact that the first param can be [RDD[str]] - efo_curation_mapping = session.spark.read.csv(rdd, header=True, sep="\t") - study_index_with_efo = FinnGenStudyIndex.join_efo_mapping( study_index, - efo_curation_mapping, - finngen_release_prefix, + efo_curation_df, + release, ) study_index_with_efo.df.write.mode(session.write_mode).parquet( finngen_study_index_out diff --git a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py index 1e5d486b7..4c7e12bf5 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py +++ b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py @@ -44,6 +44,7 @@ def test_finngen_finemapping_from_finngen_susie_finemapping( spark=spark, finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, + finngen_release_prefix="FINNGEN_R11", ), StudyLocus, ) @@ -77,9 +78,12 @@ def test_finngen_finemapping_ingestion_step( finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, finngen_finemapping_lead_pvalue_threshold=1e-5, + finngen_release_prefix="FINNGEN_R11", ) assert output_path.is_dir() assert (output_path / "_SUCCESS").exists() cs = StudyLocus.from_parquet(session=session, path=str(output_path)) assert cs.df.count() == 1 + study_id: str = cs.df.select("studyId").collect()[0]["studyId"] + assert study_id.startswith("FINNGEN_R11_") diff --git a/tests/gentropy/datasource/finngen/test_finngen_study_index.py b/tests/gentropy/datasource/finngen/test_finngen_study_index.py index 07d014d13..0d0537413 100644 --- 
a/tests/gentropy/datasource/finngen/test_finngen_study_index.py +++ b/tests/gentropy/datasource/finngen/test_finngen_study_index.py @@ -7,17 +7,21 @@ from unittest.mock import MagicMock import pytest +from pyspark.sql import DataFrame from pyspark.sql import types as T from gentropy.dataset.study_index import StudyIndex -from gentropy.datasource.finngen.study_index import FinnGenStudyIndex +from gentropy.datasource.finngen.study_index import ( + FinngenPrefixMatch, + FinnGenStudyIndex, +) from gentropy.finngen_studies import FinnGenStudiesStep if TYPE_CHECKING: from pathlib import Path from typing import Callable - from pyspark.sql import DataFrame, SparkSession + from pyspark.sql import SparkSession from gentropy.common.session import Session @@ -28,21 +32,21 @@ def finngen_study_index_mock(spark: SparkSession) -> StudyIndex: data = [ # NOTE: Study maps to a single EFO trait ( - "STUDY_1", + "FINNGEN_R11_STUDY_1", "Actinomycosis", "FINNGEN_R11", "gwas", ), # NOTE: Study does not map to EFO traits ( - "STUDY_2", + "FINNGEN_R11_STUDY_2", "Some other trait", "FINNGEN_R11", "gwas", ), # NOTE: Study maps to two EFO traits ( - "STUDY_3", + "FINNGEN_R11_STUDY_3", "Glucose", "FINNGEN_R11", "gwas", @@ -197,7 +201,7 @@ def mock_response(url: str) -> MagicMock: case "https://finngen_phenotypes": value = finngen_phenotype_table_mock case "https://efo_mappings": - value = "\n".join(["\t".join(row) + "\n" for row in efo_mappings_mock]) + value = "\n".join(["\t".join(row) for row in efo_mappings_mock]) case _: value = "" mock_open = MagicMock() @@ -235,7 +239,6 @@ def test_finngen_study_index_step( """ with monkeypatch.context() as m: m.setattr("gentropy.datasource.finngen.study_index.urlopen", urlopen_mock) - m.setattr("gentropy.finngen_studies.urlopen", urlopen_mock) output_path = str(tmp_path / "study_index") FinnGenStudiesStep( session=session, @@ -254,6 +257,20 @@ def test_finngen_study_index_step( # fmt: on +def test_finngen_study_index_read_efo_curation( + monkeypatch: 
pytest.MonkeyPatch, + spark: SparkSession, + urlopen_mock: Callable[[str], MagicMock], +) -> None: + """Test reading efo curation.""" + with monkeypatch.context() as m: + m.setattr("gentropy.datasource.finngen.study_index.urlopen", urlopen_mock) + efo_df = FinnGenStudyIndex.read_efo_curation(spark, "https://efo_mappings") + assert isinstance(efo_df, DataFrame) + efo_df.show() + assert efo_df.count() == 5 + + def test_finngen_study_index_from_source( monkeypatch: pytest.MonkeyPatch, spark: SparkSession, @@ -280,7 +297,7 @@ def test_finngen_study_index_from_source( assert study_index.df.count() == 3, "Expect two rows at the study_index, as in the input." rows = study_index.df.collect() - expected_study_ids = ["AB1_ACTINOMYCOSIS", "GLUCOSE", "SOME_OTHER_TRAIT"] + expected_study_ids = ["FINNGEN_R11_AB1_ACTINOMYCOSIS", "FINNGEN_R11_GLUCOSE", "FINNGEN_R11_SOME_OTHER_TRAIT"] assert "studyId" in study_index.df.columns, "Expect that studyId column exists." assert sorted([v["studyId"] for v in rows]) == expected_study_ids, "Expect that studyIds are populated from input." 
@@ -309,6 +326,42 @@ def test_finngen_study_index_from_source( # fmt: on +@pytest.mark.parametrize( + ["prefix", "expected_output", "xfail"], + [ + pytest.param( + "FINNGEN_R11", + FinngenPrefixMatch(prefix="FINNGEN_R11", release="R11"), + False, + id="Correct prefix passed.", + ), + pytest.param( + "FINNGEN_R11_", + FinngenPrefixMatch(prefix="FINNGEN_R11", release="R11"), + False, + id="Underscore is removed from the prefix.", + ), + pytest.param( + "R11", + FinngenPrefixMatch(prefix="FINNGEN_R11", release="R11"), + True, + id="Incorrect prefix raises ValueError.", + ), + ], +) +def test_finngen_validate_release_prefix( + prefix: str, expected_output: FinngenPrefixMatch, xfail: bool +) -> None: + """Test validate_release_prefix.""" + if not xfail: + assert ( + FinnGenStudyIndex.validate_release_prefix(prefix) == expected_output + ), "Incorrect match object" + else: + with pytest.raises(ValueError): + FinnGenStudyIndex.validate_release_prefix(prefix) + + def test_finngen_study_index_add_efos( finngen_study_index_mock: StudyIndex, efo_mappings_df_mock: DataFrame, @@ -319,7 +372,7 @@ def test_finngen_study_index_add_efos( assert efo_column_name not in finngen_study_index_mock.df.columns study_index = FinnGenStudyIndex.join_efo_mapping( finngen_study_index_mock, - finngen_release_prefix="FINNGEN_R11_", + finngen_release="R11", efo_curation_mapping=efo_mappings_df_mock, ) # fmt: off @@ -332,8 +385,8 @@ def test_finngen_study_index_add_efos( for row in study_index.df.select(efo_column_name, "studyId").collect() } expected_efos = { - "STUDY_1": ["EFO_0007128"], - "STUDY_2": [], - "STUDY_3": ["EFO_0002571", "EFO_0004468"], + "FINNGEN_R11_STUDY_1": ["EFO_0007128"], + "FINNGEN_R11_STUDY_2": [], + "FINNGEN_R11_STUDY_3": ["EFO_0002571", "EFO_0004468"], } assert expected_efos == efos, "Expect that EFOs are correctly assigned." 
From d650a293c2f7fac02dda381ec5ea501e9b56e948 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:53:58 +0200 Subject: [PATCH 115/188] feat(susie_finemapper): allow for extraction of the log file from manifest (#859) * feat(susie_finemapper): allow for extraction of the log file from manifest * fix: add missing extension to the log file * fix: row istead of column --------- Co-authored-by: Szymon Szyszkowski --- src/gentropy/susie_finemapper.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index a5087e0d2..c58f968cb 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -63,6 +63,12 @@ def __init__( ) -> None: """Run fine-mapping on a studyLocusId from a collected studyLocus table. + The method requires a `study_locus_manifest_path` file that will contain ["study_locus_input", "study_locus_output", "log_output"]. `log_output` + is an optional column of the manifest. In case it does not exist, the logs from the finemapper are saved under the same directory + as the `study_locus_output` with a `.log` suffix. + Each execution of the method will only evaluate a single row from the `study_locus_manifest` that is inferred from the `study_locus_index` + variable.
+ Args: session (Session): Spark session study_index_path (str): path to the study index @@ -70,8 +76,8 @@ def __init__( study_locus_index (int): Index (0-based) of the locus in the manifest to process in this call max_causal_snps (int): Maximum number of causal variants in locus, default is 10 lead_pval_threshold (float): p-value threshold for the lead variant from CS, default is 1e-5 - purity_mean_r2_threshold (float): thrshold for purity mean r2 qc metrics for filtering credible sets, default is 0 - purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets, default is 0.25 + purity_mean_r2_threshold (float): threshold for purity mean r2 qc metrics for filtering credible sets, default is 0 + purity_min_r2_threshold (float): threshold for purity min r2 qc metrics for filtering credible sets, default is 0.25 cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 sum_pips (float): the expected sum of posterior probabilities in the locus, default is 0.99 (99% credible set) susie_est_tausq (bool): estimate tau squared, default is False @@ -88,6 +94,9 @@ def __init__( row = study_locus_manifest.loc[study_locus_index] study_locus_input = row["study_locus_input"] study_locus_output = row["study_locus_output"] + log_output = study_locus_output + ".log" + if "log_output" in study_locus_manifest.columns: + log_output = row["log_output"] + ".log" # Read studyLocus study_locus = ( @@ -140,7 +149,7 @@ def __init__( if result_logging["log"] is not None: # Write log result_logging["log"].to_parquet( - study_locus_output + ".log", + log_output, engine="pyarrow", index=False, ) From 333facb6d0d81e82ec4e42d6c450968be9697020 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Fri, 18 Oct 2024 11:30:57 +0100 Subject: [PATCH 116/188] chore(coloc): changing the content of `numberColocalisingVariants` field (#857) * chore(coloc): chaning the content of numberColocalisingVariants * chore: pre-commit auto fixes 
[...] * fix: adjuting for schema and tests * fix(pics): ensuring locus has variantId * fix: pics udf test * fix: udf keys --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/gentropy/method/colocalisation.py | 86 +++++++++++++++++-- src/gentropy/method/pics.py | 4 + .../method/test_colocalisation_method.py | 8 ++ tests/gentropy/method/test_pics.py | 4 +- 4 files changed, 91 insertions(+), 11 deletions(-) diff --git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py index 37ca7b0d7..2867e700c 100644 --- a/src/gentropy/method/colocalisation.py +++ b/src/gentropy/method/colocalisation.py @@ -7,6 +7,7 @@ import numpy as np import pyspark.ml.functions as fml import pyspark.sql.functions as f +import pyspark.sql.types as t from pyspark.ml.linalg import DenseVector, Vectors, VectorUDT from pyspark.sql.types import DoubleType @@ -22,6 +23,53 @@ from gentropy.dataset.study_locus_overlap import StudyLocusOverlap +def get_tag_variant_source(statistics: Column) -> Column: + """Get the source of the tag variant for a locus-overlap row. + + Args: + statistics (Column): statistics column + + Returns: + Column: source of the tag variant + + Examples: + >>> data = [('a', 'b'),(None, 'b'),('a', None),] + >>> ( + ... spark.createDataFrame(data, ['a', 'b']) + ... .select( + ... 'a', 'b', + ... get_tag_variant_source( + ... f.struct( + ... f.col('a').alias('left_posteriorProbability'), + ... f.col('b').alias('right_posteriorProbability'), + ... ) + ... ).alias('source') + ... ) + ... .show() + ... 
) + +----+----+------+ + | a| b|source| + +----+----+------+ + | a| b| both| + |null| b| right| + | a|null| left| + +----+----+------+ + + """ + return ( + # Both posterior probabilities are not null: + f.when( + statistics.left_posteriorProbability.isNotNull() + & statistics.right_posteriorProbability.isNotNull(), + f.lit("both"), + ) + # Only the left posterior probability is not null: + .when(statistics.left_posteriorProbability.isNotNull(), f.lit("left")) + # It must be right only: + .otherwise(f.lit("right")) + ) + + class ColocalisationMethodInterface(Protocol): """Colocalisation method interface.""" @@ -103,12 +151,14 @@ def colocalise( """ return Colocalisation( _df=( - overlapping_signals.df.withColumn( - "clpp", - ECaviar._get_clpp( - f.col("statistics.left_posteriorProbability"), - f.col("statistics.right_posteriorProbability"), - ), + overlapping_signals.df.withColumns( + { + "clpp": ECaviar._get_clpp( + f.col("statistics.left_posteriorProbability"), + f.col("statistics.right_posteriorProbability"), + ), + "tagVariantSource": get_tag_variant_source(f.col("statistics")), + } ) .groupBy( "leftStudyLocusId", @@ -117,7 +167,15 @@ def colocalise( "chromosome", ) .agg( - f.count("*").alias("numberColocalisingVariants"), + # Count the number of tag variants that can be found in both loci: + f.size( + f.filter( + f.collect_list(f.col("tagVariantSource")), + lambda x: x == "both", + ) + ) + .cast(t.LongType()) + .alias("numberColocalisingVariants"), f.sum(f.col("clpp")).alias("clpp"), ) .withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME)) @@ -209,7 +267,10 @@ def colocalise( posteriors = f.udf(Coloc._get_posteriors, VectorUDT()) return Colocalisation( _df=( - overlapping_signals.df.select("*", "statistics.*") + overlapping_signals.df.withColumn( + "tagVariantSource", get_tag_variant_source(f.col("statistics")) + ) + .select("*", "statistics.*") # Before summing log_BF columns nulls need to be filled with 0: .fillna(0, subset=["left_logBF", "right_logBF"]) 
# Sum of log_BFs for each pair of signals @@ -225,7 +286,14 @@ def colocalise( "rightStudyType", ) .agg( - f.count("*").alias("numberColocalisingVariants"), + f.size( + f.filter( + f.collect_list(f.col("tagVariantSource")), + lambda x: x == "both", + ) + ) + .cast(t.LongType()) + .alias("numberColocalisingVariants"), fml.array_to_vector(f.collect_list(f.col("left_logBF"))).alias( "left_logBF" ), diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index b1dc46e12..918850527 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -160,6 +160,10 @@ def _finemap( # If PICS cannot be calculated, we drop the variant from the credible set continue + # Changing schema: rename `tagVariantId` to `variantId`: + if "tagVariantId" in tag_dict: + tag_dict["variantId"] = tag_dict.pop("tagVariantId") + pics_snp_mu = PICS._pics_mu(lead_neglog_p, tag_dict["r2Overall"]) pics_snp_std = PICS._pics_standard_deviation( lead_neglog_p, tag_dict["r2Overall"], k diff --git a/tests/gentropy/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py index c9a99d16f..d44652fbb 100644 --- a/tests/gentropy/method/test_colocalisation_method.py +++ b/tests/gentropy/method/test_colocalisation_method.py @@ -134,6 +134,8 @@ def test_coloc_no_logbf( "statistics": { "left_logBF": None, "right_logBF": None, + "left_posteriorProbability": None, + "right_posteriorProbability": None, }, # irrelevant for COLOC } ], @@ -150,6 +152,12 @@ def test_coloc_no_logbf( [ StructField("left_logBF", DoubleType(), True), StructField("right_logBF", DoubleType(), True), + StructField( + "left_posteriorProbability", DoubleType(), True + ), + StructField( + "right_posteriorProbability", DoubleType(), True + ), ] ), ), diff --git a/tests/gentropy/method/test_pics.py b/tests/gentropy/method/test_pics.py index 3639b408f..d5a8eb5d0 100644 --- a/tests/gentropy/method/test_pics.py +++ b/tests/gentropy/method/test_pics.py @@ -57,8 +57,8 @@ def test_finemap_quality_control( def test__finemap_udf() ->
None: """Test the _finemap UDF with a simple case.""" ld_set = [ - Row(variantId="var1", r2Overall=0.8), - Row(variantId="var2", r2Overall=1), + Row(tagVariantId="var1", r2Overall=0.8), + Row(tagVariantId="var2", r2Overall=1), ] result = PICS._finemap(ld_set, lead_neglog_p=10.0, k=6.4) expected = [ From f93a9d353c7596cda7b209349e1d2a325f328c90 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 18 Oct 2024 16:24:59 +0100 Subject: [PATCH 117/188] fix: susie credible sets with unknown confidence (#862) --- src/gentropy/dataset/study_locus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index b6fc7b8c9..eaec9672b 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -1175,7 +1175,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: df = self.df.withColumn( "confidence", f.when( - (f.col("finemappingMethod") == "SuSiE-inf") + (f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"])) & ( ~f.array_contains( f.col("qualityControls"), @@ -1185,7 +1185,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.FINEMAPPED_IN_SAMPLE_LD.value, ) .when( - (f.col("finemappingMethod") == "SuSiE-inf") + (f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"])) & ( f.array_contains( f.col("qualityControls"), From 40a582c66ff3d02cc3c691b82cbaf07aa55afe14 Mon Sep 17 00:00:00 2001 From: Yakov Date: Mon, 21 Oct 2024 09:40:50 +0100 Subject: [PATCH 118/188] fix: adding beta for lead variant (#863) * fix: adding beta for lead variant * fix: v1 --- src/gentropy/susie_finemapper.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index c58f968cb..d0759f565 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -415,6 +415,13 @@ def susie_inf_to_studylocus( # noqa: C901 cred_sets = 
cred_sets.withColumn("locusStart", f.lit(locusStart)) cred_sets = cred_sets.withColumn("locusEnd", f.lit(locusEnd)) + cred_sets = cred_sets.drop("beta").withColumn( + "beta", + f.expr(""" + filter(locus, x -> x.variantId = variantId)[0].beta + """), + ) + return StudyLocus( _df=cred_sets, _schema=StudyLocus.get_schema(), @@ -716,6 +723,8 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 if N_total is None: N_total = 100_000 + locusStart = max(locusStart, 0) + region = chromosome + ":" + str(int(locusStart)) + "-" + str(int(locusEnd)) schema = StudyLocus.get_schema() @@ -860,6 +869,14 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 ) np.fill_diagonal(gnomad_ld, 1) + # Decision tree - number of variants + if gwas_index.count() < 100: + logging.warning("Less than 100 variants after joining GWAS and LD index") + return None + elif gwas_index.count() >= 15_000: + logging.warning("More than 15000 variants after joining GWAS and LD index") + return None + out = SusieFineMapperStep.susie_finemapper_from_prepared_dataframes( GWAS_df=gwas_df, ld_index=gwas_index, From 13c2040bf5b3368225178c7405f448ced194682e Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: Mon, 21 Oct 2024 14:33:53 +0200 Subject: [PATCH 119/188] fix: biosample index add efo cell types (#853) * feat(biosample_index_add_efo): update regex to allow removal of multiple biosample id url prefixes * feat(biosample_index_add_efo): add a dummy dataset for efo biosample id, created by copy-pasting nodes and edges from full json * feat(biosample_index_add_efo): add efo (cell info) to biosample index * feat(biosample_index_add_efo): add biosample index efo tests * chore: pre-commit auto fixes [...]
* chore: remove redundant tests --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Daniel Suveges --- src/gentropy/biosample_index.py | 5 +- src/gentropy/config.py | 1 + src/gentropy/dataset/biosample_index.py | 22 + .../datasource/biosample_ontologies/utils.py | 18 +- .../data_samples/efo_biosample_sample.json | 1424 +++++++++++++++++ .../test_biosample_ontology.py | 22 +- 6 files changed, 1479 insertions(+), 13 deletions(-) create mode 100644 tests/gentropy/data_samples/efo_biosample_sample.json diff --git a/src/gentropy/biosample_index.py b/src/gentropy/biosample_index.py index e85c2e135..e0b5e9b10 100644 --- a/src/gentropy/biosample_index.py +++ b/src/gentropy/biosample_index.py @@ -16,6 +16,7 @@ def __init__( session: Session, cell_ontology_input_path: str, uberon_input_path: str, + efo_input_path: str, biosample_index_path: str, ) -> None: """Run Biosample index generation step. @@ -24,11 +25,13 @@ def __init__( session (Session): Session object. cell_ontology_input_path (str): Input cell ontology dataset path. uberon_input_path (str): Input uberon dataset path. + efo_input_path (str): Input efo dataset path. biosample_index_path (str): Output gene index dataset path. 
""" cell_ontology_index = extract_ontology_from_json(cell_ontology_input_path, session.spark) uberon_index = extract_ontology_from_json(uberon_input_path, session.spark) + efo_index = extract_ontology_from_json(efo_input_path, session.spark).retain_rows_with_ancestor_id(["CL_0000000"]) - biosample_index = cell_ontology_index.merge_indices([uberon_index]) + biosample_index = cell_ontology_index.merge_indices([uberon_index, efo_index]) biosample_index.df.write.mode(session.write_mode).parquet(biosample_index_path) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 39ed62f9e..26c676e9b 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -57,6 +57,7 @@ class BiosampleIndexConfig(StepConfig): cell_ontology_input_path: str = MISSING uberon_input_path: str = MISSING + efo_input_path: str = MISSING biosample_index_path: str = MISSING _target_: str = "gentropy.biosample_index.BiosampleIndexStep" diff --git a/src/gentropy/dataset/biosample_index.py b/src/gentropy/dataset/biosample_index.py index 39c597142..bae3c0a4f 100644 --- a/src/gentropy/dataset/biosample_index.py +++ b/src/gentropy/dataset/biosample_index.py @@ -70,3 +70,25 @@ def merge_indices( _df=aggregated_df, _schema=BiosampleIndex.get_schema() ) + + def retain_rows_with_ancestor_id( + self: BiosampleIndex, + ancestor_ids : list[str] + ) -> BiosampleIndex: + """Filter the biosample index to retain only rows with the given ancestor IDs. + + Args: + ancestor_ids (list[str]): Ancestor IDs to filter on. + + Returns: + BiosampleIndex: Filtered biosample index. 
+ """ + # Create a Spark array of ancestor IDs prior to filtering + ancestor_ids_array = f.array(*[f.lit(id) for id in ancestor_ids]) + + return BiosampleIndex( + _df=self.df.filter( + f.size(f.array_intersect(f.col("ancestors"), ancestor_ids_array)) > 0 + ), + _schema=BiosampleIndex.get_schema() + ) diff --git a/src/gentropy/datasource/biosample_ontologies/utils.py b/src/gentropy/datasource/biosample_ontologies/utils.py index 3ef1747ee..2d9bb6bc4 100644 --- a/src/gentropy/datasource/biosample_ontologies/utils.py +++ b/src/gentropy/datasource/biosample_ontologies/utils.py @@ -1,4 +1,6 @@ """Utility functions for Biosample ontology processing.""" +import re + from pyspark.sql import DataFrame, SparkSession from pyspark.sql import functions as f from pyspark.sql.types import ArrayType, StringType @@ -92,12 +94,22 @@ def get_relationships( f.col("edge.pred").alias("predicate"), f.col("edge.obj").alias("object") ) - df_edges = df_edges.withColumn("subject", f.regexp_replace(f.col("subject"), "http://purl.obolibrary.org/obo/", "")) - df_edges = df_edges.withColumn("object", f.regexp_replace(f.col("object"), "http://purl.obolibrary.org/obo/", "")) + + # Remove certain URL prefixes from IDs + urls_to_remove = [ + "http://purl.obolibrary.org/obo/", + "http://www.ebi.ac.uk/efo/" + ] + # Create a regex pattern that matches any of the URLs + escaped_urls_pattern = "|".join([re.escape(url) for url in urls_to_remove]) + + + df_edges = df_edges.withColumn("subject", f.regexp_replace(f.col("subject"), escaped_urls_pattern, "")) + df_edges = df_edges.withColumn("object", f.regexp_replace(f.col("object"), escaped_urls_pattern, "")) # Extract the relevant information from the nodes transformed_df = df_nodes.select( - f.regexp_replace(f.col("node.id"), "http://purl.obolibrary.org/obo/", "").alias("biosampleId"), + f.regexp_replace(f.col("node.id"), escaped_urls_pattern, "").alias("biosampleId"), f.coalesce(f.col("node.lbl"), f.col("node.id")).alias("biosampleName"), 
f.col("node.meta.definition.val").alias("description"), f.collect_set(f.col("node.meta.xrefs.val")).over(Window.partitionBy("node.id")).getItem(0).alias("xrefs"), diff --git a/tests/gentropy/data_samples/efo_biosample_sample.json b/tests/gentropy/data_samples/efo_biosample_sample.json new file mode 100644 index 000000000..ebe979721 --- /dev/null +++ b/tests/gentropy/data_samples/efo_biosample_sample.json @@ -0,0 +1,1424 @@ +{ + "graphs": [ + { + "id": "http://www.ebi.ac.uk/efo/efo.owl", + "meta": { + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/format-version", + "val": "1.4" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Catherine Leroy" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Dani Welter" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Drashtti Vasant" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Ele Holloway" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Eleanor Williams" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Emma Kate Hastings" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Gautier Koscielny" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Helen Parkinson" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "James Malone" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Jon Ison" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Laura Huerta Martinez" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Natalja Kurbatova" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Olamidipupo Ajigboye" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Paola Roncaglia" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Simon Jupp" + }, + { + "pred": 
"http://purl.org/dc/elements/1.1/creator", + "val": "Sirarat Sarntivijai" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Tomasz Adamusiak" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Trish Whetzel" + }, + { + "pred": "http://purl.org/dc/elements/1.1/creator", + "val": "Zoe May Pendlington" + }, + { + "pred": "http://purl.org/dc/elements/1.1/rights", + "val": "Copyright [2014] EMBL - European Bioinformatics Institute\nLicensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the\nLicense. " + }, + { + "pred": "http://purl.org/dc/terms/license", + "val": "www.apache.org/licenses/LICENSE-2.0" + }, + { + "pred": "http://www.w3.org/2000/01/rdf-schema#comment", + "val": "2024-10-15" + }, + { + "pred": "http://www.w3.org/2002/07/owl#versionInfo", + "val": "3.71.0" + } + ], + "version": "http://www.ebi.ac.uk/efo/releases/v3.71.0/efo.owl" + }, + "nodes": [ + { + "id": "http://dbpedia.org/resource/China", + "lbl": "China", + "type": "CLASS", + "meta": { + "xrefs": [ + { + "val": "GAZ:00002845" + } + ] + } + }, + { + "id": "http://dbpedia.org/resource/India", + "lbl": "India", + "type": "CLASS", + "meta": { + "xrefs": [ + { + "val": "GAZ:00002839" + } + ] + } + }, + { + "id": "http://dbpedia.org/resource/Iran", + "lbl": "Iran", + "type": "CLASS", + "meta": { + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "Iran (Islamic Republic of)" + }, + { + "pred": "hasExactSynonym", + "val": "Iran (Islamic Republic of)" + } + ], + "xrefs": [ + { + "val": "GAZ:00004474" + } + ] + } + 
}, + { + "id": "http://dbpedia.org/resource/Japan", + "lbl": "Japan", + "type": "CLASS", + "meta": { + "xrefs": [ + { + "val": "GAZ:00002747" + } + ] + } + }, + { + "id": "http://dbpedia.org/resource/North_Korea", + "lbl": "North Korea", + "type": "CLASS", + "meta": { + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "Democratic People's Republic of Korea" + } + ], + "xrefs": [ + { + "val": "GAZ:00002801" + } + ] + } + }, + { + "id": "http://dbpedia.org/resource/Philippines", + "lbl": "Philippines", + "type": "CLASS", + "meta": { + "xrefs": [ + { + "val": "GAZ:00004525" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/IAO_0000118", + "val": "The Philippines" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CLO_0036460", + "lbl": "GM06944", + "type": "CLASS", + "meta": { + "definition": { + "val": "TRANSLOCATED CHROMOSOME" + }, + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "GM06944 cell" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CLO_0036870", + "lbl": "GM07029", + "type": "CLASS", + "meta": { + "definition": { + "val": "INTERNATIONAL HAPMAP PROJECT - CEPH (PLATE I) [UTAH RESIDENTS WITH ANCESTRY FROM NORTHERN AND WESTERN EUROPE] CEPH/UTAH PEDIGREE 1340 CYTOCHROME P450, SUBFAMILY IIC, POLYPEPTIDE 19; CYP2C19" + }, + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "GM07029 cell" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000000", + "lbl": "cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A material entity of anatomical origin (part of or deriving from an organism) that has as its parts a maximally connected cell compartment surrounded by a plasma membrane.", + "xrefs": ["CARO:mah"] + }, + "comments": [ + "The definition of cell is intended to represent all cells, and thus a cell is defined as a material entity and not an anatomical structure, which implies that it is part of an organism (or the entirety of one)." 
+ ], + "subsets": [ + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/ubprop#_upper_level" + ], + "xrefs": [ + { + "val": "CALOHA:TS-2035" + }, + { + "val": "CALOHA:TS-2035\n" + }, + { + "val": "FBbt:00007002" + }, + { + "val": "FMA:68646" + }, + { + "val": "FMA:68646\n " + }, + { + "val": "GO:0005623" + }, + { + "val": "GO:0005623\n" + }, + { + "val": "KUPO:0000002" + }, + { + "val": "KUPO:0000002\n\n" + }, + { + "val": "MESH:D002477" + }, + { + "val": "NCIt:C12508" + }, + { + "val": "NCIt:C48694" + }, + { + "val": "VHOG:0001533" + }, + { + "val": "VHOG:0001533\n\n" + }, + { + "val": "WBbt:0004017" + }, + { + "val": "WBbt:0004017\n" + }, + { + "val": "XAO:0003012" + }, + { + "val": "XAO:0003012\n\n" + }, + { + "val": "ZFA:0009000" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000002", + "lbl": "obsolete immortal cell line cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "OBSOLETE: A cell line cell that is expected to be capable of an unlimited number of divisions, and is thus able to support indefinite growth/propagation in vitro as part of a immortal cell line.", + "xrefs": ["ReO:mhb"] + }, + "comments": [ + "Obsoleted in July 2013 and replaced by the CLO 'immortal cell line cell' class, as a result of CLO-OBI-CL alignment efforts.\n\nCovers cells actively being cultured or stored in a quiescent state for future use." 
+ ], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "continuous cell line cell" + }, + { + "pred": "hasExactSynonym", + "val": "permanent cell line cell" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/IAO_0100001", + "val": "http://purl.obolibrary.org/obo/CLO_0000019" + }, + { + "pred": "http://purl.obolibrary.org/obo/IAO_0100001", + "val": "http://purl.obolibrary.org/obo/CLO_0000019" + }, + { + "pred": "http://www.ebi.ac.uk/efo/obsoleted_in_version", + "val": "3.16.0" + }, + { + "pred": "http://www.ebi.ac.uk/efo/reason_for_obsolescence", + "val": "Use http://purl.obolibrary.org/obo/CLO_0000019" + } + ], + "deprecated": true + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000007", + "lbl": "early embryonic cell (metazoa)", + "type": "CLASS", + "meta": { + "definition": { + "val": "A cell found in the embryo before the formation of all the gem layers is complete.", + "xrefs": ["GOC:tfm"] + }, + "xrefs": [ + { + "val": "ZFA:0009002" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000010", + "lbl": "cultured cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A cell in vitro that is or has been maintained or propagated as part of a cell culture.", + "xrefs": ["ReO:mhb"] + }, + "comments": [ + "Note that this class was re-labeled to 'cultured cell' instead of 'cell line cell', as it intent was clarified to cover any cultured cells of multicellular and unicellular organisms. This includes cells actively being cultured, or cells that have been cultured but are stored in a quiescent state for future use. In having been cultured, cells must establish homeostasis and often replicate in a foreign environment. Accomodation of this stress initiates a selection of cells fit for such challenges, wherein necessary adaptive biochemical and.or genetic changes can occur. 
These changes can set them apart from the in vivo cells from which they derive, and such changes will typically accumulate and change over increasing time in culture." + ], + "xrefs": [ + { + "val": "MESH:D002478" + }, + { + "val": "MO:562" + }, + { + "val": "MeSH:D002460" + }, + { + "val": "NCIt:C16403" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000015", + "lbl": "male germ cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A germ cell that supports male gamete production. In some species, non-germ cells known as Sertoli cells also play a role in spermatogenesis.", + "xrefs": [ + "PMID:29462262", + "https://orcid.org/0000-0001-5208-3432" + ] + }, + "subsets": ["http://purl.obolibrary.org/obo/cl#cellxgene_subset"], + "xrefs": [ + { + "val": "FMA:72290" + }, + { + "val": "MA:0002765" + }, + { + "val": "VHOG:0001531" + }, + { + "val": "ncithesaurus:Spermatogenic_Cell" + } + ], + "basicPropertyValues": [ + { + "pred": "http://www.w3.org/2000/01/rdf-schema#seeAlso", + "val": "https://github.com/obophenotype/cell-ontology/issues/574" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000019", + "lbl": "sperm", + "type": "CLASS", + "meta": { + "definition": { + "val": "A mature male germ cell that develops from a spermatid.", + "xrefs": ["GOC:tfm", "MESH:D013094"] + }, + "subsets": ["http://purl.obolibrary.org/obo/cl#cellxgene_subset"], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "sperm cell" + }, + { + "pred": "hasExactSynonym", + "val": "spermatozoid" + }, + { + "pred": "hasExactSynonym", + "val": "spermatozoon" + } + ], + "xrefs": [ + { + "val": "BTO:0001277" + }, + { + "val": "BTO:0002046" + }, + { + "val": "CALOHA:TS-0949" + }, + { + "val": "FBbt:00004954" + }, + { + "val": "FMA:67338" + }, + { + "val": "MA:0002765" + }, + { + "val": "MAT:0000131" + }, + { + "val": "MeSH:D012661" + }, + { + "val": "NCIt:C12602" + }, + { + "val": "NCIt:C13277" + }, + { + "val": "SAEL:93" + }, + { + "val": "WBbt:0005321" 
+ }, + { + "val": "WBbt:0006798" + }, + { + "val": "ZFA:0009006" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000023", + "lbl": "oocyte", + "type": "CLASS", + "meta": { + "definition": { + "val": "A female germ cell that has entered meiosis.", + "xrefs": ["GOC:tfm", "ISBN:0721662544"] + }, + "subsets": ["http://purl.obolibrary.org/obo/cl#cellxgene_subset"], + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "oogonium" + } + ], + "xrefs": [ + { + "val": "BTO:0000964" + }, + { + "val": "CALOHA:TS-0711" + }, + { + "val": "FBbt:00004886" + }, + { + "val": "FMA:18644" + }, + { + "val": "MESH:D009865" + }, + { + "val": "NCIt:C12598" + }, + { + "val": "SNOMEDCT:86082002" + }, + { + "val": "WBbt:0006797" + }, + { + "val": "ZFA:0001109" + }, + { + "val": "ZFA:0009008" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000025", + "lbl": "egg cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A female gamete where meiosis has progressed to metaphase II and is able to participate in fertilization.", + "xrefs": ["GOC:tfm", "ISBN:0721662544"] + }, + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "mature oocyte" + }, + { + "pred": "hasExactSynonym", + "val": "ovum" + } + ], + "xrefs": [ + { + "val": "BTO:0000369" + }, + { + "val": "BTO:0003801" + }, + { + "val": "CALOHA:TS-2191" + }, + { + "val": "FBbt:00057012" + }, + { + "val": "FMA:67343" + }, + { + "val": "MAT:0000213" + }, + { + "val": "MESH:D010063" + }, + { + "val": "MeSH:D010063" + }, + { + "val": "PO:0020094" + }, + { + "val": "SNOMEDCT:73153001" + }, + { + "val": "ZFA:0001570" + } + ], + "basicPropertyValues": [ + { + "pred": "http://xmlns.com/foaf/0.1/depiction", + "val": "https://www.swissbiopics.org/api/image/Egg_cell.svg" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000031", + "lbl": "neuroblast (sensu Vertebrata)", + "type": "CLASS", + "meta": { + "definition": { + "val": "A cell that will develop into a neuron often after a 
migration phase.", + "xrefs": ["GOC:NV", "http://en.wikipedia.org/wiki/Neuroblast"] + }, + "subsets": ["http://purl.obolibrary.org/obo/cl#cellxgene_subset"], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "neuroblast" + } + ], + "xrefs": [ + { + "val": "BTO:0000930" + }, + { + "val": "FMA:70563" + }, + { + "val": "ZFA:0009011" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000034", + "lbl": "stem cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A relatively undifferentiated cell that retains the ability to divide and proliferate throughout life to provide progenitor cells that can differentiate into specialized cells.", + "xrefs": ["GOC:tfm", "MESH:D013234"] + }, + "comments": [ + "This term applies to metazoan. For plant stem cells, consider using PO:0004011 ‘initial cell’ or its parent PO:0004010 ‘meristematic cell’." + ], + "subsets": [ + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/cl#general_cell_types_upper_slim", + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "animal stem cell" + } + ], + "xrefs": [ + { + "val": "CALOHA:TS-2086" + }, + { + "val": "FMA:63368" + }, + { + "val": "NCIt:C12662" + }, + { + "val": "SNOMEDCT:419758009" + }, + { + "val": "ZFA:0005957" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000037", + "lbl": "hematopoietic stem cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A stem cell from which all cells of the lymphoid and myeloid lineages develop, including blood cells and cells of the immune system. Hematopoietic stem cells lack cell markers of effector cells (lin-negative). 
Lin-negative is defined by lacking one or more of the following cell surface markers: CD2, CD3 epsilon, CD4, CD5 ,CD8 alpha chain, CD11b, CD14, CD19, CD20, CD56, ly6G, ter119.", + "xrefs": [ + "GOC:add", + "GOC:dsd", + "GOC:tfm", + "PMID:19022770", + "http://en.wikipedia.org/wiki/Hematopoietic_stem_cell" + ] + }, + "comments": [ + "Markers differ between species, and two sets of markers have been described for mice. HSCs are reportedly CD34-positive, CD45-positive, CD48-negative, CD150-positive, CD133-positive, and CD244-negative." + ], + "subsets": [ + "http://purl.obolibrary.org/obo/cl#blood_and_immune_upper_slim", + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "blood forming stem cell" + }, + { + "pred": "hasExactSynonym", + "val": "hemopoietic stem cell" + }, + { + "synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "HSC" + }, + { + "pred": "hasRelatedSynonym", + "val": "colony forming unit hematopoietic" + } + ], + "xrefs": [ + { + "val": "BTO:0000725" + }, + { + "val": "CALOHA:TS-0448" + }, + { + "val": "FMA:86475" + }, + { + "val": "MESH:D006412" + }, + { + "val": "NCIt:C12551" + }, + { + "val": "SNOMEDCT:418318001" + }, + { + "val": "VHOG:0001485" + }, + { + "val": "ZFA:0009014" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000038", + "lbl": "erythroid progenitor cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A progenitor cell committed to the erythroid lineage.", + "xrefs": ["GOC:add", "ISBN:0721601464"] + }, + "subsets": [ + "http://purl.obolibrary.org/obo/cl#blood_and_immune_upper_slim", + "http://purl.obolibrary.org/obo/cl#cellxgene_subset" + ], + "synonyms": [ + { + 
"synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "BFU-E" + }, + { + "synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "CFU-E" + }, + { + "pred": "hasRelatedSynonym", + "val": "blast forming unit erythroid" + }, + { + "pred": "hasRelatedSynonym", + "val": "burst forming unit erythroid" + }, + { + "pred": "hasRelatedSynonym", + "val": "colony forming unit erythroid" + }, + { + "pred": "hasRelatedSynonym", + "val": "erythroid stem cell" + } + ], + "xrefs": [ + { + "val": "BTO:0004911" + }, + { + "val": "NCIt:C12526" + }, + { + "val": "ZFA:0009015" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000040", + "lbl": "monoblast", + "type": "CLASS", + "meta": { + "definition": { + "val": "A myeloid progenitor cell committed to the monocyte lineage. This cell is CD11b-positive, has basophilic cytoplasm, euchromatin, and the presence of a nucleolus.", + "xrefs": [ + "GOC:add", + "PMID:1104740", + "http://en.wikipedia.org/wiki/Monoblast", + "http://www.copewithcytokines.de" + ] + }, + "comments": [ + "Morphology: mononuclear cell, diameter 12-20 _M, non-granular, N/C ratio 3/1 - 4/1; markers: CD11b (shared with many other myeloid cells); location: Adult: bone marrow; fetal: liver, Yolk Sac; role or process: hematopoiesis, monocyte development; lineage: hematopoietic, myeloid." 
+ ], + "synonyms": [ + { + "synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "CFU-M" + }, + { + "pred": "hasRelatedSynonym", + "val": "colony forming unit macrophage" + }, + { + "pred": "hasRelatedSynonym", + "val": "colony forming unit monocyte" + }, + { + "pred": "hasRelatedSynonym", + "val": "monocyte stem cell" + } + ], + "xrefs": [ + { + "val": "CALOHA:TS-1195" + }, + { + "val": "FMA:83553" + }, + { + "val": "NCIt:C13014" + }, + { + "val": "SNOMEDCT:53945006" + }, + { + "val": "ZFA:0009017" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000041", + "lbl": "mature eosinophil", + "type": "CLASS", + "meta": { + "definition": { + "val": "A fully differentiated eosinophil, a granular leukocyte with a nucleus that usually has two lobes connected by one or more slender threads, and cytoplasm containing coarse, round granules that are uniform in size and which can be stained by the dye eosin. Cells are also differentiated from other granulocytes by a small nuclear-to-cytoplasm ratio (1:3). This cell type is CD49d-positive.", + "xrefs": [ + "GOC:add", + "GOC:dsd", + "GOC:tfm", + "ISBN:0721601464", + "http://www.cap.org" + ] + }, + "comments": [ + "Eosinophils are CD125-positive (IL-5R), GM-CSFR-positive, IL-3R-positive, VLA4-positive. 
They can also express MHC Class I & II, CD4, CD9, CD11a, CD11b, CD11c, CD13, CD15, CD16, CD17, CD18, CD24, CD25,CD28, CD29, CD32, CD33, CD35, CD37, CD39, CD43, CD44, CD45, CD45RB, CD45RO, CD46, CD47, CD48, CD49d, CD49f, CD50, CD52, CD53, CD54, CD55, CD58, CD59, CD62L, CD63, CD65, CD66, CD69, CD71, CD76, CD80, CD81, CD82, CD86, CD87, CD88, CD89, CD92, CD95, CD97, CD98, CD99, CD100, CD101, CD116, CD117, CD119, CD120, CD123, CD124, CD125, CD131, CD137, CD139, CD148, CD149, CD151, CD153, CD156, CD162, CD161, CD162, CD165, CD174, CD182, CD183, CD191, CD192, CD193, CD196, CD213, IL9R, ad integrin, beta-7 integrin, FceRI, IL13Ra1, TGFbR, PAFR, LTB4R, C3aR, CystLT1R, CystLT2R, fMLPR, CRTH2 (PGD2 receptor), histamine 4R, IDO, KYN, PAR-2, Siglec-8, Siglec-10, LIR1, LIR2, LIR3, LIR7, TLR7, TLR8, and VLA-4. Eosinophils can also secrete CXCL1, eotaxin-1, GM-CSF, IL-2, IL-3, IL-4, IL-5, IL-6, IL-8, IL-10, IL-12, IL-13, IL-16, IL-18, IFN-gamma, LTC4, MIP-1alpha, NGF, PAF, RANTES, substance P, TGF-alpha, TGF-beta, TNF-alpha, and VIP." + ], + "subsets": [ + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasBroadSynonym", + "val": "polymorphonuclear leucocyte" + }, + { + "pred": "hasBroadSynonym", + "val": "polymorphonuclear leukocyte" + }, + { + "pred": "hasExactSynonym", + "val": "mature eosinocyte" + }, + { + "pred": "hasExactSynonym", + "val": "mature eosinophil leucocyte" + }, + { + "pred": "hasExactSynonym", + "val": "mature eosinophil leukocyte" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/IAO_0000116", + "val": "The status of eosinophils as true professional antigen presenting cells is unclear, despite their ability to present exogenous peptides and peptides processed from exogenous proteins (in certain studies) via MHC Class II and activate T cells. 
Per the equivalence axioms, 'eosinophil' is reasoned to be a subclass of 'professional antigen presenting cell', though the role of eosinophils as such in the body may be limited." + }, + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000042", + "lbl": "neutrophilic myeloblast", + "type": "CLASS", + "meta": { + "definition": { + "val": "A myeloblast committed to the neutrophil lineage. This cell type is GATA-1 positive, C/EBPa-positive, AML-1-positive, c-myb-positive and has low expression of PU.1 transcription factor.", + "xrefs": [ + "GOC:add", + "ISBN:0721601464", + "PMID:12560239", + "PMID:15514007" + ] + }, + "comments": [ + "These cells are CD11b-negative, CD15-negative, CD16-negative, CD35-negative, CD49d-positive, CD68-positive, lactotransferrin-negative, and fMLP receptor-negative. They are found in the Band 3 fraction." + ], + "synonyms": [ + { + "pred": "hasRelatedSynonym", + "val": "neutrophilic granuloblast" + } + ], + "xrefs": [ + { + "val": "ZFA:0009018" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000043", + "lbl": "mature basophil", + "type": "CLASS", + "meta": { + "definition": { + "val": "A fully differentiated basophil, a granular leukocyte with an irregularly shaped, pale-staining nucleus that is partially constricted into two lobes, and with cytoplasm that contains coarse granules of variable size. Basophils contain vasoactive amines such as histamine and serotonin, which are released on appropriate stimulation.", + "xrefs": [ + "GOC:add", + "GOC:dsd", + "GOC:tfm", + "ISBN:0721601464", + "PMID:18466030", + "PMID:19231594", + "PMID:20837449", + "http://www.cap.org" + ] + }, + "comments": [ + "Mature basophils are also capable of producing IL-3, IL-5, IL-6, IL-8, IL-13, IL-25, CCL22, tslp, vegf, and LTC4." 
+ ], + "synonyms": [ + { + "pred": "hasBroadSynonym", + "val": "polymorphonuclear leucocyte" + }, + { + "pred": "hasBroadSynonym", + "val": "polymorphonuclear leukocyte" + }, + { + "pred": "hasExactSynonym", + "val": "mature basophil leucocyte" + }, + { + "pred": "hasExactSynonym", + "val": "mature basophil leukocyte" + } + ], + "xrefs": [ + { + "val": "BTO:0001026" + }, + { + "val": "CALOHA:TS-0688" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000047", + "lbl": "neural stem cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "An undifferentiated neural cell that originates from the neuroectoderm and has the capacity both to perpetually self-renew without differentiating and to generate multiple central nervous system neuronal and glial cell types.", + "xrefs": ["PMID:30639325"] + }, + "subsets": ["http://purl.obolibrary.org/obo/cl#cellxgene_subset"], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "NSC" + }, + { + "pred": "hasExactSynonym", + "val": "neuronal stem cell", + "xrefs": ["PMID:16305818"] + } + ], + "xrefs": [ + { + "val": "BTO:0002881" + }, + { + "val": "CALOHA:TS-2360" + }, + { + "val": "FMA:86684" + }, + { + "val": "NCIt:C12985" + }, + { + "val": "ZFA:0009019" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000048", + "lbl": "multi fate stem cell", + "type": "CLASS", + "meta": { + "definition": { + "val": "A stem cell that can give rise to multiple lineages of cells.", + "xrefs": ["GOC:add"] + }, + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "multi-fate stem cell" + }, + { + "pred": "hasExactSynonym", + "val": "multifate stem cell" + }, + { + "pred": "hasExactSynonym", + "val": "multipotent cell" + }, + { + "pred": "hasExactSynonym", + "val": "multipotent stem cell" + } + ], + "xrefs": [ + { + "val": "FMA:84789" + }, + { + "val": "ZFA:0009020" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000049", + "lbl": "common myeloid progenitor", + "type": "CLASS", + 
"meta": { + "definition": { + "val": "A progenitor cell committed to myeloid lineage, including the megakaryocyte and erythroid lineages.", + "xrefs": ["GOC:add", "ISBN:0878932437", "MESH:D023461"] + }, + "comments": [ + "This cell type is intended to be compatible with any vertebrate common myeloid progenitor. For mammalian CMP known to be CD34-positive, please use the term 'common myeloid progenitor, CD34-positive' (CL_0001059)." + ], + "subsets": [ + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "common myeloid precursor" + }, + { + "synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "CFU-GEMM", + "xrefs": ["ISBN:0878932437"] + }, + { + "synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "CFU-S", + "xrefs": ["ISBN:0878932437"] + }, + { + "synonymType": "http://purl.obolibrary.org/obo/OMO_0003000", + "pred": "hasRelatedSynonym", + "val": "CMP", + "xrefs": ["ISBN:0878932437"] + }, + { + "pred": "hasRelatedSynonym", + "val": "colony forming unit granulocyte, erythrocyte, macrophage, and megakaryocyte", + "xrefs": ["ISBN:0878932437"] + }, + { + "pred": "hasRelatedSynonym", + "val": "multipotential myeloid stem cell", + "xrefs": ["ISBN:0878932437"] + }, + { + "pred": "hasRelatedSynonym", + "val": "myeloid stem cell", + "xrefs": ["ISBN:0878932437"] + }, + { + "pred": "hasRelatedSynonym", + "val": "pluripotent stem cell (bone marrow)", + "xrefs": ["ISBN:0878932437"] + } + ], + "xrefs": [ + { + "val": "BTO:0004730" + }, + { + "val": "ZFA:0009021" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ] + } + }, + { + "id": "http://purl.obolibrary.org/obo/CL_0000050", + "lbl": "megakaryocyte-erythroid progenitor cell", + "type": 
"CLASS", + "meta": { + "definition": { + "val": "A progenitor cell committed to the megakaryocyte and erythroid lineages.", + "xrefs": [ + "GOC:add", + "GOC:dsd", + "GOC:tfm", + "MESH:D055015", + "PMID:16647566", + "http://en.wikipedia.org/wiki/Megakaryocyte-erythroid_progenitor_cell" + ] + }, + "comments": [ + "MEPs are reportedly CD19-negative, CD34-negative, CD45RA-negative, CD110-positive, CD117-positive, and SCA1-negative and reportedly express the transcription factors GATA-1 and NF-E2." + ], + "subsets": [ + "http://purl.obolibrary.org/obo/cl#cellxgene_subset", + "http://purl.obolibrary.org/obo/uberon/core#human_reference_atlas" + ], + "synonyms": [ + { + "pred": "hasExactSynonym", + "val": "CFU-EM" + }, + { + "pred": "hasExactSynonym", + "val": "CFU-MegE" + }, + { + "pred": "hasExactSynonym", + "val": "MEP" + }, + { + "pred": "hasExactSynonym", + "val": "Meg/E progenitor" + }, + { + "pred": "hasExactSynonym", + "val": "colony forming unit erythroid megakaryocyte" + }, + { + "pred": "hasExactSynonym", + "val": "megakaryocyte/erythrocyte progenitor" + }, + { + "pred": "hasExactSynonym", + "val": "megakaryocyte/erythroid progenitor cell" + } + ], + "xrefs": [ + { + "val": "ZFA:0009022" + } + ], + "basicPropertyValues": [ + { + "pred": "http://purl.obolibrary.org/obo/RO_0002175", + "val": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ] + } + } + ], + "edges": [ + { + "sub": "http://dbpedia.org/resource/China", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/NCIT_C25464" + }, + { + "sub": "http://dbpedia.org/resource/India", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/NCIT_C25464" + }, + { + "sub": "http://dbpedia.org/resource/Iran", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/NCIT_C25464" + }, + { + "sub": "http://dbpedia.org/resource/Japan", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/NCIT_C25464" + }, + { + "sub": "http://dbpedia.org/resource/North_Korea", + "pred": "is_a", + "obj": 
"http://purl.obolibrary.org/obo/NCIT_C25464" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000000", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/GO_0005575" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000002", + "pred": "is_a", + "obj": "http://www.w3.org/2002/07/owl#Thing" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000007", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0002321" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000010", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000010", + "pred": "http://purl.obolibrary.org/obo/RO_0000056", + "obj": "http://www.ebi.ac.uk/efo/EFO_0002694" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000015", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000586" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000019", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000015" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000019", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/UBERON_0000061" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000019", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0000079" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000023", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000586" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000023", + "pred": "http://purl.obolibrary.org/obo/BFO_0000050", + "obj": "http://purl.obolibrary.org/obo/UBERON_0000474" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000025", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000586" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000025", + "pred": "is_a", + "obj": "http://www.ebi.ac.uk/efo/EFO_0000988" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000031", + "pred": "is_a", + "obj": 
"http://purl.obolibrary.org/obo/CL_0000055" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000034", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_1000497", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0008019", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002494", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002351", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002319", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002319", + "pred": "is_a", + "obj": "http://www.ebi.ac.uk/efo/EFO_0002963" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002319", + "pred": "http://purl.obolibrary.org/obo/RO_0001025", + "obj": "http://purl.obolibrary.org/obo/UBERON_0001016" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002321", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0002092", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0001063", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000988", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000670", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000628", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": 
"http://purl.obolibrary.org/obo/CL_0000586", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000520", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000520", + "pred": "is_a", + "obj": "http://www.ebi.ac.uk/efo/EFO_0000324" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000521", + "pred": "is_a", + "obj": "http://purl.obolibrary.org/obo/CL_0000000" + }, + { + "sub": "http://purl.obolibrary.org/obo/CL_0000521", + "pred": "is_a", + "obj": "http://www.ebi.ac.uk/efo/EFO_0000324" + } + ] + } + ] +} diff --git a/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py b/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py index b88623b0d..a9099048c 100644 --- a/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py +++ b/tests/gentropy/datasource/biosample_ontologies/test_biosample_ontology.py @@ -16,24 +16,27 @@ class TestOntologyParger: SAMPLE_CELL_ONTOLOGY_PATH = "tests/gentropy/data_samples/cell_ontology_sample.json" SAMPLE_UBERON_PATH = "tests/gentropy/data_samples/uberon_sample.json" + SAMPLE_EFO_PATH = "tests/gentropy/data_samples/efo_biosample_sample.json" - def test_cell_ontology_parser( - self: TestOntologyParger, spark: SparkSession - ) -> None: - """Test cell ontology parser.""" + def test_ontology_parser(self: TestOntologyParger, spark: SparkSession) -> None: + """Test all ontology parsers.""" cell_ontology = extract_ontology_from_json( self.SAMPLE_CELL_ONTOLOGY_PATH, spark ) + uberon = extract_ontology_from_json(self.SAMPLE_UBERON_PATH, spark) + efo_cell_line = extract_ontology_from_json( + self.SAMPLE_EFO_PATH, spark + ).retain_rows_with_ancestor_id(["CL_0000000"]) + assert isinstance( cell_ontology, BiosampleIndex ), "Cell ontology subset is not parsed correctly to BiosampleIndex." 
- - def test_uberon_parser(self: TestOntologyParger, spark: SparkSession) -> None: - """Test uberon parser.""" - uberon = extract_ontology_from_json(self.SAMPLE_UBERON_PATH, spark) assert isinstance( uberon, BiosampleIndex ), "Uberon subset is not parsed correctly to BiosampleIndex." + assert isinstance( + efo_cell_line, BiosampleIndex + ), "EFO cell line subset is not parsed correctly to BiosampleIndex." def test_merge_biosample_indices( self: TestOntologyParger, spark: SparkSession @@ -43,8 +46,9 @@ def test_merge_biosample_indices( self.SAMPLE_CELL_ONTOLOGY_PATH, spark ) uberon = extract_ontology_from_json(self.SAMPLE_UBERON_PATH, spark) + efo = extract_ontology_from_json(self.SAMPLE_EFO_PATH, spark) - merged = cell_ontology.merge_indices([uberon]) + merged = cell_ontology.merge_indices([uberon, efo]) assert isinstance( merged, BiosampleIndex ), "Merging of biosample indices is not correct." From 7b20d55275bae4baefd95774b7279ea362635a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 21 Oct 2024 15:20:48 +0100 Subject: [PATCH 120/188] feat(l2g): extend colocalisation neighbourhood metrics to missing genes in the vicinity (#851) * feat(l2g): wip - limit colocalisation neighbourhood to protein coding genes * feat: adjust logic in `common_neighbourhood_colocalisation_feature_logic` * fix: correct logic in `common_neighbourhood_vep_feature_logic` * chore: adjust tests * chore: wip * feat: add `extend_missing_colocalisation_to_neighbourhood_genes` * feat: add variant index as a dependency to the colocalisation nbh features * test: add `test_extend_missing_colocalisation_to_neighbourhood_genes` * chore: minor * chore: fixes tests --- .../datasets/l2g_features/colocalisation.md | 1 + .../dataset/l2g_features/colocalisation.py | 117 +++++++++- src/gentropy/dataset/l2g_features/vep.py | 2 +- tests/gentropy/dataset/test_l2g_feature.py | 211 ++++++++++++------ 4 files changed, 252 
insertions(+), 79 deletions(-) diff --git a/docs/python_api/datasets/l2g_features/colocalisation.md b/docs/python_api/datasets/l2g_features/colocalisation.md index c38c690d7..2b2680e66 100644 --- a/docs/python_api/datasets/l2g_features/colocalisation.md +++ b/docs/python_api/datasets/l2g_features/colocalisation.md @@ -20,4 +20,5 @@ title: From colocalisation ## Common logic ::: gentropy.dataset.l2g_features.colocalisation.common_colocalisation_feature_logic +::: gentropy.dataset.l2g_features.colocalisation.extend_missing_colocalisation_to_neighbourhood_genes ::: gentropy.dataset.l2g_features.colocalisation.common_neighbourhood_colocalisation_feature_logic diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py index 319a128da..c7161dc55 100644 --- a/src/gentropy/dataset/l2g_features/colocalisation.py +++ b/src/gentropy/dataset/l2g_features/colocalisation.py @@ -13,6 +13,7 @@ from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.variant_index import VariantIndex if TYPE_CHECKING: from pyspark.sql import DataFrame @@ -68,6 +69,54 @@ def common_colocalisation_feature_logic( ) +def extend_missing_colocalisation_to_neighbourhood_genes( + feature_name: str, + local_features: DataFrame, + variant_index: VariantIndex, + gene_index: GeneIndex, + study_locus: StudyLocus, +) -> DataFrame: + """This function creates an artificial dataset of features that represents the missing colocalisation to the neighbourhood genes. 
+ + Args: + feature_name (str): The name of the feature to extend + local_features (DataFrame): The dataframe of features to extend + variant_index (VariantIndex): Variant index containing all variant/gene relationships + gene_index (GeneIndex): Gene index to fetch the gene information + study_locus (StudyLocus): Study locus to traverse between colocalisation and variant index + + Returns: + DataFrame: Dataframe of features that include genes in the neighbourhood not present in the colocalisation results. For these genes, the feature value is set to 0. + """ + coding_variant_gene_lut = ( + variant_index.df.select( + "variantId", f.explode("transcriptConsequences").alias("tc") + ) + .select(f.col("tc.targetId").alias("geneId"), "variantId") + .join(gene_index.df.select("geneId", "biotype"), "geneId", "left") + .filter(f.col("biotype") == "protein_coding") + .drop("biotype") + .distinct() + ) + local_features_w_variant = local_features.join( + study_locus.df.select("studyLocusId", "variantId"), "studyLocusId" + ) + return ( + # Get the genes that are not present in the colocalisation results + coding_variant_gene_lut.join( + local_features_w_variant, ["variantId", "geneId"], "left_anti" + ) + # We now link the missing variant/gene to the study locus from the original dataframe + .join( + local_features_w_variant.select("studyLocusId", "variantId").distinct(), + "variantId", + ) + .drop("variantId") + # Fill the information for missing genes with 0 + .withColumn(feature_name, f.lit(0.0)) + ) + + def common_neighbourhood_colocalisation_feature_logic( study_loci_to_annotate: StudyLocus | L2GGoldStandard, colocalisation_method: str, @@ -79,6 +128,7 @@ def common_neighbourhood_colocalisation_feature_logic( study_index: StudyIndex, gene_index: GeneIndex, study_locus: StudyLocus, + variant_index: VariantIndex, ) -> DataFrame: """Wrapper to call the logic that creates a type of colocalisation features. 
@@ -92,6 +142,7 @@ def common_neighbourhood_colocalisation_feature_logic( study_index (StudyIndex): Study index to fetch study type and gene gene_index (GeneIndex): Gene index to add gene type study_locus (StudyLocus): Study locus to traverse between colocalisation and study index + variant_index (VariantIndex): Variant index to annotate all overlapping genes Returns: DataFrame: Feature annotation in long format with the columns: studyLocusId, geneId, featureName, featureValue @@ -107,11 +158,23 @@ def common_neighbourhood_colocalisation_feature_logic( colocalisation=colocalisation, study_index=study_index, study_locus=study_locus, - ).join(gene_index.df.select("geneId", "biotype"), "geneId", "left") + ) + extended_local_max = local_max.unionByName( + extend_missing_colocalisation_to_neighbourhood_genes( + local_feature_name, + local_max, + variant_index, + gene_index, + study_locus, + ) + ) # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) # (non protein coding genes in the vicinity are excluded see #3552) regional_mean_per_study_locus = ( - local_max.filter(f.col("biotype") == "protein_coding") + extended_local_max.join( + gene_index.df.select("geneId", "biotype"), "geneId", "left" + ) + .filter(f.col("biotype") == "protein_coding") .groupBy("studyLocusId") .agg(f.mean(local_feature_name).alias("regional_mean")) ) @@ -121,7 +184,7 @@ def common_neighbourhood_colocalisation_feature_logic( feature_name, f.col(local_feature_name) - f.coalesce(f.col("regional_mean"), f.lit(0.0)), ) - .drop("regional_mean", local_feature_name, "biotype") + .drop("regional_mean", local_feature_name) ) @@ -171,7 +234,13 @@ def compute( class EQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): """Max CLPP for each (study, locus) aggregating over all eQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] + feature_dependency_type = [ + Colocalisation, + StudyIndex, + GeneIndex, + StudyLocus, + 
VariantIndex, + ] feature_name = "eQtlColocClppMaximumNeighbourhood" @classmethod @@ -256,7 +325,13 @@ def compute( class PQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): """Max CLPP for each (study, locus, gene) aggregating over all pQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] + feature_dependency_type = [ + Colocalisation, + StudyIndex, + GeneIndex, + StudyLocus, + VariantIndex, + ] feature_name = "pQtlColocClppMaximumNeighbourhood" @classmethod @@ -340,7 +415,13 @@ def compute( class SQtlColocClppMaximumNeighbourhoodFeature(L2GFeature): """Max CLPP for each (study, locus, gene) aggregating over all sQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] + feature_dependency_type = [ + Colocalisation, + StudyIndex, + GeneIndex, + StudyLocus, + VariantIndex, + ] feature_name = "sQtlColocClppMaximumNeighbourhood" @classmethod @@ -424,7 +505,13 @@ def compute( class EQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): """Max H4 for each (study, locus) aggregating over all eQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] + feature_dependency_type = [ + Colocalisation, + StudyIndex, + GeneIndex, + StudyLocus, + VariantIndex, + ] feature_name = "eQtlColocH4MaximumNeighbourhood" @classmethod @@ -508,7 +595,13 @@ def compute( class PQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): """Max H4 for each (study, locus) aggregating over all pQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] + feature_dependency_type = [ + Colocalisation, + StudyIndex, + GeneIndex, + StudyLocus, + VariantIndex, + ] feature_name = "pQtlColocH4MaximumNeighbourhood" @classmethod @@ -592,7 +685,13 @@ def compute( class SQtlColocH4MaximumNeighbourhoodFeature(L2GFeature): """Max H4 for each (study, locus) aggregating over all sQTLs.""" - feature_dependency_type = [Colocalisation, StudyIndex, GeneIndex, StudyLocus] + 
feature_dependency_type = [ + Colocalisation, + StudyIndex, + GeneIndex, + StudyLocus, + VariantIndex, + ] feature_name = "sQtlColocH4MaximumNeighbourhood" @classmethod diff --git a/src/gentropy/dataset/l2g_features/vep.py b/src/gentropy/dataset/l2g_features/vep.py index 557d9a509..91b03d57b 100644 --- a/src/gentropy/dataset/l2g_features/vep.py +++ b/src/gentropy/dataset/l2g_features/vep.py @@ -114,7 +114,7 @@ def common_neighbourhood_vep_feature_logic( feature_name, f.col(local_feature_name) - f.coalesce(f.col("regional_mean"), f.lit(0.0)), ) - .drop("regional_metric", local_feature_name, "biotype") + .drop("regional_mean", local_feature_name, "biotype") ) diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index bd6fca97c..212ad32a6 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -34,6 +34,7 @@ SQtlColocH4MaximumNeighbourhoodFeature, common_colocalisation_feature_logic, common_neighbourhood_colocalisation_feature_logic, + extend_missing_colocalisation_to_neighbourhood_genes, ) from gentropy.dataset.l2g_features.distance import ( DistanceFootprintMeanFeature, @@ -135,6 +136,11 @@ def sample_gene_index(spark: SparkSession) -> GeneIndex: "biotype": "lncRNA", "chromosome": "1", }, + { + "geneId": "gene3", + "biotype": "protein_coding", + "chromosome": "1", + }, ], GeneIndex.get_schema(), ), @@ -142,6 +148,66 @@ def sample_gene_index(spark: SparkSession) -> GeneIndex: ) +@pytest.fixture(scope="module") +def sample_variant_index(spark: SparkSession) -> VariantIndex: + """Create a sample variant index for testing.""" + return VariantIndex( + _df=spark.createDataFrame( + [ + ( + "var1", + "chrom", + 1, + "A", + "T", + [ + { + "targetId": "gene1", + "consequenceScore": 0.66, + "isEnsemblCanonical": True, + }, + { + "targetId": "gene2", + "consequenceScore": 1.0, + "isEnsemblCanonical": True, + }, + { + "targetId": "gene3", + "consequenceScore": 0.0, + 
"isEnsemblCanonical": True, + }, + ], + ), + ], + schema=StructType( + [ + StructField("variantId", StringType(), True), + StructField("chromosome", StringType(), True), + StructField("position", IntegerType(), True), + StructField("referenceAllele", StringType(), True), + StructField("alternateAllele", StringType(), True), + StructField( + "transcriptConsequences", + ArrayType( + StructType( + [ + StructField("targetId", StringType(), True), + StructField( + "isEnsemblCanonical", BooleanType(), True + ), + StructField("consequenceScore", FloatType(), True), + ] + ) + ), + True, + ), + ] + ), + ), + _schema=VariantIndex.get_schema(), + ) + + class TestCommonColocalisationFeatureLogic: """Test the common logic of the colocalisation features.""" @@ -183,10 +249,46 @@ def test__common_colocalisation_feature_logic( observed_df.collect() == expected_df.collect() ), "The feature values are not as expected." - def test__common_neighbourhood_colocalisation_feature_logic( + def test_extend_missing_colocalisation_to_neighbourhood_genes( self: TestCommonColocalisationFeatureLogic, spark: SparkSession, sample_gene_index: GeneIndex, + sample_variant_index: VariantIndex, + ) -> None: + """Test the extend_missing_colocalisation_to_neighbourhood_genes function.""" + local_features = spark.createDataFrame( + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "eQtlColocH4Maximum": 0.81, + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "eQtlColocH4Maximum": 0.9, + }, + ], + ) + observed_df = extend_missing_colocalisation_to_neighbourhood_genes( + feature_name="eQtlColocH4Maximum", + local_features=local_features, + variant_index=sample_variant_index, + gene_index=sample_gene_index, + study_locus=self.sample_study_locus, + ).select("studyLocusId", "geneId", "eQtlColocH4Maximum") + expected_df = spark.createDataFrame( + [{"geneId": "gene3", "studyLocusId": "1", "eQtlColocH4Maximum": 0.0}] + ).select("studyLocusId", "geneId", "eQtlColocH4Maximum") + assert ( + 
observed_df.collect() == expected_df.collect() + ), "The feature values are not as expected." + + def test_common_neighbourhood_colocalisation_feature_logic( + self: TestCommonColocalisationFeatureLogic, + spark: SparkSession, + sample_gene_index: GeneIndex, + sample_variant_index: VariantIndex, ) -> None: """Test the common logic of the neighbourhood colocalisation features.""" feature_name = "eQtlColocH4MaximumNeighbourhood" @@ -200,19 +302,20 @@ def test__common_neighbourhood_colocalisation_feature_logic( study_index=self.sample_studies, study_locus=self.sample_study_locus, gene_index=sample_gene_index, - ).withColumn(feature_name, f.round(f.col(feature_name), 2)) - # expected average is (0.81)/1 = 0.81 + variant_index=sample_variant_index, + ).withColumn(feature_name, f.round(f.col(feature_name), 3)) + # expected average is (0.81 + 0)/2 = 0.405 expected_df = spark.createDataFrame( [ { "studyLocusId": "1", "geneId": "gene1", - "eQtlColocH4MaximumNeighbourhood": 0.0, # 0.81 - 0.81 + "eQtlColocH4MaximumNeighbourhood": 0.405, # 0.81 - 0.405 }, { "studyLocusId": "1", "geneId": "gene2", - "eQtlColocH4MaximumNeighbourhood": 0.09, # 0.9 - 0.81 + "eQtlColocH4MaximumNeighbourhood": 0.495, # 0.9 - 0.405 }, ], ).select("studyLocusId", "geneId", "eQtlColocH4MaximumNeighbourhood") @@ -232,7 +335,7 @@ def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> N [ { "studyLocusId": "1", - "variantId": "lead1", + "variantId": "var1", "studyId": "study1", # this is a GWAS "chromosome": "1", }, @@ -281,25 +384,25 @@ def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> N [ { "studyLocusId": "1", - "variantId": "lead1", + "variantId": "var1", "studyId": "study1", # this is a GWAS "chromosome": "1", }, { "studyLocusId": "2", - "variantId": "lead1", + "variantId": "var1", "studyId": "study2", # this is a QTL (same gee) "chromosome": "1", }, { "studyLocusId": "3", - "variantId": "lead1", + "variantId": "var1", "studyId": "study3", # this 
is another QTL (same gene) "chromosome": "1", }, { "studyLocusId": "4", - "variantId": "lead1", + "variantId": "var1", "studyId": "study4", # this is another QTL (diff gene) "chromosome": "1", }, @@ -533,6 +636,11 @@ class TestCommonVepFeatureLogic: ( "vepMean", [ + { + "studyLocusId": "1", + "geneId": "gene3", + "vepMean": "0.00", + }, { "studyLocusId": "1", "geneId": "gene1", @@ -548,6 +656,11 @@ class TestCommonVepFeatureLogic: ( "vepMaximum", [ + { + "studyLocusId": "1", + "geneId": "gene3", + "vepMaximum": "0.00", + }, { "studyLocusId": "1", "geneId": "gene1", @@ -567,12 +680,13 @@ def test_common_vep_feature_logic( spark: SparkSession, feature_name: str, expected_data: dict[str, Any], + sample_variant_index: VariantIndex, ) -> None: """Test the logic of the function that extracts features from VEP's functional consequences.""" observed_df = ( common_vep_feature_logic( self.sample_study_locus, - variant_index=self.sample_variant_index, + variant_index=sample_variant_index, feature_name=feature_name, ) .orderBy(feature_name) @@ -593,17 +707,22 @@ def test_common_neighbourhood_vep_feature_logic_no_protein_coding( self: TestCommonVepFeatureLogic, spark: SparkSession, sample_gene_index: GeneIndex, + sample_variant_index: VariantIndex, ) -> None: """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity. Because the genes in the vicinity are all non coding, the neighbourhood features should equal the local ones. 
""" feature_name = "vepMaximumNeighbourhood" + non_protein_coding_gene_index = GeneIndex( + _df=sample_gene_index.df.filter(f.col("geneId") != "gene3"), + _schema=GeneIndex.get_schema(), + ) observed_df = ( common_neighbourhood_vep_feature_logic( self.sample_study_locus, - variant_index=self.sample_variant_index, - gene_index=sample_gene_index, + variant_index=sample_variant_index, + gene_index=non_protein_coding_gene_index, feature_name=feature_name, ) .withColumn(feature_name, f.round(f.col(feature_name), 2)) @@ -616,7 +735,7 @@ def test_common_neighbourhood_vep_feature_logic_no_protein_coding( ( ["1", "gene1", 0.0], ["1", "gene2", 0.34], - ), # (0.66-0.66) and (1.0 -0.66) + ), # (0.66-0.66) and (1.0-0.66) ["studyLocusId", "geneId", feature_name], ) .orderBy(feature_name) @@ -630,13 +749,14 @@ def test_common_neighbourhood_vep_feature_logic( self: TestCommonVepFeatureLogic, spark: SparkSession, sample_gene_index: GeneIndex, + sample_variant_index: VariantIndex, ) -> None: """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity.""" feature_name = "vepMaximumNeighbourhood" observed_df = ( common_neighbourhood_vep_feature_logic( self.sample_study_locus, - variant_index=self.sample_variant_index, + variant_index=sample_variant_index, gene_index=sample_gene_index, feature_name=feature_name, ) @@ -645,11 +765,16 @@ def test_common_neighbourhood_vep_feature_logic( ) expected_df = ( spark.createDataFrame( - (["1", "gene1", 0.0], ["1", "gene2", 0.34]), + # regional mean is 0.66/2 = 0.33 + ( + ["1", "gene3", -0.33], + ["1", "gene1", 0.33], + ["1", "gene2", 0.67], + ), # (0 - 0.33) and (0.66-0.33) and (1.0 -0.33) ["studyLocusId", "geneId", feature_name], ) - .select("studyLocusId", "geneId", feature_name) .orderBy(feature_name) + .select("studyLocusId", "geneId", feature_name) ) assert ( observed_df.collect() == expected_df.collect() @@ -678,55 +803,3 @@ def 
_setup(self: TestCommonVepFeatureLogic, spark: SparkSession) -> None: ), _schema=StudyLocus.get_schema(), ) - self.sample_variant_index = VariantIndex( - _df=spark.createDataFrame( - [ - ( - "var1", - "chrom", - 1, - "A", - "T", - [ - { - "targetId": "gene1", - "consequenceScore": 0.66, - "isEnsemblCanonical": True, - }, - { - "targetId": "gene2", - "consequenceScore": 1.0, - "isEnsemblCanonical": True, - }, - ], - ), - ], - schema=StructType( - [ - StructField("variantId", StringType(), True), - StructField("chromosome", StringType(), True), - StructField("position", IntegerType(), True), - StructField("referenceAllele", StringType(), True), - StructField("alternateAllele", StringType(), True), - StructField( - "transcriptConsequences", - ArrayType( - StructType( - [ - StructField("targetId", StringType(), True), - StructField( - "isEnsemblCanonical", BooleanType(), True - ), - StructField( - "consequenceScore", FloatType(), True - ), - ] - ) - ), - True, - ), - ] - ), - ), - _schema=VariantIndex.get_schema(), - ) From de4627f8c076fac899646aa89cdefe1007e28b19 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 22 Oct 2024 10:16:02 +0100 Subject: [PATCH 121/188] fix: add scQTLs into coloc features (#833) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add scQTLs into coloc features * fix: include single cell qtls in the list of valid study types --------- Co-authored-by: Irene López --- src/gentropy/dataset/colocalisation.py | 7 ++++++- .../dataset/l2g_features/colocalisation.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index 568b46007..db0040652 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -62,7 +62,12 @@ def extract_maximum_coloc_probability_per_region_and_gene( valid_qtls = list( 
set(EqtlCatalogueStudyIndex.method_to_study_type_mapping.values()) - ) + ) + [ + f"sc{qtl}" + for qtl in set( + EqtlCatalogueStudyIndex.method_to_study_type_mapping.values() + ) + ] if filter_by_qtls: filter_by_qtls = ( diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py index c7161dc55..fdbf3ed18 100644 --- a/src/gentropy/dataset/l2g_features/colocalisation.py +++ b/src/gentropy/dataset/l2g_features/colocalisation.py @@ -211,7 +211,7 @@ def compute( """ colocalisation_method = "ECaviar" colocalisation_metric = "clpp" - qtl_type = "eqtl" + qtl_type = ["eqtl", "sceqtl"] return cls( _df=convert_from_wide_to_long( @@ -260,7 +260,7 @@ def compute( """ colocalisation_method = "ECaviar" colocalisation_metric = "clpp" - qtl_type = "eqtl" + qtl_type = ["eqtl", "sceqtl"] return cls( _df=convert_from_wide_to_long( @@ -393,7 +393,7 @@ def compute( """ colocalisation_method = "ECaviar" colocalisation_metric = "clpp" - qtl_types = ["sqtl", "tuqtl"] + qtl_types = ["sqtl", "tuqtl", "scsqtl", "sctuqtl"] return cls( _df=convert_from_wide_to_long( common_colocalisation_feature_logic( @@ -441,7 +441,7 @@ def compute( """ colocalisation_method = "ECaviar" colocalisation_metric = "clpp" - qtl_types = ["sqtl", "tuqtl"] + qtl_types = ["sqtl", "tuqtl", "scsqtl", "sctuqtl"] return cls( _df=convert_from_wide_to_long( common_neighbourhood_colocalisation_feature_logic( @@ -483,7 +483,7 @@ def compute( """ colocalisation_method = "Coloc" colocalisation_metric = "h4" - qtl_type = "eqtl" + qtl_type = ["eqtl", "sceqtl"] return cls( _df=convert_from_wide_to_long( common_colocalisation_feature_logic( @@ -531,7 +531,7 @@ def compute( """ colocalisation_method = "Coloc" colocalisation_metric = "h4" - qtl_type = "eqtl" + qtl_type = ["eqtl", "sceqtl"] return cls( _df=convert_from_wide_to_long( common_neighbourhood_colocalisation_feature_logic( @@ -663,7 +663,7 @@ def compute( """ colocalisation_method = "Coloc" colocalisation_metric = 
"h4" - qtl_types = ["sqtl", "tuqtl"] + qtl_types = ["sqtl", "tuqtl", "scsqtl", "sctuqtl"] return cls( _df=convert_from_wide_to_long( common_colocalisation_feature_logic( @@ -711,7 +711,7 @@ def compute( """ colocalisation_method = "Coloc" colocalisation_metric = "h4" - qtl_types = ["sqtl", "tuqtl"] + qtl_types = ["sqtl", "tuqtl", "scsqtl", "sctuqtl"] return cls( _df=convert_from_wide_to_long( common_neighbourhood_colocalisation_feature_logic( From 782a458f6f8f274f8f55d677a4a2e2db3594bf1f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:44:21 +0100 Subject: [PATCH 122/188] chore: pre-commit autoupdate (#866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.7 → v0.7.0](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.7...v0.7.0) - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0) - [github.com/pre-commit/mirrors-mypy: v1.11.2 → v1.12.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.11.2...v1.12.1) - [github.com/jsh9/pydoclint: 0.5.8 → 0.5.9](https://github.com/jsh9/pydoclint/compare/0.5.8...0.5.9) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a938f03a3..7d2d55fc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.7 + rev: v0.7.0 hooks: - id: ruff args: @@ -15,7 +15,7 @@ repos: files: ^((gentropy|utils|tests)/.+)?[^/]+\.py$ - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -65,7 
+65,7 @@ repos: stages: [commit-msg] - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.11.2" + rev: "v1.12.1" hooks: - id: mypy args: @@ -98,7 +98,7 @@ repos: - id: beautysh - repo: https://github.com/jsh9/pydoclint - rev: 0.5.8 + rev: 0.5.9 hooks: - id: pydoclint From df220e967f9f663317c8b50435f36fda349557da Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 22 Oct 2024 11:25:06 +0100 Subject: [PATCH 123/188] feat: gwas catalog top-hit + study step (#808) * fix: wrong step parameter * fix: persist va_subset * fix: remove broadcasts * feat: new gwas_catalog_top_hits step * docs: new step added to documentation * fix: incorrect target * fix: failing tests * fix: extra argument * fix: select does not require hasSumstats anymore * feat: study inclusion step repurposed into study index step * docs: fix path for documentation * feat: remove GWASCatalogIngestionStep as it will no longer be necessary * fix: gwas catalog study curation step * refactor: rename step * perf: repartition study locus before PICS to gain parallellisation * fix: incorrect partitioning * revert: partitioning * fix: drop duplicate rows after ingesting associations * fix: fix in study index ingestion * fix: v1 * feat: working study index with sumstats qc and curation * test: deprecate obsoleted testt * test: remove colon causing tests to fail * test: curation quality controls no longer * fix: changing mapping for ancestries adding CSA * fix: revert changes in mapping --------- Co-authored-by: Daniel Suveges Co-authored-by: Vivien Ho <56025826+vivienho@users.noreply.github.com> Co-authored-by: Yakov Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- .../steps/gwas_catalog_inclusion.md | 5 - .../steps/gwas_catalog_ingestion.md | 5 - .../steps/gwas_catalog_study_index.md | 5 + .../python_api/steps/gwas_catalog_top_hits.md | 5 + src/gentropy/assets/schemas/study_index.json | 31 +-- src/gentropy/config.py | 36 ++-- src/gentropy/dataset/study_index.py 
| 156 +++++++++++++- .../datasource/gwas_catalog/associations.py | 17 +- .../datasource/gwas_catalog/study_index.py | 152 +++----------- .../gwas_catalog/study_index_ot_curation.py | 90 +++++++++ src/gentropy/gwas_catalog_study_curation.py | 34 ++-- src/gentropy/gwas_catalog_study_inclusion.py | 190 ------------------ src/gentropy/gwas_catalog_study_index.py | 99 +++++++++ ..._ingestion.py => gwas_catalog_top_hits.py} | 40 +--- .../dataset/test_dataset_exclusion.py | 8 +- .../test_gwas_catalog_curation.py | 33 --- .../test_gwas_catalog_study_index.py | 18 -- 17 files changed, 436 insertions(+), 488 deletions(-) delete mode 100644 docs/python_api/steps/gwas_catalog_inclusion.md delete mode 100644 docs/python_api/steps/gwas_catalog_ingestion.md create mode 100644 docs/python_api/steps/gwas_catalog_study_index.md create mode 100644 docs/python_api/steps/gwas_catalog_top_hits.md create mode 100644 src/gentropy/datasource/gwas_catalog/study_index_ot_curation.py delete mode 100644 src/gentropy/gwas_catalog_study_inclusion.py create mode 100644 src/gentropy/gwas_catalog_study_index.py rename src/gentropy/{gwas_catalog_ingestion.py => gwas_catalog_top_hits.py} (59%) diff --git a/docs/python_api/steps/gwas_catalog_inclusion.md b/docs/python_api/steps/gwas_catalog_inclusion.md deleted file mode 100644 index e9ede6dd6..000000000 --- a/docs/python_api/steps/gwas_catalog_inclusion.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -title: gwas_catalog_study_inclusion ---- - -::: gentropy.gwas_catalog_study_inclusion.GWASCatalogStudyInclusionGenerator diff --git a/docs/python_api/steps/gwas_catalog_ingestion.md b/docs/python_api/steps/gwas_catalog_ingestion.md deleted file mode 100644 index 69ea92479..000000000 --- a/docs/python_api/steps/gwas_catalog_ingestion.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -title: gwas_catalog_ingestion ---- - -::: gentropy.gwas_catalog_ingestion.GWASCatalogIngestionStep diff --git a/docs/python_api/steps/gwas_catalog_study_index.md 
b/docs/python_api/steps/gwas_catalog_study_index.md new file mode 100644 index 000000000..4984de2a0 --- /dev/null +++ b/docs/python_api/steps/gwas_catalog_study_index.md @@ -0,0 +1,5 @@ +--- +title: gwas_catalog_study_inclusion +--- + +::: gentropy.gwas_catalog_study_index.GWASCatalogStudyIndexGenerationStep diff --git a/docs/python_api/steps/gwas_catalog_top_hits.md b/docs/python_api/steps/gwas_catalog_top_hits.md new file mode 100644 index 000000000..03d81eafb --- /dev/null +++ b/docs/python_api/steps/gwas_catalog_top_hits.md @@ -0,0 +1,5 @@ +--- +title: GWAS Catalog Top Hits Ingestion Step +--- + +::: gentropy.gwas_catalog_top_hits.GWASCatalogTopHitIngestionStep diff --git a/src/gentropy/assets/schemas/study_index.json b/src/gentropy/assets/schemas/study_index.json index a2dac1bca..9c50d4a19 100644 --- a/src/gentropy/assets/schemas/study_index.json +++ b/src/gentropy/assets/schemas/study_index.json @@ -264,33 +264,12 @@ "metadata": {} }, { - "name": "sumStatQCPerformed", - "type": "boolean", - "nullable": true, - "metadata": {} - }, - { - "name": "sumStatQCValues", + "name": "sumstatQCValues", "type": { - "type": "array", - "elementType": { - "type": "struct", - "fields": [ - { - "name": "QCCheckName", - "type": "string", - "nullable": true, - "metadata": {} - }, - { - "name": "QCCheckValue", - "type": "float", - "nullable": true, - "metadata": {} - } - ] - }, - "containsNull": true + "type": "map", + "keyType": "string", + "valueType": "float", + "valueContainsNull": true }, "nullable": true, "metadata": {} diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 26c676e9b..d47c1b8ab 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -68,44 +68,36 @@ class GWASCatalogStudyCurationConfig(StepConfig): catalog_study_files: list[str] = MISSING catalog_ancestry_files: list[str] = MISSING - catalog_sumstats_lut: str = MISSING gwas_catalog_study_curation_out: str = MISSING gwas_catalog_study_curation_file: str = MISSING _target_: str = 
"gentropy.gwas_catalog_study_curation.GWASCatalogStudyCurationStep" @dataclass -class GWASCatalogStudyInclusionConfig(StepConfig): - """GWAS Catalog study inclusion step configuration.""" +class GWASCatalogStudyIndexGenerationStep(StepConfig): + """GWAS Catalog study index generation.""" catalog_study_files: list[str] = MISSING catalog_ancestry_files: list[str] = MISSING - catalog_associations_file: str = MISSING - gwas_catalog_study_curation_file: str = MISSING - variant_annotation_path: str = MISSING - harmonised_study_file: str = MISSING - criteria: str = MISSING - inclusion_list_path: str = MISSING - exclusion_list_path: str = MISSING + study_index_path: str = MISSING + gwas_catalog_study_curation_file: str | None = None + sumstats_qc_path: str | None = None _target_: str = ( - "gentropy.gwas_catalog_study_inclusion.GWASCatalogStudyInclusionGenerator" + "gentropy.gwas_catalog_study_index.GWASCatalogStudyIndexGenerationStep" ) @dataclass -class GWASCatalogIngestionConfig(StepConfig): +class GWASCatalogTopHitIngestionConfig(StepConfig): """GWAS Catalog ingestion step configuration.""" catalog_study_files: list[str] = MISSING catalog_ancestry_files: list[str] = MISSING - catalog_sumstats_lut: str = MISSING catalog_associations_file: str = MISSING variant_annotation_path: str = MISSING catalog_studies_out: str = MISSING catalog_associations_out: str = MISSING - gwas_catalog_study_curation_file: str | None = None - inclusion_list_path: str | None = None - _target_: str = "gentropy.gwas_catalog_ingestion.GWASCatalogIngestionStep" + _target_: str = "gentropy.gwas_catalog_top_hits.GWASCatalogTopHitIngestionStep" @dataclass @@ -658,17 +650,19 @@ def register_config() -> None: ) cs.store( group="step", - name="gwas_catalog_study_inclusion", - node=GWASCatalogStudyInclusionConfig, - ) - cs.store( - group="step", name="gwas_catalog_ingestion", node=GWASCatalogIngestionConfig + name="gwas_catalog_study_index", + node=GWASCatalogStudyIndexGenerationStep, ) cs.store( 
group="step", name="gwas_catalog_sumstat_preprocess", node=GWASCatalogSumstatsPreprocessConfig, ) + cs.store( + group="step", + name="gwas_catalog_top_hit_ingestion", + node=GWASCatalogTopHitIngestionConfig, + ) cs.store(group="step", name="ld_based_clumping", node=LDBasedClumpingConfig) cs.store(group="step", name="ld_index", node=LDIndexConfig) cs.store(group="step", name="locus_to_gene", node=LocusToGeneConfig) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index e7023ee9b..92bdc61a4 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -13,6 +13,7 @@ from gentropy.assets import data from gentropy.common.schemas import parse_spark_schema +from gentropy.common.spark_helpers import convert_from_wide_to_long from gentropy.dataset.dataset import Dataset if TYPE_CHECKING: @@ -32,13 +33,29 @@ class StudyQualityCheck(Enum): UNKNOWN_STUDY_TYPE (str): Indicating the provided type of study is not supported. UNKNOWN_BIOSAMPLE (str): Flagging if a biosample identifier is not found in the reference. DUPLICATED_STUDY (str): Flagging if a study identifier is not unique. + SUMSTATS_NOT_AVAILABLE (str): Flagging if harmonized summary statistics are not available or empty. + NO_OT_CURATION (str): Flagging if a study has not been curated by Open Targets. + FAILED_MEAN_BETA_CHECK (str): Flagging if the mean beta QC check value is not within the expected range. + FAILED_PZ_CHECK (str): Flagging if the PZ QC check values are not within the expected range. + FAILED_GC_LAMBDA_CHECK (str): Flagging if the GC lambda value is not within the expected range. + SMALL_NUMBER_OF_SNPS (str): Flagging if the number of SNPs in the study is below the expected threshold. """ - UNRESOLVED_TARGET = "Target/gene identifier could not match to reference." - UNRESOLVED_DISEASE = "No valid disease identifier found." - UNKNOWN_STUDY_TYPE = "This type of study is not supported." 
- UNKNOWN_BIOSAMPLE = "Biosample identifier was not found in the reference." - DUPLICATED_STUDY = "The identifier of this study is not unique." + UNRESOLVED_TARGET = "Target/gene identifier could not match to reference" + UNRESOLVED_DISEASE = "No valid disease identifier found" + UNKNOWN_STUDY_TYPE = "This type of study is not supported" + UNKNOWN_BIOSAMPLE = "Biosample identifier was not found in the reference" + DUPLICATED_STUDY = "The identifier of this study is not unique" + SUMSTATS_NOT_AVAILABLE = "Harmonized summary statistics are not available or empty" + NO_OT_CURATION = "GWAS Catalog study has not been curated by Open Targets" + FAILED_MEAN_BETA_CHECK = ( + "The mean beta QC check value is not within the expected range" + ) + FAILED_PZ_CHECK = "The PZ QC check values are not within the expected range" + FAILED_GC_LAMBDA_CHECK = "The GC lambda value is not within the expected range" + SMALL_NUMBER_OF_SNPS = ( + "The number of SNPs in the study is below the expected threshold" + ) @dataclass @@ -410,7 +427,9 @@ def validate_target(self: StudyIndex, target_index: GeneIndex) -> StudyIndex: return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema()) - def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> StudyIndex: + def validate_biosample( + self: StudyIndex, biosample_index: BiosampleIndex + ) -> StudyIndex: """Validating biosample identifiers in the study index against the provided biosample index. Args: @@ -419,7 +438,9 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu Returns: StudyIndex: where non-gwas studies are flagged if biosampleIndex could not be validated. 
""" - biosample_set = biosample_index.df.select("biosampleId", f.lit(True).alias("isIdFound")) + biosample_set = biosample_index.df.select( + "biosampleId", f.lit(True).alias("isIdFound") + ) # If biosampleId in df, we need to drop it: if "biosampleId" in self.df.columns: @@ -430,7 +451,11 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu return self validated_df = ( - self.df.join(biosample_set, self.df.biosampleFromSourceId == biosample_set.biosampleId, how="left") + self.df.join( + biosample_set, + self.df.biosampleFromSourceId == biosample_set.biosampleId, + how="left", + ) .withColumn( "isIdFound", f.when( @@ -450,3 +475,118 @@ def validate_biosample(self: StudyIndex, biosample_index: BiosampleIndex) -> Stu ) return StudyIndex(_df=validated_df, _schema=StudyIndex.get_schema()) + + def annotate_sumstats_qc( + self: StudyIndex, + sumstats_qc: DataFrame, + threshold_mean_beta: float = 0.05, + threshold_mean_diff_pz: float = 0.05, + threshold_se_diff_pz: float = 0.05, + threshold_min_gc_lambda: float = 0.7, + threshold_max_gc_lambda: float = 2.5, + threshold_min_n_variants: int = 2_000_000, + ) -> StudyIndex: + """Annotate summary stats QC information. + + Args: + sumstats_qc (DataFrame): containing summary statistics-based quality controls. + threshold_mean_beta (float): Threshold for mean beta check. Defaults to 0.05. + threshold_mean_diff_pz (float): Threshold for mean diff PZ check. Defaults to 0.05. + threshold_se_diff_pz (float): Threshold for SE diff PZ check. Defaults to 0.05. + threshold_min_gc_lambda (float): Minimum threshold for GC lambda check. Defaults to 0.7. + threshold_max_gc_lambda (float): Maximum threshold for GC lambda check. Defaults to 2.5. + threshold_min_n_variants (int): Minimum number of variants for SuSiE check. Defaults to 2_000_000. 
+ + Returns: + StudyIndex: Updated study index with QC information + """ + # convert all columns in sumstats_qc dataframe in array of structs grouped by studyId + cols = [c for c in sumstats_qc.columns if c != "studyId"] + + studies = self.df + + melted_df = convert_from_wide_to_long( + sumstats_qc, + id_vars=["studyId"], + value_vars=cols, + var_name="QCCheckName", + value_name="QCCheckValue", + ) + + qc_df = ( + melted_df.groupBy("studyId") + .agg( + f.map_from_entries( + f.collect_list( + f.struct(f.col("QCCheckName"), f.col("QCCheckValue")) + ) + ).alias("sumStatQCValues") + ) + .select("studyId", "sumstatQCValues") + ) + + df = ( + studies.drop("sumStatQCValues", "hasSumstats") + .join( + qc_df.withColumn("hasSumstats", f.lit(True)), how="left", on="studyId" + ) + .withColumn("hasSumstats", f.coalesce(f.col("hasSumstats"), f.lit(False))) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~f.col("hasSumstats"), + StudyQualityCheck.SUMSTATS_NOT_AVAILABLE, + ), + ) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~(f.abs(f.col("sumstatQCValues.mean_beta")) <= threshold_mean_beta), + StudyQualityCheck.FAILED_MEAN_BETA_CHECK, + ), + ) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~( + ( + f.abs(f.col("sumstatQCValues.mean_diff_pz")) + <= threshold_mean_diff_pz + ) + & (f.col("sumstatQCValues.se_diff_pz") <= threshold_se_diff_pz) + ), + StudyQualityCheck.FAILED_PZ_CHECK, + ), + ) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~( + (f.col("sumstatQCValues.gc_lambda") <= threshold_max_gc_lambda) + & ( + f.col("sumstatQCValues.gc_lambda") + >= threshold_min_gc_lambda + ) + ), + StudyQualityCheck.FAILED_GC_LAMBDA_CHECK, + ), + ) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + (f.col("sumstatQCValues.n_variants") < 
threshold_min_n_variants), + StudyQualityCheck.SMALL_NUMBER_OF_SNPS, + ), + ) + ) + + # Annotate study index with QC information: + return StudyIndex( + _df=df, + _schema=StudyIndex.get_schema(), + ) diff --git a/src/gentropy/datasource/gwas_catalog/associations.py b/src/gentropy/datasource/gwas_catalog/associations.py index b34944b11..da2bcc6df 100644 --- a/src/gentropy/datasource/gwas_catalog/associations.py +++ b/src/gentropy/datasource/gwas_catalog/associations.py @@ -230,7 +230,9 @@ def _map_variants_to_gnomad_variants( "chromosome", # Calculate the position in Ensembl coordinates for indels: GWASCatalogCuratedAssociationsParser.convert_gnomad_position_to_ensembl( - f.col("position"), f.col("referenceAllele"), f.col("alternateAllele") + f.col("position"), + f.col("referenceAllele"), + f.col("alternateAllele"), ).alias("ensemblPosition"), # Keeping GnomAD position: "position", @@ -240,11 +242,7 @@ def _map_variants_to_gnomad_variants( "alleleFrequencies", variant_index.max_maf().alias("maxMaf"), ).join( - f.broadcast( - gwas_associations_subset.select( - "chromosome", "ensemblPosition" - ).distinct() - ), + gwas_associations_subset.select("chromosome", "ensemblPosition").distinct(), on=["chromosome", "ensemblPosition"], how="inner", ) @@ -253,7 +251,7 @@ def _map_variants_to_gnomad_variants( # based on rsIds or allele concordance) filtered_associations = ( gwas_associations_subset.join( - f.broadcast(va_subset), + va_subset, on=["chromosome", "ensemblPosition"], how="left", ) @@ -1108,7 +1106,10 @@ def from_source( pvalue_threshold is keeped in sync with the WindowBasedClumpingStep gwas_significance. 
""" return StudyLocusGWASCatalog( - _df=gwas_associations.withColumn( + _df=gwas_associations + # drop duplicate rows + .distinct() + .withColumn( "studyLocusId", f.monotonically_increasing_id().cast(StringType()) ) .transform( diff --git a/src/gentropy/datasource/gwas_catalog/study_index.py b/src/gentropy/datasource/gwas_catalog/study_index.py index d0d841105..421f53d0f 100644 --- a/src/gentropy/datasource/gwas_catalog/study_index.py +++ b/src/gentropy/datasource/gwas_catalog/study_index.py @@ -7,61 +7,15 @@ import pyspark.sql.functions as f import pyspark.sql.types as t -from pyspark import SparkFiles -from gentropy.common.session import Session from gentropy.common.spark_helpers import column2camel_case from gentropy.common.utils import parse_efos -from gentropy.dataset.study_index import StudyIndex +from gentropy.dataset.study_index import StudyIndex, StudyQualityCheck if TYPE_CHECKING: from pyspark.sql import Column, DataFrame -def read_curation_table( - curation_path: str | None, session: Session -) -> DataFrame | None: - """Read curation table if path or URL is given. - - Curation itself is fully optional everything should work without it. - - Args: - curation_path (str | None): Optionally given path the curation tsv. 
- session (Session): session object - - Returns: - DataFrame | None: if curation was provided, - """ - # If no curation path provided, we are returning none: - if curation_path is None: - return None - # Read curation from the web: - elif curation_path.startswith("http"): - # Registering file: - session.spark.sparkContext.addFile(curation_path) - - # Reading file: - curation_df = session.spark.read.csv( - SparkFiles.get(curation_path.split("/")[-1]), sep="\t", header=True - ) - # Read curation from file: - else: - curation_df = session.spark.read.csv(curation_path, sep="\t", header=True) - return curation_df.select( - "studyId", - "studyType", - f.when(f.col("analysisFlag").isNotNull(), f.split(f.col("analysisFlag"), r"\|")) - .otherwise(f.array()) - .alias("analysisFlags"), - f.when( - f.col("qualityControl").isNotNull(), f.split(f.col("qualityControl"), r"\|") - ) - .otherwise(f.array()) - .alias("qualityControls"), - f.col("isCurated").cast(t.BooleanType()), - ) - - @dataclass class StudyIndexGWASCatalogParser: """GWAS Catalog study index parser. @@ -316,14 +270,12 @@ def from_source( cls: type[StudyIndexGWASCatalogParser], catalog_studies: DataFrame, ancestry_file: DataFrame, - sumstats_lut: DataFrame, ) -> StudyIndexGWASCatalog: """Ingests study level metadata from the GWAS Catalog. Args: catalog_studies (DataFrame): GWAS Catalog raw study table ancestry_file (DataFrame): GWAS Catalog ancestry table. - sumstats_lut (DataFrame): GWAS Catalog summary statistics list. Returns: StudyIndexGWASCatalog: Parsed and annotated GWAS Catalog study table. 
@@ -332,7 +284,6 @@ def from_source( return ( cls._parse_study_table(catalog_studies) .annotate_ancestries(ancestry_file) - .annotate_sumstats_info(sumstats_lut) .annotate_discovery_sample_sizes() ) @@ -403,7 +354,13 @@ def annotate_from_study_curation( if curation_table is None: return self - columns = self.df.columns + studies = self.df + + if "qualityControls" not in studies.columns: + studies = studies.withColumn("qualityControls", f.array()) + + if "analysisFlags" not in studies.columns: + studies = studies.withColumn("analysisFlags", f.array()) # Adding prefix to columns in the curation table: curation_table = curation_table.select( @@ -415,46 +372,34 @@ def annotate_from_study_curation( ] ) - # Create expression how to update/create quality controls dataset: - qualityControls_expression = ( - f.col("curation_qualityControls") - if "qualityControls" not in columns - else f.when( - f.col("curation_qualityControls").isNotNull(), - f.array_union( - f.col("qualityControls"), f.array(f.col("curation_qualityControls")) - ), - ).otherwise(f.col("qualityControls")) - ) - - # Create expression how to update/create analysis flag: - analysis_expression = ( - f.col("curation_analysisFlags") - if "analysisFlags" not in columns - else f.when( - f.col("curation_analysisFlags").isNotNull(), - f.array_union( - f.col("analysisFlags"), f.array(f.col("curation_analysisFlags")) - ), - ).otherwise(f.col("analysisFlags")) - ) - - # Updating columns list. 
We might or might not list columns twice, but that doesn't matter, unique set will generated: - columns = list(set(columns + ["qualityControls", "analysisFlags"])) - # Based on the curation table, columns needs to be updated: curated_df = ( - self.df.join(curation_table, on="studyId", how="left") + studies.join( + curation_table.withColumn("isCurated", f.lit(True)), + on="studyId", + how="left", + ) + .withColumn("isCurated", f.coalesce(f.col("isCurated"), f.lit(False))) # Updating study type: .withColumn( "studyType", f.coalesce(f.col("curation_studyType"), f.col("studyType")) ) - # Updating quality controls: - .withColumn("qualityControls", qualityControls_expression) # Updating study annotation flags: - .withColumn("analysisFlags", analysis_expression) + .withColumn( + "analysisFlags", + f.array_union(f.col("analysisFlags"), f.col("curation_analysisFlags")), + ) + .withColumn("analysisFlags", f.coalesce(f.col("analysisFlags"), f.array())) + .withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~f.col("isCurated"), + StudyQualityCheck.NO_OT_CURATION, + ), + ) # Dropping columns coming from the curation table: - .select(*columns) + .select(*studies.columns) ) return StudyIndexGWASCatalog( _df=curated_df, _schema=StudyIndexGWASCatalog.get_schema() @@ -641,47 +586,6 @@ def annotate_ancestries( self.df = self.df.join(parsed_ancestry_lut, on="studyId", how="left") return self - def annotate_sumstats_info( - self: StudyIndexGWASCatalog, sumstats_lut: DataFrame - ) -> StudyIndexGWASCatalog: - """Annotate summary stat locations. 
- - Args: - sumstats_lut (DataFrame): listing GWAS Catalog summary stats paths - - Returns: - StudyIndexGWASCatalog: including `summarystatsLocation` and `hasSumstats` columns - - Raises: - ValueError: if the sumstats_lut table doesn't have the right columns - """ - gwas_sumstats_base_uri = ( - "ftp://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/" - ) - - if "_c0" not in sumstats_lut.columns: - raise ValueError( - f'Sumstats look-up table needs to have `_c0` column. However it has: {",".join(sumstats_lut.columns)}' - ) - - parsed_sumstats_lut = sumstats_lut.withColumn( - "summarystatsLocation", - f.concat( - f.lit(gwas_sumstats_base_uri), - f.regexp_replace(f.col("_c0"), r"^\.\/", ""), - ), - ).select( - self._parse_gwas_catalog_study_id("summarystatsLocation").alias("studyId"), - "summarystatsLocation", - f.lit(True).alias("hasSumstats"), - ) - self.df = ( - self.df.drop("hasSumstats") - .join(parsed_sumstats_lut, on="studyId", how="left") - .withColumn("hasSumstats", f.coalesce(f.col("hasSumstats"), f.lit(False))) - ) - return self - def annotate_discovery_sample_sizes( self: StudyIndexGWASCatalog, ) -> StudyIndexGWASCatalog: diff --git a/src/gentropy/datasource/gwas_catalog/study_index_ot_curation.py b/src/gentropy/datasource/gwas_catalog/study_index_ot_curation.py new file mode 100644 index 000000000..8d75e4824 --- /dev/null +++ b/src/gentropy/datasource/gwas_catalog/study_index_ot_curation.py @@ -0,0 +1,90 @@ +"""Study Index for GWAS Catalog data source.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +import pyspark.sql.functions as f +import pyspark.sql.types as t +from pyspark import SparkFiles + +from gentropy.common.session import Session + +if TYPE_CHECKING: + from pyspark.sql import DataFrame + + +@dataclass +class StudyIndexGWASCatalogOTCuration: + """Study Index Curation for GWAS Catalog data source. 
+ + This class is responsible for parsing additional curation for the GWAS Catalog studies. + """ + + @staticmethod + def _parser(df: DataFrame) -> DataFrame: + """Parse the curation table. + + Args: + df (DataFrame): DataFrame with the curation table. + + Returns: + DataFrame: DataFrame with the parsed curation table. + """ + if "qualityControl" not in df.columns: + # Add the 'qualityControl' column with null values + df = df.withColumn("qualityControl", f.lit(None).cast("string")) + return df.select( + "studyId", + "studyType", + f.when( + f.col("analysisFlag").isNotNull(), f.split(f.col("analysisFlag"), r"\|") + ) + .otherwise(f.array()) + .alias("analysisFlags"), + f.when( + f.col("qualityControl").isNotNull(), + f.split(f.col("qualityControl"), r"\|"), + ) + .otherwise(f.array()) + .alias("qualityControls"), + f.col("isCurated").cast(t.BooleanType()), + ) + + @classmethod + def from_csv( + cls: type[StudyIndexGWASCatalogOTCuration], session: Session, curation_path: str + ) -> DataFrame: + """Read curation table from csv. + + Args: + session (Session): Session object. + curation_path (str): Path to the curation table. + + Returns: + DataFrame: DataFrame with the curation table. + """ + return cls._parser(session.spark.read.csv(curation_path, sep="\t", header=True)) + + @classmethod + def from_url( + cls: type[StudyIndexGWASCatalogOTCuration], session: Session, curation_url: str + ) -> DataFrame: + """Read curation table from URL. + + Args: + session (Session): Session object. + curation_url (str): URL to the curation table. + + Returns: + DataFrame: DataFrame with the curation table. 
+ """ + # Registering file: + session.spark.sparkContext.addFile(curation_url) + + return cls._parser( + session.spark.read.csv( + SparkFiles.get(curation_url.split("/")[-1]), sep="\t", header=True + ) + ) diff --git a/src/gentropy/gwas_catalog_study_curation.py b/src/gentropy/gwas_catalog_study_curation.py index 7329a1679..530e03ea6 100644 --- a/src/gentropy/gwas_catalog_study_curation.py +++ b/src/gentropy/gwas_catalog_study_curation.py @@ -1,10 +1,13 @@ """Step to update GWAS Catalog study curation file based on newly released GWAS Catalog dataset.""" + from __future__ import annotations from gentropy.common.session import Session from gentropy.datasource.gwas_catalog.study_index import ( StudyIndexGWASCatalogParser, - read_curation_table, +) +from gentropy.datasource.gwas_catalog.study_index_ot_curation import ( + StudyIndexGWASCatalogOTCuration, ) @@ -16,7 +19,6 @@ def __init__( session: Session, catalog_study_files: list[str], catalog_ancestry_files: list[str], - catalog_sumstats_lut: str, gwas_catalog_study_curation_out: str, gwas_catalog_study_curation_file: str | None, ) -> None: @@ -26,9 +28,11 @@ def __init__( session (Session): Session object. catalog_study_files (list[str]): List of raw GWAS catalog studies file. catalog_ancestry_files (list[str]): List of raw ancestry annotations files from GWAS Catalog. - catalog_sumstats_lut (str): GWAS Catalog summary statistics lookup table. gwas_catalog_study_curation_out (str): Path for the updated curation table. gwas_catalog_study_curation_file (str | None): Path to the original curation table. Optinal + + Raises: + ValueError: If the curation file is provided but not a CSV file or URL. 
""" catalog_studies = session.spark.read.csv( list(catalog_study_files), sep="\t", header=True @@ -36,18 +40,24 @@ def __init__( ancestry_lut = session.spark.read.csv( list(catalog_ancestry_files), sep="\t", header=True ) - sumstats_lut = session.spark.read.csv( - catalog_sumstats_lut, sep="\t", header=False - ) - gwas_catalog_study_curation = read_curation_table( - gwas_catalog_study_curation_file, session - ) + + if gwas_catalog_study_curation_file: + if gwas_catalog_study_curation_file.endswith(".csv"): + gwas_catalog_study_curation = StudyIndexGWASCatalogOTCuration.from_csv( + session, gwas_catalog_study_curation_file + ) + elif gwas_catalog_study_curation_file.startswith("http"): + gwas_catalog_study_curation = StudyIndexGWASCatalogOTCuration.from_url( + session, gwas_catalog_study_curation_file + ) + else: + raise ValueError( + "Only CSV files or URLs are accepted as curation file." + ) # Process GWAS Catalog studies and get list of studies for curation: ( - StudyIndexGWASCatalogParser.from_source( - catalog_studies, ancestry_lut, sumstats_lut - ) + StudyIndexGWASCatalogParser.from_source(catalog_studies, ancestry_lut) # Adding existing curation: .annotate_from_study_curation(gwas_catalog_study_curation) # Extract new studies for curation: diff --git a/src/gentropy/gwas_catalog_study_inclusion.py b/src/gentropy/gwas_catalog_study_inclusion.py deleted file mode 100644 index f07f851a7..000000000 --- a/src/gentropy/gwas_catalog_study_inclusion.py +++ /dev/null @@ -1,190 +0,0 @@ -"""Step to generate an GWAS Catalog study identifier inclusion and exclusion list.""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from pyspark.sql import functions as f - -from gentropy.common.session import Session -from gentropy.dataset.variant_index import VariantIndex -from gentropy.datasource.gwas_catalog.associations import ( - GWASCatalogCuratedAssociationsParser, -) -from gentropy.datasource.gwas_catalog.study_index import ( - 
StudyIndexGWASCatalog, - StudyIndexGWASCatalogParser, - read_curation_table, -) -from gentropy.datasource.gwas_catalog.study_splitter import GWASCatalogStudySplitter - -if TYPE_CHECKING: - from pyspark.sql import Column, DataFrame - - -class GWASCatalogStudyInclusionGenerator: - """GWAS Catalog study eligibility for ingestion based on curation and the provided criteria.""" - - @staticmethod - def flag_eligible_studies( - study_index: StudyIndexGWASCatalog, criteria: str - ) -> DataFrame: - """Apply filter on GWAS Catalog studies based on the provided criteria. - - Args: - study_index (StudyIndexGWASCatalog): complete study index to be filtered based on the provided filter set - criteria (str): name of the filter set to be applied. - - Raises: - ValueError: if the provided filter set is not in the accepted values. - - Returns: - DataFrame: filtered dataframe containing only eligible studies. - """ - filters: dict[str, Column] = { - # Filters applied on studies for ingesting curated associations: - "curation": (study_index.is_gwas() & study_index.has_mapped_trait()), - # Filters applied on studies for ingesting summary statistics: - "summary_stats": ( - study_index.is_gwas() - & study_index.has_mapped_trait() - & (~study_index.is_quality_flagged()) - & study_index.has_summarystats() - ), - } - - if criteria not in filters: - raise ValueError( - f'Wrong value as filter set ({criteria}). Accepted: {",".join(filters.keys())}' - ) - - # Applying the relevant filter to the study: - return study_index.df.select( - "studyId", - "studyType", - "traitFromSource", - "traitFromSourceMappedIds", - "qualityControls", - "hasSumstats", - filters[criteria].alias("isEligible"), - ) - - @staticmethod - def process_harmonised_list(studies: list[str], session: Session) -> DataFrame: - """Generate spark dataframe from the provided list. - - Args: - studies (list[str]): list of path pointing to harmonised summary statistics. 
- session (Session): session - - Returns: - DataFrame: column name is consistent with original implementatin - """ - return session.spark.createDataFrame([{"_c0": path} for path in studies]) - - @staticmethod - def get_gwas_catalog_study_index( - session: Session, - gnomad_variant_path: str, - catalog_study_files: list[str], - catalog_ancestry_files: list[str], - harmonised_study_file: str, - catalog_associations_file: str, - gwas_catalog_study_curation_file: str, - ) -> StudyIndexGWASCatalog: - """Return GWAS Catalog study index. - - Args: - session (Session): Session object. - gnomad_variant_path (str): Path to GnomAD variant list. - catalog_study_files (list[str]): List of raw GWAS catalog studies file. - catalog_ancestry_files (list[str]): List of raw ancestry annotations files from GWAS Catalog. - harmonised_study_file (str): GWAS Catalog summary statistics lookup table. - catalog_associations_file (str): Raw GWAS catalog associations file. - gwas_catalog_study_curation_file (str): file of the curation table. Optional. - - Returns: - StudyIndexGWASCatalog: Completely processed and fully annotated study index. 
- """ - # Extract - gnomad_variants = VariantIndex.from_parquet(session, gnomad_variant_path) - catalog_studies = session.spark.read.csv( - list(catalog_study_files), sep="\t", header=True - ) - ancestry_lut = session.spark.read.csv( - list(catalog_ancestry_files), sep="\t", header=True - ) - sumstats_lut = session.spark.read.csv( - harmonised_study_file, sep="\t", header=False - ) - catalog_associations = session.spark.read.csv( - catalog_associations_file, sep="\t", header=True - ).persist() - gwas_catalog_study_curation = read_curation_table( - gwas_catalog_study_curation_file, session - ) - - # Transform - study_index, _ = GWASCatalogStudySplitter.split( - StudyIndexGWASCatalogParser.from_source( - catalog_studies, - ancestry_lut, - sumstats_lut, - ).annotate_from_study_curation(gwas_catalog_study_curation), - GWASCatalogCuratedAssociationsParser.from_source( - catalog_associations, gnomad_variants - ), - ) - - return study_index - - def __init__( - self, - session: Session, - catalog_study_files: list[str], - catalog_ancestry_files: list[str], - catalog_associations_file: str, - gwas_catalog_study_curation_file: str, - gnomad_variant_path: str, - harmonised_study_file: str, - criteria: str, - inclusion_list_path: str, - exclusion_list_path: str, - ) -> None: - """Run step. - - Args: - session (Session): Session objecct. - catalog_study_files (list[str]): List of raw GWAS catalog studies file. - catalog_ancestry_files (list[str]): List of raw ancestry annotations files from GWAS Catalog. - catalog_associations_file (str): Raw GWAS catalog associations file. - gwas_catalog_study_curation_file (str): file of the curation table. Optional. - gnomad_variant_path (str): Path to GnomAD variant list. - harmonised_study_file (str): GWAS Catalog summary statistics lookup table. - criteria (str): name of the filter set to be applied. - inclusion_list_path (str): Output path for the inclusion list. - exclusion_list_path (str): Output path for the exclusion list. 
- """ - # Create study index: - study_index = self.get_gwas_catalog_study_index( - session, - gnomad_variant_path, - catalog_study_files, - catalog_ancestry_files, - harmonised_study_file, - catalog_associations_file, - gwas_catalog_study_curation_file, - ) - - # Get study indices for inclusion: - flagged_studies = self.flag_eligible_studies(study_index, criteria) - - # Output inclusion list: - eligible = ( - flagged_studies.filter(f.col("isEligible")).select("studyId").persist() - ) - eligible.write.mode(session.write_mode).parquet(inclusion_list_path) - - # Output exclusion list: - excluded = flagged_studies.filter(~f.col("isEligible")).persist() - excluded.write.mode(session.write_mode).parquet(exclusion_list_path) diff --git a/src/gentropy/gwas_catalog_study_index.py b/src/gentropy/gwas_catalog_study_index.py new file mode 100644 index 000000000..6c19b6909 --- /dev/null +++ b/src/gentropy/gwas_catalog_study_index.py @@ -0,0 +1,99 @@ +"""Step to generate an GWAS Catalog study identifier inclusion and exclusion list.""" + +from __future__ import annotations + +from pyspark.sql.types import DoubleType, LongType, StringType, StructField, StructType + +from gentropy.common.session import Session +from gentropy.datasource.gwas_catalog.study_index import StudyIndexGWASCatalogParser +from gentropy.datasource.gwas_catalog.study_index_ot_curation import ( + StudyIndexGWASCatalogOTCuration, +) + + +class GWASCatalogStudyIndexGenerationStep: + """GWAS Catalog study index generation. + + This step generates a study index from the GWAS Catalog studies and ancestry files. It can also add additional curation information and summary statistics QC information when available. + + ''' warning + This step does not generate study index for gwas catalog top hits. + + This step provides several optional arguments to add additional information to the study index: + + - gwas_catalog_study_curation_file: csv file or URL containing the curation table. 
If provided it annotates the study index with the additional curation information performed by the Open Targets team. + - sumstats_qc_path: Path to the summary statistics QC table. If provided it annotates the study index with the summary statistics QC information in the `sumStatQCValues` columns (e.g. `n_variants`, `n_variants_sig` etc.). + """ + + def __init__( + self, + session: Session, + catalog_study_files: list[str], + catalog_ancestry_files: list[str], + study_index_path: str, + gwas_catalog_study_curation_file: str | None = None, + sumstats_qc_path: str | None = None, + ) -> None: + """Run step. + + Args: + session (Session): Session objecct. + catalog_study_files (list[str]): List of raw GWAS catalog studies file. + catalog_ancestry_files (list[str]): List of raw ancestry annotations files from GWAS Catalog. + study_index_path (str): Output GWAS catalog studies path. + gwas_catalog_study_curation_file (str | None): csv file or URL containing the curation table. Optional. + sumstats_qc_path (str | None): Path to the summary statistics QC table. Optional. + + Raises: + ValueError: If the curation file is provided but not a CSV file or URL. 
+ """ + # Core Study Index Generation: + study_index = StudyIndexGWASCatalogParser.from_source( + session.spark.read.csv(list(catalog_study_files), sep="\t", header=True), + session.spark.read.csv(list(catalog_ancestry_files), sep="\t", header=True), + ) + + # Annotate with curation if provided: + if gwas_catalog_study_curation_file: + if gwas_catalog_study_curation_file.endswith( + ".tsv" + ) | gwas_catalog_study_curation_file.endswith(".tsv"): + gwas_catalog_study_curation = StudyIndexGWASCatalogOTCuration.from_csv( + session, gwas_catalog_study_curation_file + ) + elif gwas_catalog_study_curation_file.startswith("http"): + gwas_catalog_study_curation = StudyIndexGWASCatalogOTCuration.from_url( + session, gwas_catalog_study_curation_file + ) + else: + raise ValueError( + "Only CSV/TSV files or URLs are accepted as curation file." + ) + study_index = study_index.annotate_from_study_curation( + gwas_catalog_study_curation + ) + + # Annotate with sumstats QC if provided: + if sumstats_qc_path: + schema = StructType( + [ + StructField("studyId", StringType(), True), + StructField("mean_beta", DoubleType(), True), + StructField("mean_diff_pz", DoubleType(), True), + StructField("se_diff_pz", DoubleType(), True), + StructField("gc_lambda", DoubleType(), True), + StructField("n_variants", LongType(), True), + StructField("n_variants_sig", LongType(), True), + ] + ) + sumstats_qc = session.spark.read.schema(schema).parquet( + sumstats_qc_path, recursiveFileLookup=True + ) + study_index_with_qc = study_index.annotate_sumstats_qc(sumstats_qc) + + # Write the study + study_index_with_qc.df.write.mode(session.write_mode).parquet( + study_index_path + ) + else: + study_index.df.write.mode(session.write_mode).parquet(study_index_path) diff --git a/src/gentropy/gwas_catalog_ingestion.py b/src/gentropy/gwas_catalog_top_hits.py similarity index 59% rename from src/gentropy/gwas_catalog_ingestion.py rename to src/gentropy/gwas_catalog_top_hits.py index 5dab5bf16..95722c768 100644 
--- a/src/gentropy/gwas_catalog_ingestion.py +++ b/src/gentropy/gwas_catalog_top_hits.py @@ -10,30 +10,23 @@ ) from gentropy.datasource.gwas_catalog.study_index import ( StudyIndexGWASCatalogParser, - read_curation_table, ) from gentropy.datasource.gwas_catalog.study_splitter import GWASCatalogStudySplitter -class GWASCatalogIngestionStep: - """GWAS Catalog ingestion step to extract GWASCatalog Study and StudyLocus tables. - - !!! note This step currently only processes the GWAS Catalog curated list of top hits. - """ +class GWASCatalogTopHitIngestionStep: + """GWAS Catalog ingestion step to extract GWASCatalog top hits.""" def __init__( self, session: Session, catalog_study_files: list[str], catalog_ancestry_files: list[str], - catalog_sumstats_lut: str, catalog_associations_file: str, - gnomad_variant_path: str, + variant_annotation_path: str, catalog_studies_out: str, catalog_associations_out: str, distance: int = WindowBasedClumpingStepConfig().distance, - gwas_catalog_study_curation_file: str | None = None, - inclusion_list_path: str | None = None, ) -> None: """Run step. @@ -41,52 +34,31 @@ def __init__( session (Session): Session object. catalog_study_files (list[str]): List of raw GWAS catalog studies file. catalog_ancestry_files (list[str]): List of raw ancestry annotations files from GWAS Catalog. - catalog_sumstats_lut (str): GWAS Catalog summary statistics lookup table. catalog_associations_file (str): Raw GWAS catalog associations file. - gnomad_variant_path (str): Path to GnomAD variants. + variant_annotation_path (str): Path to GnomAD variants. catalog_studies_out (str): Output GWAS catalog studies path. catalog_associations_out (str): Output GWAS catalog associations path. distance (int): Distance, within which tagging variants are collected around the semi-index. - gwas_catalog_study_curation_file (str | None): file of the curation table. Optional. 
- inclusion_list_path (str | None): optional inclusion list (parquet) """ # Extract - gnomad_variants = VariantIndex.from_parquet(session, gnomad_variant_path) + gnomad_variants = VariantIndex.from_parquet(session, variant_annotation_path) catalog_studies = session.spark.read.csv( list(catalog_study_files), sep="\t", header=True ) ancestry_lut = session.spark.read.csv( list(catalog_ancestry_files), sep="\t", header=True ) - sumstats_lut = session.spark.read.csv( - catalog_sumstats_lut, sep="\t", header=False - ) catalog_associations = session.spark.read.csv( catalog_associations_file, sep="\t", header=True ).persist() - gwas_catalog_study_curation = read_curation_table( - gwas_catalog_study_curation_file, session - ) # Transform study_index, study_locus = GWASCatalogStudySplitter.split( - StudyIndexGWASCatalogParser.from_source( - catalog_studies, ancestry_lut, sumstats_lut - ).annotate_from_study_curation(gwas_catalog_study_curation), + StudyIndexGWASCatalogParser.from_source(catalog_studies, ancestry_lut), GWASCatalogCuratedAssociationsParser.from_source( catalog_associations, gnomad_variants ), ) - - # if inclusion list is provided apply filter: - if inclusion_list_path: - inclusion_list = session.spark.read.parquet( - inclusion_list_path, sep="\t", header=True - ) - - study_index = study_index.apply_inclusion_list(inclusion_list) - study_locus = study_locus.apply_inclusion_list(inclusion_list) - # Load study_index.df.write.mode(session.write_mode).parquet(catalog_studies_out) diff --git a/tests/gentropy/dataset/test_dataset_exclusion.py b/tests/gentropy/dataset/test_dataset_exclusion.py index 329a0a1d5..1b6fce967 100644 --- a/tests/gentropy/dataset/test_dataset_exclusion.py +++ b/tests/gentropy/dataset/test_dataset_exclusion.py @@ -24,11 +24,11 @@ class TestDataExclusion: # Good study no flag: ("S1", None), # Good study permissive flag: - ("S2", "This type of study is not supported."), - ("S2", "No valid disease identifier found."), + ("S2", "This type of study 
is not supported"), + ("S2", "No valid disease identifier found"), # Bad study: - ("S3", "The identifier of this study is not unique."), - ("S3", "This type of study is not supported."), + ("S3", "The identifier of this study is not unique"), + ("S3", "This type of study is not supported"), ] @pytest.fixture(autouse=True) diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py index 4163531cb..0dd1a7363 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py @@ -143,39 +143,6 @@ def test_curation__study_type_update( assert expected == observed - # Test update qc flag - @staticmethod - def test_curation__quality_controls( - mock_gwas_study_index: StudyIndexGWASCatalog, mock_study_curation: DataFrame - ) -> None: - """Test for making sure the study type got updated.""" - curated = mock_gwas_study_index.annotate_from_study_curation( - mock_study_curation - ) - - # Expected studyIds: - expected = [ - row["studyId"] - for row in ( - mock_study_curation.filter(f.col("qualityControls").isNotNull()) - .select("studyId") - .distinct() - .collect() - ) - ] - - observed = [ - row["studyId"] - for row in ( - curated.df.filter(f.size(f.col("qualityControls")) > 0) - .select("studyId") - .distinct() - .collect() - ) - ] - - assert expected == observed - # Test updated method flag @staticmethod def test_curation__analysis_flags( diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py index b91529b3d..b3ff4e486 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py @@ -31,25 +31,8 @@ def test_parse_study_table(sample_gwas_catalog_studies: DataFrame) -> None: ) -def 
test_annotate_sumstats( - mock_study_index_gwas_catalog: StudyIndexGWASCatalog, - sample_gwas_catalog_harmonised_sumstats_list: DataFrame, -) -> None: - """Test annotate sumstats of GWASCatalogStudyIndex.""" - mock_study_index_gwas_catalog.df = mock_study_index_gwas_catalog.df.drop( - "summarystatsLocation" - ) - assert isinstance( - mock_study_index_gwas_catalog.annotate_sumstats_info( - sample_gwas_catalog_harmonised_sumstats_list - ), - StudyIndexGWASCatalog, - ) - - def test_study_index_from_source( sample_gwas_catalog_studies: DataFrame, - sample_gwas_catalog_harmonised_sumstats_list: DataFrame, sample_gwas_catalog_ancestries_lut: DataFrame, ) -> None: """Test study index from source.""" @@ -57,7 +40,6 @@ def test_study_index_from_source( StudyIndexGWASCatalogParser.from_source( sample_gwas_catalog_studies, sample_gwas_catalog_ancestries_lut, - sample_gwas_catalog_harmonised_sumstats_list, ), StudyIndexGWASCatalog, ) From a78bae3069470e8302a0030971b4ee38215ce48f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:38:54 +0100 Subject: [PATCH 124/188] build(deps-dev): bump mypy from 1.11.0 to 1.12.1 (#865) Bumps [mypy](https://github.com/python/mypy) from 1.11.0 to 1.12.1. - [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md) - [Commits](https://github.com/python/mypy/compare/v1.11...v1.12.1) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 63 +++++++++++++++++++++++++++----------------------- pyproject.toml | 2 +- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/poetry.lock b/poetry.lock index 254c05a75..40d24a800 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2755,38 +2755,43 @@ files = [ [[package]] name = "mypy" -version = "1.11.0" +version = "1.12.1" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" files = [ - {file = "mypy-1.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3824187c99b893f90c845bab405a585d1ced4ff55421fdf5c84cb7710995229"}, - {file = "mypy-1.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:96f8dbc2c85046c81bcddc246232d500ad729cb720da4e20fce3b542cab91287"}, - {file = "mypy-1.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a5d8d8dd8613a3e2be3eae829ee891b6b2de6302f24766ff06cb2875f5be9c6"}, - {file = "mypy-1.11.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:72596a79bbfb195fd41405cffa18210af3811beb91ff946dbcb7368240eed6be"}, - {file = "mypy-1.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:35ce88b8ed3a759634cb4eb646d002c4cef0a38f20565ee82b5023558eb90c00"}, - {file = "mypy-1.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:98790025861cb2c3db8c2f5ad10fc8c336ed2a55f4daf1b8b3f877826b6ff2eb"}, - {file = "mypy-1.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25bcfa75b9b5a5f8d67147a54ea97ed63a653995a82798221cca2a315c0238c1"}, - {file = "mypy-1.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bea2a0e71c2a375c9fa0ede3d98324214d67b3cbbfcbd55ac8f750f85a414e3"}, - {file = "mypy-1.11.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2b3d36baac48e40e3064d2901f2fbd2a2d6880ec6ce6358825c85031d7c0d4d"}, - {file = "mypy-1.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8e2e43977f0e09f149ea69fd0556623919f816764e26d74da0c8a7b48f3e18a"}, - 
{file = "mypy-1.11.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1d44c1e44a8be986b54b09f15f2c1a66368eb43861b4e82573026e04c48a9e20"}, - {file = "mypy-1.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cea3d0fb69637944dd321f41bc896e11d0fb0b0aa531d887a6da70f6e7473aba"}, - {file = "mypy-1.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a83ec98ae12d51c252be61521aa5731f5512231d0b738b4cb2498344f0b840cd"}, - {file = "mypy-1.11.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c7b73a856522417beb78e0fb6d33ef89474e7a622db2653bc1285af36e2e3e3d"}, - {file = "mypy-1.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:f2268d9fcd9686b61ab64f077be7ffbc6fbcdfb4103e5dd0cc5eaab53a8886c2"}, - {file = "mypy-1.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:940bfff7283c267ae6522ef926a7887305945f716a7704d3344d6d07f02df850"}, - {file = "mypy-1.11.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:14f9294528b5f5cf96c721f231c9f5b2733164e02c1c018ed1a0eff8a18005ac"}, - {file = "mypy-1.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7b54c27783991399046837df5c7c9d325d921394757d09dbcbf96aee4649fe9"}, - {file = "mypy-1.11.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:65f190a6349dec29c8d1a1cd4aa71284177aee5949e0502e6379b42873eddbe7"}, - {file = "mypy-1.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbe286303241fea8c2ea5466f6e0e6a046a135a7e7609167b07fd4e7baf151bf"}, - {file = "mypy-1.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:104e9c1620c2675420abd1f6c44bab7dd33cc85aea751c985006e83dcd001095"}, - {file = "mypy-1.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f006e955718ecd8d159cee9932b64fba8f86ee6f7728ca3ac66c3a54b0062abe"}, - {file = "mypy-1.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:becc9111ca572b04e7e77131bc708480cc88a911adf3d0239f974c034b78085c"}, - {file = 
"mypy-1.11.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6801319fe76c3f3a3833f2b5af7bd2c17bb93c00026a2a1b924e6762f5b19e13"}, - {file = "mypy-1.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:c1a184c64521dc549324ec6ef7cbaa6b351912be9cb5edb803c2808a0d7e85ac"}, - {file = "mypy-1.11.0-py3-none-any.whl", hash = "sha256:56913ec8c7638b0091ef4da6fcc9136896914a9d60d54670a75880c3e5b99ace"}, - {file = "mypy-1.11.0.tar.gz", hash = "sha256:93743608c7348772fdc717af4aeee1997293a1ad04bc0ea6efa15bf65385c538"}, + {file = "mypy-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3d7d4371829184e22fda4015278fbfdef0327a4b955a483012bd2d423a788801"}, + {file = "mypy-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f59f1dfbf497d473201356966e353ef09d4daec48caeacc0254db8ef633a28a5"}, + {file = "mypy-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b947097fae68004b8328c55161ac9db7d3566abfef72d9d41b47a021c2fba6b1"}, + {file = "mypy-1.12.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:96af62050971c5241afb4701c15189ea9507db89ad07794a4ee7b4e092dc0627"}, + {file = "mypy-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:d90da248f4c2dba6c44ddcfea94bb361e491962f05f41990ff24dbd09969ce20"}, + {file = "mypy-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1230048fec1380faf240be6385e709c8570604d2d27ec6ca7e573e3bc09c3735"}, + {file = "mypy-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02dcfe270c6ea13338210908f8cadc8d31af0f04cee8ca996438fe6a97b4ec66"}, + {file = "mypy-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a437c9102a6a252d9e3a63edc191a3aed5f2fcb786d614722ee3f4472e33f6"}, + {file = "mypy-1.12.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:186e0c8346efc027ee1f9acf5ca734425fc4f7dc2b60144f0fbe27cc19dc7931"}, + {file = "mypy-1.12.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:673ba1140a478b50e6d265c03391702fa11a5c5aff3f54d69a62a48da32cb811"}, + {file = "mypy-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9fb83a7be97c498176fb7486cafbb81decccaef1ac339d837c377b0ce3743a7f"}, + {file = "mypy-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:389e307e333879c571029d5b93932cf838b811d3f5395ed1ad05086b52148fb0"}, + {file = "mypy-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:94b2048a95a21f7a9ebc9fbd075a4fcd310410d078aa0228dbbad7f71335e042"}, + {file = "mypy-1.12.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ee5932370ccf7ebf83f79d1c157a5929d7ea36313027b0d70a488493dc1b179"}, + {file = "mypy-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:19bf51f87a295e7ab2894f1d8167622b063492d754e69c3c2fed6563268cb42a"}, + {file = "mypy-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d34167d43613ffb1d6c6cdc0cc043bb106cac0aa5d6a4171f77ab92a3c758bcc"}, + {file = "mypy-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:427878aa54f2e2c5d8db31fa9010c599ed9f994b3b49e64ae9cd9990c40bd635"}, + {file = "mypy-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5fcde63ea2c9f69d6be859a1e6dd35955e87fa81de95bc240143cf00de1f7f81"}, + {file = "mypy-1.12.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d54d840f6c052929f4a3d2aab2066af0f45a020b085fe0e40d4583db52aab4e4"}, + {file = "mypy-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:20db6eb1ca3d1de8ece00033b12f793f1ea9da767334b7e8c626a4872090cf02"}, + {file = "mypy-1.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b16fe09f9c741d85a2e3b14a5257a27a4f4886c171d562bc5a5e90d8591906b8"}, + {file = "mypy-1.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0dcc1e843d58f444fce19da4cce5bd35c282d4bde232acdeca8279523087088a"}, + {file = "mypy-1.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:e10ba7de5c616e44ad21005fa13450cd0de7caaa303a626147d45307492e4f2d"}, + {file = "mypy-1.12.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e6fe449223fa59fbee351db32283838a8fee8059e0028e9e6494a03802b4004"}, + {file = "mypy-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:dc6e2a2195a290a7fd5bac3e60b586d77fc88e986eba7feced8b778c373f9afe"}, + {file = "mypy-1.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:de5b2a8988b4e1269a98beaf0e7cc71b510d050dce80c343b53b4955fff45f19"}, + {file = "mypy-1.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:843826966f1d65925e8b50d2b483065c51fc16dc5d72647e0236aae51dc8d77e"}, + {file = "mypy-1.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9fe20f89da41a95e14c34b1ddb09c80262edcc295ad891f22cc4b60013e8f78d"}, + {file = "mypy-1.12.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8135ffec02121a75f75dc97c81af7c14aa4ae0dda277132cfcd6abcd21551bfd"}, + {file = "mypy-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:a7b76fa83260824300cc4834a3ab93180db19876bce59af921467fd03e692810"}, + {file = "mypy-1.12.1-py3-none-any.whl", hash = "sha256:ce561a09e3bb9863ab77edf29ae3a50e65685ad74bba1431278185b7e5d5486e"}, + {file = "mypy-1.12.1.tar.gz", hash = "sha256:f5b3936f7a6d0e8280c9bdef94c7ce4847f5cdfc258fbb2c29a8c1711e8bb96d"}, ] [package.dependencies] @@ -5219,4 +5224,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "e786d680aaa9f4a57bfc91fc2a18002199156ccc840c52e372171026506cdf04" +content-hash = "f4355a99419384b34057818bc3d8fdb4b8746f406631678976668426fbb95183" diff --git a/pyproject.toml b/pyproject.toml index 6f0e2e919..2d38452cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ google-cloud-secret-manager = "^2.20.0" [tool.poetry.dev-dependencies] pre-commit = "^4.0.0" -mypy = "^1.11" +mypy = "^1.12" pep8-naming = "^0.14.1" interrogate = "^1.7.0" isort = "^5.13.2" From 
34a4e6106772a6aa5a396db2525cb7b9f246cd00 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 22 Oct 2024 11:50:59 +0100 Subject: [PATCH 125/188] feat: adding desision tree to fine-mapper (#860) * feat: adding dession tree to fine-mapper * fix: adding the rest * fix: v1 * fix: v3 * fix: v3 * fix: v5 * fix: v6 * fix: v7 * fix: change flags * fix: keys --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/gentropy/susie_finemapper.py | 106 +++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 6 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index d0759f565..adda2874c 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -28,6 +28,7 @@ from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck from gentropy.method.carma import CARMA +from gentropy.method.ld import LDAnnotator from gentropy.method.ld_matrix_interface import LDMatrixInterface from gentropy.method.sumstat_imputation import SummaryStatisticsImputation from gentropy.method.susie_inf import SUSIE_inf @@ -712,12 +713,32 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 study_index_df = study_index._df study_index_df = study_index_df.filter(f.col("studyId") == studyId) - major_population = study_index_df.select( - "studyId", - order_array_of_structs_by_field( - "ldPopulationStructure", "relativeSampleSize" - )[0]["ldPopulation"].alias("majorPopulation"), - ).collect()[0]["majorPopulation"] + + # Desision tree - study index + if study_index_df.count() == 0: + logging.warning("No study index found for the studyId") + return None + + major_population = ( + study_index_df.select( + "studyId", + order_array_of_structs_by_field( + "ldPopulationStructure", "relativeSampleSize" + ).alias("ldPopulationStructure"), + ) + .withColumn( + "majorPopulation", + f.when( + 
f.col("ldPopulationStructure").isNotNull(), + LDAnnotator._get_major_population(f.col("ldPopulationStructure")), + ), + ) + .collect()[0]["majorPopulation"] + ) + + # This is a temporary solution + if major_population == "eas": + major_population = "csa" N_total = int(study_index_df.select("nSamples").collect()[0]["nSamples"]) if N_total is None: @@ -727,6 +748,79 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 region = chromosome + ":" + str(int(locusStart)) + "-" + str(int(locusEnd)) + # Desision tree - studyType + if study_index_df.select("studyType").collect()[0]["studyType"] in [ + "gwas", + "pqtl", + ]: + logging.warning("Study type is not GWAS or non gwas catalog pqtl") + return None + + # Desision tree - ancestry + if major_population not in ["nfe", "csa", "afr"]: + logging.warning("Major ancestry is not nfe, csa or afr") + return None + + # Desision tree - hasSumstats + if not study_index_df.select("hasSumstats").collect()[0]["hasSumstats"]: + logging.warning("No sumstats found for the studyId") + return None + + # Desision tree - qulityControls + keys_reasons = [ + "SMALL_NUMBER_OF_SNPS", + "FAILED_GC_LAMBDA_CHECK", + "FAILED_PZ_CHECK", + "FAILED_MEAN_BETA_CHECK", + "NO_OT_CURATION", + "SUMSTATS_NOT_AVAILABLE", + ] + + qc_mappings_dict = StudyIndex.get_QC_mappings() + invalid_reasons = [ + qc_mappings_dict[key] for key in keys_reasons if key in qc_mappings_dict + ] + + x_boolean = ( + study_index_df.withColumn( + "FailedQC", + f.arrays_overlap( + f.col("qualityControls"), + f.array([f.lit(reason) for reason in invalid_reasons]), + ), + ) + .select("FailedQC") + .collect()[0]["FailedQC"] + ) + if x_boolean: + logging.warning("Quality control check failed for this study") + return None + + # Desision tree - analysisFlags + study_index_df = study_index_df.drop("FailedQC") + invalid_reasons = [ + "Multivariate analysis", + "ExWAS", + "Non-additive model", + "GxG", + "GxE", + "Case-case study", + ] + x_boolean = ( + 
study_index_df.withColumn( + "FailedQC", + f.arrays_overlap( + f.col("analysisFlags"), + f.array([f.lit(reason) for reason in invalid_reasons]), + ), + ) + .select("FailedQC") + .collect()[0]["FailedQC"] + ) + if x_boolean: + logging.warning("Analysis Flags check failed for this study") + return None + schema = StudyLocus.get_schema() gwas_df = session.spark.createDataFrame([study_locus_row], schema=schema) exploded_df = gwas_df.select(f.explode("locus").alias("locus")) From c5adb7512ec26c68e0c744720d74676f3b88ad49 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:36:36 +0100 Subject: [PATCH 126/188] build(deps-dev): bump ruff from 0.6.1 to 0.7.0 (#864) Bumps [ruff](https://github.com/astral-sh/ruff) from 0.6.1 to 0.7.0. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/ruff/compare/0.6.1...0.7.0) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 40 ++++++++++++++++++++-------------------- pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index 40d24a800..9ce219f6b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4267,29 +4267,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.6.1" +version = "0.7.0" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.6.1-py3-none-linux_armv6l.whl", hash = "sha256:b4bb7de6a24169dc023f992718a9417380301b0c2da0fe85919f47264fb8add9"}, - {file = "ruff-0.6.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:45efaae53b360c81043e311cdec8a7696420b3d3e8935202c2846e7a97d4edae"}, - {file = "ruff-0.6.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bc60c7d71b732c8fa73cf995efc0c836a2fd8b9810e115be8babb24ae87e0850"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c7477c3b9da822e2db0b4e0b59e61b8a23e87886e727b327e7dcaf06213c5cf"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a0af7ab3f86e3dc9f157a928e08e26c4b40707d0612b01cd577cc84b8905cc9"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:392688dbb50fecf1bf7126731c90c11a9df1c3a4cdc3f481b53e851da5634fa5"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:5278d3e095ccc8c30430bcc9bc550f778790acc211865520f3041910a28d0024"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe6d5f65d6f276ee7a0fc50a0cecaccb362d30ef98a110f99cac1c7872df2f18"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2e0dd11e2ae553ee5c92a81731d88a9883af8db7408db47fc81887c1f8b672e"}, - {file = "ruff-0.6.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d812615525a34ecfc07fd93f906ef5b93656be01dfae9a819e31caa6cfe758a1"}, - {file = "ruff-0.6.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faaa4060f4064c3b7aaaa27328080c932fa142786f8142aff095b42b6a2eb631"}, - {file = "ruff-0.6.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99d7ae0df47c62729d58765c593ea54c2546d5de213f2af2a19442d50a10cec9"}, - {file = "ruff-0.6.1-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:9eb18dfd7b613eec000e3738b3f0e4398bf0153cb80bfa3e351b3c1c2f6d7b15"}, - {file = "ruff-0.6.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c62bc04c6723a81e25e71715aa59489f15034d69bf641df88cb38bdc32fd1dbb"}, - {file = "ruff-0.6.1-py3-none-win32.whl", hash = "sha256:9fb4c4e8b83f19c9477a8745e56d2eeef07a7ff50b68a6998f7d9e2e3887bdc4"}, - {file = "ruff-0.6.1-py3-none-win_amd64.whl", hash = "sha256:c2ebfc8f51ef4aca05dad4552bbcf6fe8d1f75b2f6af546cc47cc1c1ca916b5b"}, - {file = "ruff-0.6.1-py3-none-win_arm64.whl", hash = "sha256:3bc81074971b0ffad1bd0c52284b22411f02a11a012082a76ac6da153536e014"}, - {file = "ruff-0.6.1.tar.gz", hash = "sha256:af3ffd8c6563acb8848d33cd19a69b9bfe943667f0419ca083f8ebe4224a3436"}, + {file = "ruff-0.7.0-py3-none-linux_armv6l.whl", hash = "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628"}, + {file = "ruff-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737"}, + {file = "ruff-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:214b88498684e20b6b2b8852c01d50f0651f3cc6118dfa113b4def9f14faaf06"}, + {file = "ruff-0.7.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630fce3fefe9844e91ea5bbf7ceadab4f9981f42b704fae011bb8efcaf5d84be"}, + {file = "ruff-0.7.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:211d877674e9373d4bb0f1c80f97a0201c61bcd1e9d045b6e9726adc42c156aa"}, + {file = "ruff-0.7.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:194d6c46c98c73949a106425ed40a576f52291c12bc21399eb8f13a0f7073495"}, + {file = "ruff-0.7.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:82c2579b82b9973a110fab281860403b397c08c403de92de19568f32f7178598"}, + {file = "ruff-0.7.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9af971fe85dcd5eaed8f585ddbc6bdbe8c217fb8fcf510ea6bca5bdfff56040e"}, + {file = 
"ruff-0.7.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b641c7f16939b7d24b7bfc0be4102c56562a18281f84f635604e8a6989948914"}, + {file = "ruff-0.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d71672336e46b34e0c90a790afeac8a31954fd42872c1f6adaea1dff76fd44f9"}, + {file = "ruff-0.7.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ab7d98c7eed355166f367597e513a6c82408df4181a937628dbec79abb2a1fe4"}, + {file = "ruff-0.7.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1eb54986f770f49edb14f71d33312d79e00e629a57387382200b1ef12d6a4ef9"}, + {file = "ruff-0.7.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:dc452ba6f2bb9cf8726a84aa877061a2462afe9ae0ea1d411c53d226661c601d"}, + {file = "ruff-0.7.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4b406c2dce5be9bad59f2de26139a86017a517e6bcd2688da515481c05a2cb11"}, + {file = "ruff-0.7.0-py3-none-win32.whl", hash = "sha256:f6c968509f767776f524a8430426539587d5ec5c662f6addb6aa25bc2e8195ec"}, + {file = "ruff-0.7.0-py3-none-win_amd64.whl", hash = "sha256:ff4aabfbaaba880e85d394603b9e75d32b0693152e16fa659a3064a85df7fce2"}, + {file = "ruff-0.7.0-py3-none-win_arm64.whl", hash = "sha256:10842f69c245e78d6adec7e1db0a7d9ddc2fff0621d730e61657b64fa36f207e"}, + {file = "ruff-0.7.0.tar.gz", hash = "sha256:47a86360cf62d9cd53ebfb0b5eb0e882193fc191c6d717e8bef4462bc3b9ea2b"}, ] [[package]] @@ -5224,4 +5224,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "f4355a99419384b34057818bc3d8fdb4b8746f406631678976668426fbb95183" +content-hash = "2dcd05168a809a2a9dc0728ee7ed1578ad008d0c3a290bf9a41895b10f2d849a" diff --git a/pyproject.toml b/pyproject.toml index 2d38452cb..fea630906 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ pep8-naming = "^0.14.1" interrogate = "^1.7.0" isort = "^5.13.2" darglint = "^1.8.1" -ruff = "^0.6.1" +ruff = "^0.7.0" [tool.poetry.group.docs.dependencies] mkdocs = "^1.5.3" From 
6e6687b270211698753e6c2479575a7484d2d0f5 Mon Sep 17 00:00:00 2001 From: Yakov Date: Wed, 23 Oct 2024 10:14:54 +0100 Subject: [PATCH 127/188] fix: logging of finemamper (#870) --- src/gentropy/susie_finemapper.py | 139 +++++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 6 deletions(-) diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index adda2874c..03a8730ef 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -131,6 +131,7 @@ def __init__( imputed_r2_threshold=imputed_r2_threshold, ld_score_threshold=ld_score_threshold, ld_min_r2=ld_min_r2, + log_output=log_output, ) if result_logging is not None: @@ -149,11 +150,35 @@ def __init__( df.write.mode(session.write_mode).parquet(study_locus_output) if result_logging["log"] is not None: # Write log - result_logging["log"].to_parquet( - log_output, - engine="pyarrow", - index=False, - ) + result_logging["log"].to_csv(log_output, index=False, sep="\t") + + @staticmethod + def _empty_log_mg(studyId: str, region: str, error_mg: str, path_out: str) -> None: + """Create an empty log DataFrame with error message. 
+ + Args: + studyId (str): study ID + region (str): region + error_mg (str): error message + path_out (str): output path + """ + pd.DataFrame( + { + "studyId": studyId, + "region": region, + "N_gwas_before_dedupl": 0, + "N_gwas": 0, + "N_ld": 0, + "N_overlap": 0, + "N_outliers": 0, + "N_imputed": 0, + "N_final_to_fm": 0, + "elapsed_time": 0, + "number_of_CS": 0, + "error": error_mg, + }, + index=[0], + ).to_csv(path_out, index=False, sep="\t") @staticmethod def susie_inf_to_studylocus( # noqa: C901 @@ -631,6 +656,7 @@ def susie_finemapper_from_prepared_dataframes( "N_final_to_fm": len(ld_to_fm), "elapsed_time": end_time - start_time, "number_of_CS": study_locus.df.count(), + "error": "", }, index=[0], ) @@ -648,6 +674,7 @@ def susie_finemapper_from_prepared_dataframes( "N_final_to_fm": len(ld_to_fm), "elapsed_time": end_time - start_time, "number_of_CS": 0, + "error": "", }, index=[0], ) @@ -678,6 +705,7 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 purity_min_r2_threshold: float = 0.25, cs_lbf_thr: float = 2, ld_min_r2: float = 0.9, + log_output: str = "", ) -> dict[str, Any] | None: """Susie fine-mapper function that uses study-locus row with collected locus, chromosome and position as inputs. 
@@ -699,6 +727,7 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 purity_min_r2_threshold (float): thrshold for purity min r2 qc metrics for filtering credible sets cs_lbf_thr (float): credible set logBF threshold for filtering credible sets, default is 2 ld_min_r2 (float): Threshold to fillter CS by leads in high LD, default is 0.9 + log_output (str): path to the log output Returns: dict[str, Any] | None: dictionary with study locus, number of GWAS variants, number of LD variants, number of variants after merge, number of outliers, number of imputed variants, number of variants to fine-map, or None @@ -716,6 +745,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 # Desision tree - study index if study_index_df.count() == 0: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region="", + error_mg="No study index found for the studyId", + path_out=log_output, + ) logging.warning("No study index found for the studyId") return None @@ -749,20 +785,41 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 region = chromosome + ":" + str(int(locusStart)) + "-" + str(int(locusEnd)) # Desision tree - studyType - if study_index_df.select("studyType").collect()[0]["studyType"] in [ + if study_index_df.select("studyType").collect()[0]["studyType"] not in [ "gwas", "pqtl", ]: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="Study type is not GWAS or non gwas catalog pqtl", + path_out=log_output, + ) logging.warning("Study type is not GWAS or non gwas catalog pqtl") return None # Desision tree - ancestry if major_population not in ["nfe", "csa", "afr"]: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="Major ancestry is not nfe, csa or afr", + path_out=log_output, + ) logging.warning("Major ancestry is not nfe, csa or afr") return None # Desision tree - hasSumstats if not 
study_index_df.select("hasSumstats").collect()[0]["hasSumstats"]: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No sumstats found for the studyId", + path_out=log_output, + ) logging.warning("No sumstats found for the studyId") return None @@ -793,6 +850,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 .collect()[0]["FailedQC"] ) if x_boolean: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="Quality control check failed for this study", + path_out=log_output, + ) logging.warning("Quality control check failed for this study") return None @@ -818,6 +882,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 .collect()[0]["FailedQC"] ) if x_boolean: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="Analysis Flags check failed for this study", + path_out=log_output, + ) logging.warning("Analysis Flags check failed for this study") return None @@ -875,6 +946,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 "z", "chromosome", "position", "beta", "StandardError" ) if gwas_index.rdd.isEmpty(): + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No overlapping variants in the LD Index", + path_out=log_output, + ) logging.warning("No overlapping variants in the LD Index") return None gnomad_ld = LDMatrixInterface.get_numpy_matrix( @@ -894,6 +972,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 gwas_index = gwas_index.iloc[indices, :] if len(gwas_index) == 0: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No overlapping variants in the LD Index", + path_out=log_output, + ) logging.warning("No overlapping variants in the LD Index") return None @@ -906,6 +991,13 @@ def 
susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 gwas_index = gwas_index.iloc[indices, :] if len(gwas_index) == 0: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No overlapping variants in the LD Index", + path_out=log_output, + ) logging.warning("No overlapping variants in the LD Index") return None @@ -916,6 +1008,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 ld_index.select("variantId", "idx", "alleleOrder"), on="variantId" ).sort("idx") if gwas_index.rdd.isEmpty(): + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No overlapping variants in the LD Index", + path_out=log_output, + ) logging.warning("No overlapping variants in the LD Index") return None gwas_index = ld_index @@ -936,6 +1035,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 gwas_index = gwas_index.iloc[indices, :] if len(gwas_index) == 0: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No overlapping variants in the LD Index", + path_out=log_output, + ) logging.warning("No overlapping variants in the LD Index") return None @@ -948,6 +1054,13 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 gwas_index = gwas_index.iloc[indices, :] if len(gwas_index) == 0: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="No overlapping variants in the LD Index", + path_out=log_output, + ) logging.warning("No overlapping variants in the LD Index") return None @@ -965,9 +1078,23 @@ def susie_finemapper_one_sl_row_gathered_boundaries( # noqa: C901 # Desision tree - number of variants if gwas_index.count() < 100: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="Less than 100 variants after joining GWAS and LD index", + path_out=log_output, + ) 
logging.warning("Less than 100 variants after joining GWAS and LD index") return None elif gwas_index.count() >= 15_000: + if log_output != "": + SusieFineMapperStep._empty_log_mg( + studyId=studyId, + region=region, + error_mg="More than 15000 variants after joining GWAS and LD index", + path_out=log_output, + ) logging.warning("More than 15000 variants after joining GWAS and LD index") return None From 52da70f4abccae613c2620970ac45d70643e2d3d Mon Sep 17 00:00:00 2001 From: Yakov Date: Wed, 23 Oct 2024 10:23:20 +0100 Subject: [PATCH 128/188] chore: add chromosome validation (#869) * chore: add chromosome validation * test: chromosome validation --------- Co-authored-by: DSuveges --- src/gentropy/dataset/study_locus.py | 31 +++++++++++++++++ tests/gentropy/dataset/test_study_locus.py | 40 ++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index eaec9672b..a68a00a6d 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -82,6 +82,8 @@ class StudyLocusQualityCheck(Enum): IN_MHC (str): Flagging study loci in the MHC region REDUNDANT_PICS_TOP_HIT (str): Flagging study loci in studies with PICS results from summary statistics EXPLAINED_BY_SUSIE (str): Study locus in region explained by a SuSiE credible set + OUT_OF_SAMPLE_LD (str): Study locus finemapped without in-sample LD reference + INVALID_CHROMOSOME (str): Chromosome not in 1:22, X, Y, XY or MT """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -111,6 +113,7 @@ class StudyLocusQualityCheck(Enum): TOP_HIT = "Study locus from curated top hit" EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set" OUT_OF_SAMPLE_LD = "Study locus finemapped without in-sample LD reference" + INVALID_CHROMOSOME = "Chromosome not in 1:22, X, Y, XY or MT" class CredibleInterval(Enum): @@ -205,6 +208,34 @@ def annotate_study_type(self: StudyLocus, study_index: StudyIndex) -> 
StudyLocus _schema=self.get_schema(), ) + def validate_chromosome_label(self: StudyLocus) -> StudyLocus: + """Flagging study loci, where chromosome is coded not as 1:22, X, Y, Xy and MT. + + Returns: + StudyLocus: Updated study locus with quality control flags. + """ + # QC column might not be present in the variant index schema, so we have to be ready to handle it: + qc_select_expression = ( + f.col("qualityControls") + if "qualityControls" in self.df.columns + else f.lit(None).cast(ArrayType(StringType())) + ) + valid_chromosomes = [str(i) for i in range(1, 23)] + ["X", "Y", "XY", "MT"] + + return StudyLocus( + _df=( + self.df.withColumn( + "qualityControls", + self.update_quality_flag( + qc_select_expression, + ~f.col("chromosome").isin(valid_chromosomes), + StudyLocusQualityCheck.INVALID_CHROMOSOME, + ), + ) + ), + _schema=self.get_schema(), + ) + def validate_variant_identifiers( self: StudyLocus, variant_index: VariantIndex ) -> StudyLocus: diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 94da005b9..eaee0ebf3 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -1081,3 +1081,43 @@ def test_qc_explained_by_SuSiE_correctness( ) .count() ) == 3 + + +def test_qc_valid_chromosomes( + spark: SparkSession, +) -> None: + """Testing if chredible sets with invalid chromosomes are properly flagged.""" + df = spark.createDataFrame( + [ + # Chromosome is fine: + ("1", "v1", "s1", "X", []), + ("2", "v2", "s1", "1", []), + # Should be flagged: + ("3", "v3", "s1", "11325", []), + ("4", "v4", "s1", "CICAFUL", []), + ], + schema=t.StructType( + [ + t.StructField("studyLocusId", t.StringType(), False), + t.StructField("variantId", t.StringType(), False), + t.StructField("studyId", t.StringType(), False), + t.StructField("chromosome", t.StringType(), False), + t.StructField("qualityControls", t.ArrayType(t.StringType()), False), + ] + ), + ) + + sl = 
StudyLocus(_df=df, _schema=StudyLocus.get_schema()).validate_chromosome_label() + + # Assert return type: + assert isinstance(sl, StudyLocus) + + # Assert flagging correctness: + for row in sl.df.collect(): + if row["chromosome"] in ["1", "X"]: + assert not row["qualityControls"] + else: + assert ( + StudyLocusQualityCheck.INVALID_CHROMOSOME.value + in row["qualityControls"] + ) From 9fe77ca8e8f816f6fd9d33638affac52b6f6bd84 Mon Sep 17 00:00:00 2001 From: xyg123 <33658607+xyg123@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:16:37 +0100 Subject: [PATCH 129/188] feat: add gene count features to l2g (#852) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add gene count features to l2g * fix: add genecount to pytest * fix: use alias in gene count * fix: use alias in gene count * fix: pytest now works * chore: pre-commit auto fixes [...] * fix: type hint * feat(coloc): step refactoring (#845) * feat: prior fix * refactor: coloc step * fix: subclasss colocalisation methods on interface * fix: failing coloc step * chore(coloc): step tests * chore: restore old script --------- Co-authored-by: Szymon Szyszkowski Co-authored-by: project-defiant * test: skip `fetch_coordinates_from_rsids` (#850) * fix(eqtl): deduplicating credible set loci (#849) * fix(eqtl): deduplicating credible set loci * fix: removing * chore: adding logging even when no CS in locus (#848) * chore: adding logging even when no CS in locus * fix: addin CS count to log * fix: type hints * added test_l2g_feature tests * fix: remove print statements from tests * feat: add genecount features to FM + config * feat: add to l2g config * chore: pre-commit auto fixes [...] 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Co-authored-by: Szymon Szyszkowski Co-authored-by: project-defiant Co-authored-by: Irene López Santiago <45119610+ireneisdoomed@users.noreply.github.com> Co-authored-by: Daniel Suveges Co-authored-by: Yakov --- .../python_api/datasets/l2g_features/other.md | 12 ++ src/gentropy/config.py | 6 + src/gentropy/dataset/l2g_features/other.py | 162 ++++++++++++++++++ src/gentropy/method/l2g/feature_factory.py | 6 + tests/gentropy/dataset/test_l2g_feature.py | 114 +++++++++++- .../dataset/test_l2g_feature_matrix.py | 34 +++- 6 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 docs/python_api/datasets/l2g_features/other.md create mode 100644 src/gentropy/dataset/l2g_features/other.py diff --git a/docs/python_api/datasets/l2g_features/other.md b/docs/python_api/datasets/l2g_features/other.md new file mode 100644 index 000000000..e294e1813 --- /dev/null +++ b/docs/python_api/datasets/l2g_features/other.md @@ -0,0 +1,12 @@ +--- +title: Other features +--- + +## List of features + +::: gentropy.dataset.l2g_features.other.GeneCountFeature +::: gentropy.dataset.l2g_features.other.ProteinGeneCountFeature + +## Common logic + +::: gentropy.dataset.l2g_features.other.common_genecount_feature_logic diff --git a/src/gentropy/config.py b/src/gentropy/config.py index d47c1b8ab..f6b699f0a 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -258,6 +258,9 @@ class LocusToGeneConfig(StepConfig): "vepMaximumNeighbourhood", "vepMean", "vepMeanNeighbourhood", + # other + "geneCount500kb", + "proteinGeneCount500kb", ] ) hyperparameters: dict[str, Any] = field( @@ -325,6 +328,9 @@ class LocusToGeneFeatureMatrixConfig(StepConfig): "vepMaximumNeighbourhood", "vepMean", "vepMeanNeighbourhood", + # other + "geneCount500kb", + "proteinGeneCount500kb", ] ) _target_: str = 
"gentropy.l2g.LocusToGeneFeatureMatrixStep" diff --git a/src/gentropy/dataset/l2g_features/other.py b/src/gentropy/dataset/l2g_features/other.py new file mode 100644 index 000000000..a033192a8 --- /dev/null +++ b/src/gentropy/dataset/l2g_features/other.py @@ -0,0 +1,162 @@ +"""Methods to generate features which are not obviously categorised.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pyspark.sql.functions as f + +from gentropy.common.spark_helpers import convert_from_wide_to_long +from gentropy.dataset.gene_index import GeneIndex +from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_gold_standard import L2GGoldStandard +from gentropy.dataset.study_locus import StudyLocus + +if TYPE_CHECKING: + from pyspark.sql import DataFrame + + +def common_genecount_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + *, + gene_index: GeneIndex, + feature_name: str, + genomic_window: int, + protein_coding_only: bool = False, +) -> DataFrame: + """Computes the feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci + that will be used for annotation + gene_index (GeneIndex): Dataset containing information related to all genes in release. + feature_name (str): The name of the feature + genomic_window (int): The maximum window size to consider + protein_coding_only (bool): Whether to only consider protein coding genes in calculation. 
+ + Returns: + DataFrame: Feature dataset + """ + study_loci_window = ( + study_loci_to_annotate.df.withColumn( + "window_start", f.col("position") - (genomic_window / 2) + ) + .withColumn("window_end", f.col("position") + (genomic_window / 2)) + .withColumnRenamed("chromosome", "SL_chromosome") + ) + gene_index_filter = gene_index.df + + if protein_coding_only: + gene_index_filter = gene_index_filter.filter( + f.col("biotype") == "protein_coding" + ) + + distinct_gene_counts = ( + study_loci_window.join( + gene_index_filter.alias("genes"), + on=( + (f.col("SL_chromosome") == f.col("genes.chromosome")) + & (f.col("genes.tss") >= f.col("window_start")) + & (f.col("genes.tss") <= f.col("window_end")) + ), + how="inner", + ) + .groupBy("studyLocusId") + .agg(f.approx_count_distinct("geneId").alias(feature_name)) + ) + + return ( + study_loci_window.join( + gene_index_filter.alias("genes"), + on=( + (f.col("SL_chromosome") == f.col("genes.chromosome")) + & (f.col("genes.tss") >= f.col("window_start")) + & (f.col("genes.tss") <= f.col("window_end")) + ), + how="inner", + ) + .join(distinct_gene_counts, on="studyLocusId", how="inner") + .select("studyLocusId", "geneId", feature_name) + .distinct() + ) + + +class GeneCountFeature(L2GFeature): + """Counts the number of genes within a specified window size from the study locus.""" + + feature_dependency_type = GeneIndex + feature_name = "geneCount500kb" + + @classmethod + def compute( + cls: type[GeneCountFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> GeneCountFeature: + """Computes the gene count feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dictionary containing dependencies, with gene index and window size + + Returns: + GeneCountFeature: Feature dataset + """ + genomic_window = 500000 + gene_count_df = common_genecount_feature_logic( + study_loci_to_annotate=study_loci_to_annotate, + feature_name=cls.feature_name, + genomic_window=genomic_window, + **feature_dependency, + ) + + return cls( + _df=convert_from_wide_to_long( + gene_count_df, + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + +class ProteinGeneCountFeature(L2GFeature): + """Counts the number of protein coding genes within a specified window size from the study locus.""" + + feature_dependency_type = GeneIndex + feature_name = "proteinGeneCount500kb" + + @classmethod + def compute( + cls: type[ProteinGeneCountFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> ProteinGeneCountFeature: + """Computes the gene count feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dictionary containing dependencies, with gene index and window size + + Returns: + ProteinGeneCountFeature: Feature dataset + """ + genomic_window = 500000 + gene_count_df = common_genecount_feature_logic( + study_loci_to_annotate=study_loci_to_annotate, + feature_name=cls.feature_name, + genomic_window=genomic_window, + protein_coding_only=True, + **feature_dependency, + ) + + return cls( + _df=convert_from_wide_to_long( + gene_count_df, + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index 1077fc825..dbb5ecf48 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -29,6 +29,10 @@ DistanceTssMeanNeighbourhoodFeature, ) from gentropy.dataset.l2g_features.l2g_feature import L2GFeature +from gentropy.dataset.l2g_features.other import ( + GeneCountFeature, + ProteinGeneCountFeature, +) from gentropy.dataset.l2g_features.vep import ( VepMaximumFeature, VepMaximumNeighbourhoodFeature, @@ -119,6 +123,8 @@ class FeatureFactory: "vepMeanNeighbourhood": VepMeanNeighbourhoodFeature, "vepMaximum": VepMaximumFeature, "vepMaximumNeighbourhood": VepMaximumNeighbourhoodFeature, + "geneCount500kb": GeneCountFeature, + "proteinGeneCount500kb": ProteinGeneCountFeature, } def __init__( diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 212ad32a6..a841af0a6 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -1,3 +1,5 @@ +# pylint: disable=too-few-public-methods +# isort: skip_file """Test locus-to-gene feature generation.""" from __future__ import annotations @@ -57,6 
+59,11 @@ common_neighbourhood_vep_feature_logic, common_vep_feature_logic, ) +from gentropy.dataset.l2g_features.other import ( + common_genecount_feature_logic, + GeneCountFeature, + ProteinGeneCountFeature, +) from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus from gentropy.dataset.variant_index import VariantIndex @@ -93,6 +100,8 @@ VepMeanFeature, VepMaximumNeighbourhoodFeature, VepMeanNeighbourhoodFeature, + GeneCountFeature, + ProteinGeneCountFeature, ], ) def test_feature_factory_return_type( @@ -391,7 +400,7 @@ def _setup(self: TestCommonColocalisationFeatureLogic, spark: SparkSession) -> N { "studyLocusId": "2", "variantId": "var1", - "studyId": "study2", # this is a QTL (same gee) + "studyId": "study2", # this is a QTL (same gene) "chromosome": "1", }, { @@ -803,3 +812,106 @@ def _setup(self: TestCommonVepFeatureLogic, spark: SparkSession) -> None: ), _schema=StudyLocus.get_schema(), ) + + +class TestCommonGeneCountFeatureLogic: + """Test the CommonGeneCountFeatureLogic methods.""" + + @pytest.mark.parametrize( + ("feature_name", "expected_data", "protein_coding_only"), + [ + ( + "geneCount", + [ + {"studyLocusId": "1", "geneId": "gene1", "geneCount": 3}, + {"studyLocusId": "1", "geneId": "gene2", "geneCount": 3}, + {"studyLocusId": "1", "geneId": "gene3", "geneCount": 3}, + ], + False, # Test case for all genes + ), + ( + "geneCountProteinCoding", + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "geneCountProteinCoding": 2, + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "geneCountProteinCoding": 2, + }, + ], + True, # Test case for protein-coding genes only + ), + ], + ) + def test_common_genecount_feature_logic( + self: TestCommonGeneCountFeatureLogic, + spark: SparkSession, + feature_name: str, + expected_data: list[dict[str, Any]], + protein_coding_only: bool, + ) -> None: + """Test the common logic of the gene count features.""" + observed_df = common_genecount_feature_logic( + 
study_loci_to_annotate=self.sample_study_locus, + gene_index=self.sample_gene_index, + feature_name=feature_name, + genomic_window=500000, + protein_coding_only=protein_coding_only, + ).orderBy("studyLocusId", "geneId") + expected_df = ( + spark.createDataFrame(expected_data) + .select("studyLocusId", "geneId", feature_name) + .orderBy("studyLocusId", "geneId") + ) + assert ( + observed_df.collect() == expected_df.collect() + ), f"Expected and observed dataframes do not match for feature {feature_name}." + + @pytest.fixture(autouse=True) + def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None: + """Set up testing fixtures.""" + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "var1", + "studyId": "study1", + "chromosome": "1", + "position": 1000000, + }, + ], + StudyLocus.get_schema(), + ), + _schema=StudyLocus.get_schema(), + ) + self.sample_gene_index = GeneIndex( + _df=spark.createDataFrame( + [ + { + "geneId": "gene1", + "chromosome": "1", + "tss": 950000, + "biotype": "protein_coding", + }, + { + "geneId": "gene2", + "chromosome": "1", + "tss": 1050000, + "biotype": "protein_coding", + }, + { + "geneId": "gene3", + "chromosome": "1", + "tss": 1010000, + "biotype": "non_coding", + }, + ], + GeneIndex.get_schema(), + ), + _schema=GeneIndex.get_schema(), + ) diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index f4859844a..76661e170 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -5,9 +5,17 @@ from typing import TYPE_CHECKING import pytest -from pyspark.sql.types import ArrayType, DoubleType, StringType, StructField, StructType +from pyspark.sql.types import ( + ArrayType, + DoubleType, + IntegerType, + StringType, + StructField, + StructType, +) from gentropy.dataset.colocalisation import Colocalisation +from gentropy.dataset.gene_index 
import GeneIndex from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_index import StudyIndex @@ -40,11 +48,12 @@ def test_study_locus( self: TestFromFeaturesList, ) -> None: """Test building feature matrix for a SL with the eQtlColocH4Maximum feature.""" - features_list = ["eQtlColocH4Maximum"] + features_list = ["eQtlColocH4Maximum", "geneCount500kb"] loader = L2GFeatureInputLoader( colocalisation=self.sample_colocalisation, study_index=self.sample_study_index, study_locus=self.sample_study_locus, + gene_index=self.sample_gene_index, ) fm = L2GFeatureMatrix.from_features_list( self.sample_study_locus, features_list, loader @@ -89,6 +98,8 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: "1", "var1", "gwas1", + "X", + 2, [ {"variantId": "var1", "posteriorProbability": 0.8}, {"variantId": "var12", "posteriorProbability": 0.2}, @@ -98,6 +109,8 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: "2", "var2", "eqtl1", + "X", + 10, [ {"variantId": "var2", "posteriorProbability": 1.0}, ], @@ -108,6 +121,8 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: StructField("studyLocusId", StringType(), True), StructField("variantId", StringType(), True), StructField("studyId", StringType(), True), + StructField("chromosome", StringType(), True), + StructField("position", IntegerType(), True), StructField( "locus", ArrayType( @@ -154,3 +169,18 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ), _schema=Colocalisation.get_schema(), ) + self.sample_gene_index = GeneIndex( + _df=spark.createDataFrame( + [ + ("g1", "X", "protein_coding", 200), + ("g2", "X", "protein_coding", 300), + ], + [ + "geneId", + "chromosome", + "biotype", + "tss", + ], + ), + _schema=GeneIndex.get_schema(), + ) From e233dac0a2ed5a55daa1ddb98955e18bfe717888 Mon Sep 17 00:00:00 2001 From: Daniel-Considine 
<113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:57:33 +0100 Subject: [PATCH 130/188] feat: change betas to posterior mean from susie for Finngen credible sets (#872) * feat: using mean instead of univariate beta * fix: missing f-string * fix: typo * fix: beta column in locus * feat: populating locusStart and locusEnd from region column * fix: locusStart/End cast to integer --------- Co-authored-by: Yakov --- .../datasource/finngen/finemapping.py | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 5b8d21864..36ab97e80 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -211,7 +211,7 @@ def from_finngen_susie_finemapping( ) -> StudyLocus: """Process the SuSIE finemapping output for FinnGen studies. - The finngen_susue_finemapping_snp_files are files that contain variant summaries with credible set information with following shema: + The finngen_susie_finemapping_snp_files are files that contain variant summaries with credible set information with following shema: - trait: phenotype - region: region for which the fine-mapping was run. - v, rsid: variant ids @@ -312,8 +312,7 @@ def from_finngen_susie_finemapping( f.col("allele2").cast(t.StringType()).alias("alt"), # Parse p-value into mantissa and exponent. *parse_pvalue(f.col("p")), - # Add beta, standard error, and allele frequency information. - f.col("beta").cast("double"), + # Add standard error, and allele frequency information. 
f.col("se").cast("double").alias("standardError"), f.col("maf").cast("float").alias("effectAlleleFrequencyFromSource"), f.lit("SuSie").cast("string").alias("finemappingMethod"), @@ -325,6 +324,10 @@ def from_finngen_susie_finemapping( f.col(f"lbf_variable{i}").cast(t.DoubleType()).alias(f"lbf_{i}") for i in range(1, 11) ], + *[ + f.col(f"mean{i}").cast(t.DoubleType()).alias(f"beta_{i}") + for i in range(1, 11) + ], ) .withColumn( "posteriorProbability", @@ -376,6 +379,31 @@ def from_finngen_susie_finemapping( "lbf_9", "lbf_10", ) + .withColumn( + "beta", + f.when(f.col("credibleSetIndex") == 1, f.col("beta_1")) + .when(f.col("credibleSetIndex") == 2, f.col("beta_2")) + .when(f.col("credibleSetIndex") == 3, f.col("beta_3")) + .when(f.col("credibleSetIndex") == 4, f.col("beta_4")) + .when(f.col("credibleSetIndex") == 5, f.col("beta_5")) + .when(f.col("credibleSetIndex") == 6, f.col("beta_6")) + .when(f.col("credibleSetIndex") == 7, f.col("beta_7")) + .when(f.col("credibleSetIndex") == 8, f.col("beta_8")) + .when(f.col("credibleSetIndex") == 9, f.col("beta_9")) + .when(f.col("credibleSetIndex") == 10, f.col("beta_10")), + ) + .drop( + "beta_1", + "beta_2", + "beta_3", + "beta_4", + "beta_5", + "beta_6", + "beta_7", + "beta_8", + "beta_9", + "beta_10", + ) ) bgzip_compressed_cs_summaries = cls._infer_block_gzip_compression( @@ -475,6 +503,14 @@ def from_finngen_susie_finemapping( on=["studyId", "region", "credibleSetIndex"], how="inner", ) + .withColumns( + { + "locusStart": f.split(f.split("region", ":")[1], "-")[0].cast( + "int" + ), + "locusEnd": f.split(f.split("region", ":")[1], "-")[1].cast("int"), + } + ) ).withColumn( "studyLocusId", StudyLocus.assign_study_locus_id( From d4b91d61fdc0a7a65133a4e02678c46b7e728c8f Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Thu, 24 Oct 2024 11:13:29 +0100 Subject: [PATCH 131/188] feat: step to export disease/target evidence (#867) * feature(l2g): step to export disease/target evidence * fix: sorting out typo in function 
docstring * fix: evidence is written as json * fix: addressing reviewer comments * docs: adding step documentation * chore: pre-commit auto fixes [...] * fix: removing default value from step definition --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/python_api/steps/l2g.md | 2 ++ src/gentropy/config.py | 17 ++++++++++ src/gentropy/dataset/l2g_prediction.py | 43 ++++++++++++++++++++++++++ src/gentropy/l2g.py | 42 +++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/docs/python_api/steps/l2g.md b/docs/python_api/steps/l2g.md index 556e5a275..5594f1605 100644 --- a/docs/python_api/steps/l2g.md +++ b/docs/python_api/steps/l2g.md @@ -5,3 +5,5 @@ title: Locus to Gene (L2G) ::: gentropy.l2g.LocusToGeneFeatureMatrixStep ::: gentropy.l2g.LocusToGeneStep + +::: gentropy.l2g.LocusToGeneEvidenceStep diff --git a/src/gentropy/config.py b/src/gentropy/config.py index f6b699f0a..486778c3a 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -615,6 +615,18 @@ class StudyValidationStepConfig(StepConfig): _target_: str = "gentropy.study_validation.StudyValidationStep" +@dataclass +class LocusToGeneEvidenceStepConfig(StepConfig): + """Configuration of the locus to gene evidence step.""" + + locus_to_gene_predictions_path: str = MISSING + credible_set_path: str = MISSING + study_index_path: str = MISSING + evidence_output_path: str = MISSING + locus_to_gene_threshold: float = 0.05 + _target_: str = "gentropy.l2g.LocusToGeneEvidenceStep" + + @dataclass class StudyLocusValidationStepConfig(StepConfig): """Configuration of the study index validation step. 
@@ -710,4 +722,9 @@ def register_config() -> None: name="study_validation", node=StudyValidationStepConfig, ) + cs.store( + group="step", + name="locus_to_gene_evidence", + node=LocusToGeneEvidenceStepConfig, + ) cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig) diff --git a/src/gentropy/dataset/l2g_prediction.py b/src/gentropy/dataset/l2g_prediction.py index c29b359af..169f5a846 100644 --- a/src/gentropy/dataset/l2g_prediction.py +++ b/src/gentropy/dataset/l2g_prediction.py @@ -6,11 +6,13 @@ from typing import TYPE_CHECKING, Type import pyspark.sql.functions as f +from pyspark.sql import DataFrame from gentropy.common.schemas import parse_spark_schema from gentropy.common.session import Session from gentropy.dataset.dataset import Dataset from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix +from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus from gentropy.method.l2g.model import LocusToGeneModel @@ -83,3 +85,44 @@ def from_credible_set( ) return l2g_model.predict(fm, session) + + def to_disease_target_evidence( + self: L2GPrediction, + study_locus: StudyLocus, + study_index: StudyIndex, + l2g_threshold: float = 0.05, + ) -> DataFrame: + """Convert locus to gene predictions to disease target evidence. + + Args: + study_locus (StudyLocus): Study locus dataset + study_index (StudyIndex): Study index dataset + l2g_threshold (float): Threshold to consider a gene as a target. Defaults to 0.05. 
+ + Returns: + DataFrame: Disease target evidence + """ + datasource_id = "gwas_credible_sets" + datatype_id = "genetic_association" + + return ( + self.df.filter(f.col("score") >= l2g_threshold) + .join( + study_locus.df.select("studyLocusId", "studyId"), + on="studyLocusId", + how="inner", + ) + .join( + study_index.df.select("studyId", "diseaseIds"), + on="studyId", + how="inner", + ) + .select( + f.lit(datatype_id).alias("datatypeId"), + f.lit(datasource_id).alias("datasourceId"), + f.col("geneId").alias("targetFromSourceId"), + f.explode(f.col("diseaseIds")).alias("diseaseFromSourceMappedId"), + f.col("score").alias("resourceScore"), + "studyLocusId", + ) + ) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 1a5037bb2..ca52fbf04 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -278,3 +278,45 @@ def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: .persist() ) raise ValueError("Dependencies for train mode not set.") + + +class LocusToGeneEvidenceStep: + """Locus to gene evidence step.""" + + def __init__( + self, + session: Session, + locus_to_gene_predictions_path: str, + credible_set_path: str, + study_index_path: str, + evidence_output_path: str, + locus_to_gene_threshold: float, + ) -> None: + """Initialise the step and generate disease/target evidence. + + Args: + session (Session): Session object that contains the Spark session + locus_to_gene_predictions_path (str): Path to the L2G predictions dataset + credible_set_path (str): Path to the credible set dataset + study_index_path (str): Path to the study index dataset + evidence_output_path (str): Path to the L2G evidence output dataset + locus_to_gene_threshold (float, optional): Threshold to consider a gene as a target. Defaults to 0.05. 
+ """ + # Reading the predictions + locus_to_gene_prediction = L2GPrediction.from_parquet( + session, locus_to_gene_predictions_path + ) + # Reading the credible set + credible_sets = StudyLocus.from_parquet(session, credible_set_path) + + # Reading the study index + study_index = StudyIndex.from_parquet(session, study_index_path) + + # Generate evidence and save file: + ( + locus_to_gene_prediction.to_disease_target_evidence( + credible_sets, study_index, locus_to_gene_threshold + ) + .write.mode(session.write_mode) + .json(evidence_output_path) + ) From c252dcb6ca12455bc61d9bbb88ccf8996556034a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 24 Oct 2024 13:17:53 +0100 Subject: [PATCH 132/188] feat(variant_index): hash variants at the time of instance creation (#874) * fix(variant_index): pass config threshold after joining with gnomad * feat(dataset): ability to pass class params to `from_parquet` * feat(variant_index): hash variants after initialising class * test: ensure hashing happens and params are correcly separated * fix: add threshold in `test_extract_variant_index_from_vep` * feat(variant_index): make hash threshold inclusive --- src/gentropy/dataset/dataset.py | 41 ++++++++++++++++--- src/gentropy/dataset/variant_index.py | 33 ++++++++++----- src/gentropy/datasource/ensembl/vep_parser.py | 5 ++- src/gentropy/variant_index.py | 14 ++----- tests/gentropy/dataset/test_dataset.py | 20 ++++++--- tests/gentropy/dataset/test_variant_index.py | 22 ++++++++-- .../datasource/ensembl/test_vep_variants.py | 2 +- 7 files changed, 98 insertions(+), 39 deletions(-) diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index d033e129d..779faefe2 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -66,6 +66,30 @@ def schema(self: Dataset) -> StructType: """ return self._schema + @classmethod + def _process_class_params( + cls, 
params: dict[str, Any] + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Separate class initialization parameters from spark session parameters. + + Args: + params (dict[str, Any]): Combined parameters dictionary + + Returns: + tuple[dict[str, Any], dict[str, Any]]: (class_params, spark_params) + """ + # Get all field names from the class (including parent classes) + class_field_names = { + field.name + for cls_ in cls.__mro__ + if hasattr(cls_, "__dataclass_fields__") + for field in cls_.__dataclass_fields__.values() + } + # Separate parameters + class_params = {k: v for k, v in params.items() if k in class_field_names} + spark_params = {k: v for k, v in params.items() if k not in class_field_names} + return class_params, spark_params + @classmethod @abstractmethod def get_schema(cls: type[Self]) -> StructType: @@ -120,10 +144,14 @@ def from_parquet( ValueError: Parquet file is empty """ schema = cls.get_schema() - df = session.load_data(path, format="parquet", schema=schema, **kwargs) + + # Separate class params from spark params + class_params, spark_params = cls._process_class_params(kwargs) + + df = session.load_data(path, format="parquet", schema=schema, **spark_params) if df.isEmpty(): raise ValueError(f"Parquet file is empty: {path}") - return cls(_df=df, _schema=schema) + return cls(_df=df, _schema=schema, **class_params) def filter(self: Self, condition: Column) -> Self: """Creates a new instance of a Dataset with the DataFrame filtered by the condition. 
@@ -321,7 +349,10 @@ def generate_identifier(uniqueness_defining_columns: list[str]) -> Column: Returns: Column: column with a unique identifier """ - hashable_columns = [f.when(f.col(column).cast("string").isNull(), f.lit("None")) - .otherwise(f.col(column).cast("string")) - for column in uniqueness_defining_columns] + hashable_columns = [ + f.when(f.col(column).cast("string").isNull(), f.lit("None")).otherwise( + f.col(column).cast("string") + ) + for column in uniqueness_defining_columns + ] return f.md5(f.concat(*hashable_columns)) diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index a1a2e2a4d..9e8740aa6 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -2,7 +2,7 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import TYPE_CHECKING import pyspark.sql.functions as f @@ -25,6 +25,8 @@ class VariantIndex(Dataset): """Dataset for representing variants and methods applied on them.""" + id_threshold: int = field(default=300) + def __post_init__(self: VariantIndex) -> None: """Forcing the presence of empty arrays even if the schema allows missing values. 
@@ -45,7 +47,16 @@ def __post_init__(self: VariantIndex) -> None: } # Not returning, but changing the data: - self.df = self.df.withColumns(array_columns) + self.df = self.df.withColumns(array_columns).withColumn( + # Hashing long variant identifiers: + "variantId", + self.hash_long_variant_ids( + f.col("variantId"), + f.col("chromosome"), + f.col("position"), + self.id_threshold, + ), + ) @classmethod def get_schema(cls: type[VariantIndex]) -> StructType: @@ -58,7 +69,7 @@ def get_schema(cls: type[VariantIndex]) -> StructType: @staticmethod def hash_long_variant_ids( - variant_id: Column, chromosome: Column, position: Column, threshold: int = 100 + variant_id: Column, chromosome: Column, position: Column, threshold: int ) -> Column: """Hash long variant identifiers. @@ -98,7 +109,7 @@ def hash_long_variant_ids( ) # If chromosome and position are given, but alleles are too long, create hash: .when( - f.length(variant_id) > threshold, + f.length(variant_id) >= threshold, f.concat_ws( "_", f.lit("OTVAR"), @@ -132,20 +143,20 @@ def add_annotation( select_expressions = [] # Collect columns by iterating over the variant index schema: - for field in VariantIndex.get_schema(): - column = field.name + for schema_field in VariantIndex.get_schema(): + column = schema_field.name # If an annotation column can be found in both datasets: if (column in self.df.columns) and (column in annotation_source.df.columns): # Arrays are merged: - if isinstance(field.dataType, t.ArrayType): + if isinstance(schema_field.dataType, t.ArrayType): fields_order = None - if isinstance(field.dataType.elementType, t.StructType): + if isinstance(schema_field.dataType.elementType, t.StructType): # Extract the schema of the array to get the order of the fields: array_schema = [ - field - for field in VariantIndex.get_schema().fields - if field.name == column + schema_field + for schema_field in VariantIndex.get_schema().fields + if schema_field.name == column ][0].dataType fields_order = 
get_nested_struct_schema( array_schema diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 6931b96de..01c820513 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -59,7 +59,7 @@ def extract_variant_index_from_vep( cls: type[VariantEffectPredictorParser], spark: SparkSession, vep_output_path: str | list[str], - hash_threshold: int = 100, + hash_threshold: int, **kwargs: bool | float | int | str | None, ) -> VariantIndex: """Extract variant index from VEP output. @@ -67,7 +67,7 @@ def extract_variant_index_from_vep( Args: spark (SparkSession): Spark session. vep_output_path (str | list[str]): Path to the VEP output. - hash_threshold (int): Threshold above which variant identifiers will be hashed. Default is 100, + hash_threshold (int): Threshold above which variant identifiers will be hashed. **kwargs (bool | float | int | str | None): Additional arguments to pass to spark.read.json. 
Returns: @@ -93,6 +93,7 @@ def extract_variant_index_from_vep( vep_data, hash_threshold ), _schema=VariantIndex.get_schema(), + id_threshold=hash_threshold, ) @staticmethod diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index dba087c79..b50b470b2 100644 --- a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -2,10 +2,7 @@ from __future__ import annotations -from pyspark.sql.functions import col - from gentropy.common.session import Session -from gentropy.config import VariantIndexConfig from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.ensembl.vep_parser import VariantEffectPredictorParser from gentropy.datasource.open_targets.variants import OpenTargetsVariant @@ -23,7 +20,7 @@ def __init__( session: Session, vep_output_json_path: str, variant_index_path: str, - hash_threshold: int = VariantIndexConfig().hash_threshold, + hash_threshold: int, gnomad_variant_annotations_path: str | None = None, ) -> None: """Run VariantIndex step. 
@@ -47,19 +44,14 @@ def __init__( session=session, path=gnomad_variant_annotations_path, recursiveFileLookup=True, + id_threshold=hash_threshold, ) # Update file with extra annotations: variant_index = variant_index.add_annotation(annotations) ( - variant_index.df.withColumn( - "variantId", - VariantIndex.hash_long_variant_ids( - col("variantId"), col("chromosome"), col("position") - ), - ) - .repartitionByRange("chromosome", "position") + variant_index.df.repartitionByRange("chromosome", "position") .sortWithinPartitions("chromosome", "position") .write.mode(session.write_mode) .parquet(variant_index_path) diff --git a/tests/gentropy/dataset/test_dataset.py b/tests/gentropy/dataset/test_dataset.py index a152b1ac8..7c61f3f52 100644 --- a/tests/gentropy/dataset/test_dataset.py +++ b/tests/gentropy/dataset/test_dataset.py @@ -6,12 +6,7 @@ import pyspark.sql.functions as f import pytest from pyspark.sql import SparkSession -from pyspark.sql.types import ( - DoubleType, - IntegerType, - StructField, - StructType, -) +from pyspark.sql.types import DoubleType, IntegerType, StructField, StructType from gentropy.dataset.dataset import Dataset from gentropy.dataset.study_index import StudyIndex @@ -79,3 +74,16 @@ def test_dataset_drop_infinity_values() -> None: assert ds.drop_infinity_values().df.count() == 7 # otherwise drop all columns assert ds.drop_infinity_values("field").df.count() == 1 + + +def test__process_class_params(spark: SparkSession) -> None: + """Test splitting of parameters between class and spark parameters.""" + params = { + "_df": spark.createDataFrame([(1,)], schema=MockDataset.get_schema()), + "recursiveFileLookup": True, + } + class_params, spark_params = Dataset._process_class_params(params) + assert "_df" in class_params, "Class params should contain _df" + assert ( + "recursiveFileLookup" in spark_params + ), "Spark params should contain recursiveFileLookup" diff --git a/tests/gentropy/dataset/test_variant_index.py 
b/tests/gentropy/dataset/test_variant_index.py index 15b102415..43c409ea6 100644 --- a/tests/gentropy/dataset/test_variant_index.py +++ b/tests/gentropy/dataset/test_variant_index.py @@ -24,14 +24,22 @@ class TestVariantIndex: MOCK_ANNOTATION_DATA = [ ("v1", "c1", 2, "T", "A", ["rs5"], "really bad consequence"), - ("v4", "c1", 5, "T", "A", ["rs6"], "mild consequence"), + ( + "v4_long", + "c1", + 5, + "T", + "A", + ["rs6"], + "mild consequence", + ), # should be hashed automatically ] MOCK_DATA = [ ("v1", "c1", 2, "T", "A", ["rs1"]), ("v2", "c1", 3, "T", "A", ["rs2", "rs3"]), ("v3", "c1", 4, "T", "A", None), - ("v4", "c1", 5, "T", "A", None), + ("v4_long", "c1", 5, "T", "A", None), # should be hashed automatically ] MOCK_SCHEMA = t.StructType( @@ -69,7 +77,7 @@ def _setup(self: TestVariantIndex, spark: SparkSession) -> None: self.df = spark.createDataFrame(self.MOCK_DATA, schema=self.MOCK_SCHEMA) # Loading variant index: self.variant_index = VariantIndex( - _df=self.df, _schema=VariantIndex.get_schema() + _df=self.df, _schema=VariantIndex.get_schema(), id_threshold=2 ) # Loading annotation variant index: @@ -78,6 +86,7 @@ def _setup(self: TestVariantIndex, spark: SparkSession) -> None: self.MOCK_ANNOTATION_DATA, schema=self.MOCK_ANNOTATION_SCHEMA ), _schema=VariantIndex.get_schema(), + id_threshold=2, ) def test_init_type(self: TestVariantIndex) -> None: @@ -132,6 +141,13 @@ def test_rsid_column_updated(self: TestVariantIndex) -> None: == 2 ) + def test_variantid_column_hashed(self: TestVariantIndex) -> None: + """Make sure the variantId column is hashed during initialisation. 
Threshold is set to 2, so v4_long should be hashed.""" + assert ( + self.variant_index.df.filter(f.col("variantId").startswith("OTVAR")).count() + != 0 + ) + @pytest.mark.parametrize( "distance_type", ["distanceFromTss", "distanceFromFootprint"] ) diff --git a/tests/gentropy/datasource/ensembl/test_vep_variants.py b/tests/gentropy/datasource/ensembl/test_vep_variants.py index 5757fa2f5..556a22411 100644 --- a/tests/gentropy/datasource/ensembl/test_vep_variants.py +++ b/tests/gentropy/datasource/ensembl/test_vep_variants.py @@ -113,7 +113,7 @@ def test_extract_variant_index_from_vep( ) -> None: """Test if the variant index can be extracted from the VEP output.""" variant_index = VariantEffectPredictorParser.extract_variant_index_from_vep( - spark, self.SAMPLE_VEP_DATA_PATH + spark, self.SAMPLE_VEP_DATA_PATH, hash_threshold=100 ) assert isinstance( From b694d80d382d438801f16f24e3ad67557a3c8a3b Mon Sep 17 00:00:00 2001 From: Yakov Date: Thu, 24 Oct 2024 17:06:16 +0100 Subject: [PATCH 133/188] fix: fix in calculate_credible_set_log10bf (#868) --- src/gentropy/dataset/study_locus.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index a68a00a6d..a0a231cfa 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -553,11 +553,14 @@ def calculate_credible_set_log10bf(cls: type[StudyLocus], logbfs: Column) -> Col +------------------+ |credibleSetlog10BF| +------------------+ - | 1.4765565| + | 0.6412604| +------------------+ """ - logsumexp_udf = f.udf(lambda x: get_logsum(x), FloatType()) + # log10=log/log(10)=log*0.43429448190325176 + logsumexp_udf = f.udf( + lambda x: (get_logsum(x) * 0.43429448190325176), FloatType() + ) return logsumexp_udf(logbfs).cast("double").alias("credibleSetlog10BF") @classmethod From 4c1013ea8f6349bc0c5e56985e924dbb6227eae5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= 
<45119610+ireneisdoomed@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:13:26 +0100 Subject: [PATCH 134/188] feat(l2g_feature_matrix): add `credibleSetConfidence` to L2G (#875) * feat(feature_matrix): add `credibleSetConfidence` * feat: add feature to l2g * test: add semantic and minor fixes * test: fix * test: fix --- .../python_api/datasets/l2g_features/other.md | 1 + src/gentropy/config.py | 2 + src/gentropy/dataset/l2g_features/other.py | 102 +++++++++++++- src/gentropy/method/l2g/feature_factory.py | 2 + tests/gentropy/dataset/test_l2g_feature.py | 128 ++++++++++++++---- 5 files changed, 209 insertions(+), 26 deletions(-) diff --git a/docs/python_api/datasets/l2g_features/other.md b/docs/python_api/datasets/l2g_features/other.md index e294e1813..6120f8b7a 100644 --- a/docs/python_api/datasets/l2g_features/other.md +++ b/docs/python_api/datasets/l2g_features/other.md @@ -6,6 +6,7 @@ title: Other features ::: gentropy.dataset.l2g_features.other.GeneCountFeature ::: gentropy.dataset.l2g_features.other.ProteinGeneCountFeature +::: gentropy.dataset.l2g_features.other.CredibleSetConfidenceFeature ## Common logic diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 486778c3a..b3fe73aa8 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -261,6 +261,7 @@ class LocusToGeneConfig(StepConfig): # other "geneCount500kb", "proteinGeneCount500kb", + "credibleSetConfidence", ] ) hyperparameters: dict[str, Any] = field( @@ -331,6 +332,7 @@ class LocusToGeneFeatureMatrixConfig(StepConfig): # other "geneCount500kb", "proteinGeneCount500kb", + "credibleSetConfidence", ] ) _target_: str = "gentropy.l2g.LocusToGeneFeatureMatrixStep" diff --git a/src/gentropy/dataset/l2g_features/other.py b/src/gentropy/dataset/l2g_features/other.py index a033192a8..39348b00b 100644 --- a/src/gentropy/dataset/l2g_features/other.py +++ b/src/gentropy/dataset/l2g_features/other.py @@ -10,10 +10,11 @@ from gentropy.dataset.gene_index import GeneIndex from 
gentropy.dataset.l2g_features.l2g_feature import L2GFeature from gentropy.dataset.l2g_gold_standard import L2GGoldStandard -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import CredibleSetConfidenceClasses, StudyLocus +from gentropy.dataset.variant_index import VariantIndex if TYPE_CHECKING: - from pyspark.sql import DataFrame + from pyspark.sql import Column, DataFrame def common_genecount_feature_logic( @@ -160,3 +161,100 @@ def compute( ), _schema=cls.get_schema(), ) + + +class CredibleSetConfidenceFeature(L2GFeature): + """Distance of the sentinel variant to gene TSS. This is not weighted by the causal probability.""" + + feature_dependency_type = [StudyLocus, VariantIndex] + feature_name = "credibleSetConfidence" + + @classmethod + def compute( + cls: type[CredibleSetConfidenceFeature], + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> CredibleSetConfidenceFeature: + """Computes the feature. 
+ + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dataset that contains the distance information + + Returns: + CredibleSetConfidenceFeature: Feature dataset + """ + full_credible_set = feature_dependency["study_locus"].df.select( + "studyLocusId", + "studyId", + f.explode("locus.variantId").alias("variantId"), + cls.score_credible_set_confidence(f.col("confidence")).alias( + cls.feature_name + ), + ) + + return cls( + _df=convert_from_wide_to_long( + ( + study_loci_to_annotate.df.drop("studyLocusId") + # Annotate genes + .join( + feature_dependency["variant_index"].df.select( + "variantId", + f.explode("transcriptConsequences.targetId").alias( + "geneId" + ), + ), + on="variantId", + how="left", + ) + # Annotate credible set confidence + .join(full_credible_set, ["variantId", "studyId"], "left") + .select("studyLocusId", "geneId", cls.feature_name) + ), + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + @classmethod + def score_credible_set_confidence( + cls: type[CredibleSetConfidenceFeature], + confidence_column: Column, + ) -> Column: + """Expression that assigns a score to the credible set confidence. 
+ + Args: + confidence_column (Column): Confidence column in the StudyLocus object + + Returns: + Column: A confidence score between 0 and 1 + """ + return ( + f.when( + f.col("confidence") + == CredibleSetConfidenceClasses.FINEMAPPED_IN_SAMPLE_LD.value, + f.lit(1.0), + ) + .when( + f.col("confidence") + == CredibleSetConfidenceClasses.FINEMAPPED_OUT_OF_SAMPLE_LD.value, + f.lit(0.75), + ) + .when( + f.col("confidence") + == CredibleSetConfidenceClasses.PICSED_SUMMARY_STATS.value, + f.lit(0.5), + ) + .when( + f.col("confidence") + == CredibleSetConfidenceClasses.PICSED_TOP_HIT.value, + f.lit(0.25), + ) + .when( + f.col("confidence") == CredibleSetConfidenceClasses.UNKNOWN.value, + f.lit(0.0), + ) + ) diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index dbb5ecf48..ac3b5976e 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -30,6 +30,7 @@ ) from gentropy.dataset.l2g_features.l2g_feature import L2GFeature from gentropy.dataset.l2g_features.other import ( + CredibleSetConfidenceFeature, GeneCountFeature, ProteinGeneCountFeature, ) @@ -125,6 +126,7 @@ class FeatureFactory: "vepMaximumNeighbourhood": VepMaximumNeighbourhoodFeature, "geneCount500kb": GeneCountFeature, "proteinGeneCount500kb": ProteinGeneCountFeature, + "credibleSetConfidence": CredibleSetConfidenceFeature, } def __init__( diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index a841af0a6..5fc3e8eaa 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -63,6 +63,7 @@ common_genecount_feature_logic, GeneCountFeature, ProteinGeneCountFeature, + CredibleSetConfidenceFeature, ) from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus @@ -102,6 +103,7 @@ VepMeanNeighbourhoodFeature, GeneCountFeature, ProteinGeneCountFeature, + 
CredibleSetConfidenceFeature, ], ) def test_feature_factory_return_type( @@ -217,6 +219,33 @@ def sample_variant_index(spark: SparkSession) -> VariantIndex: ) +@pytest.fixture(scope="module") +def sample_variant_index_schema() -> StructType: + """Partial schema of the variant index.""" + return StructType( + [ + StructField("variantId", StringType(), True), + StructField("chromosome", StringType(), True), + StructField("position", IntegerType(), True), + StructField("referenceAllele", StringType(), True), + StructField("alternateAllele", StringType(), True), + StructField( + "transcriptConsequences", + ArrayType( + StructType( + [ + StructField("distanceFromTss", LongType(), True), + StructField("targetId", StringType(), True), + StructField("isEnsemblCanonical", BooleanType(), True), + ] + ) + ), + True, + ), + ] + ) + + class TestCommonColocalisationFeatureLogic: """Test the common logic of the colocalisation features.""" @@ -544,7 +573,11 @@ def test_common_neighbourhood_distance_feature_logic( ), "Output doesn't meet the expectation." 
@pytest.fixture(autouse=True) - def _setup(self: TestCommonDistanceFeatureLogic, spark: SparkSession) -> None: + def _setup( + self: TestCommonDistanceFeatureLogic, + spark: SparkSession, + sample_variant_index_schema: StructType, + ) -> None: """Set up testing fixtures.""" self.distance_type = "distanceFromTss" self.sample_study_locus = StudyLocus( @@ -571,28 +604,6 @@ def _setup(self: TestCommonDistanceFeatureLogic, spark: SparkSession) -> None: ), _schema=StudyLocus.get_schema(), ) - self.variant_index_schema = StructType( - [ - StructField("variantId", StringType(), True), - StructField("chromosome", StringType(), True), - StructField("position", IntegerType(), True), - StructField("referenceAllele", StringType(), True), - StructField("alternateAllele", StringType(), True), - StructField( - "transcriptConsequences", - ArrayType( - StructType( - [ - StructField("distanceFromTss", LongType(), True), - StructField("targetId", StringType(), True), - StructField("isEnsemblCanonical", BooleanType(), True), - ] - ) - ), - True, - ), - ] - ) self.sample_variant_index = VariantIndex( _df=spark.createDataFrame( [ @@ -630,7 +641,7 @@ def _setup(self: TestCommonDistanceFeatureLogic, spark: SparkSession) -> None: ], ), ], - self.variant_index_schema, + sample_variant_index_schema, ), _schema=VariantIndex.get_schema(), ) @@ -915,3 +926,72 @@ def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None: ), _schema=GeneIndex.get_schema(), ) + + +class TestCredibleSetConfidenceFeatureLogic: + """Test the CredibleSetConfidenceFeature method.""" + + def test_compute( + self: TestCredibleSetConfidenceFeatureLogic, + spark: SparkSession, + ) -> None: + """Test the logic of the function that scores a credible set's confidence.""" + sample_study_loci_to_annotate = self.sample_study_locus + observed_df = CredibleSetConfidenceFeature.compute( + study_loci_to_annotate=sample_study_loci_to_annotate, + feature_dependency={ + "study_locus": self.sample_study_locus, + 
"variant_index": self.sample_variant_index, + }, + ) + assert observed_df.df.first()["featureValue"] == 0.25 + + @pytest.fixture(autouse=True) + def _setup( + self: TestCredibleSetConfidenceFeatureLogic, + spark: SparkSession, + sample_variant_index_schema: StructType, + ) -> None: + """Set up testing fixtures.""" + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "lead1", + "studyId": "study1", + "confidence": "PICS fine-mapped credible set based on reported top hit", + "chromosome": "1", + "locus": [ + { + "variantId": "lead1", + }, + ], + }, + ], + StudyLocus.get_schema(), + ), + _schema=StudyLocus.get_schema(), + ) + self.sample_variant_index = VariantIndex( + _df=spark.createDataFrame( + [ + ( + "lead1", + "chrom", + 1, + "A", + "T", + [ + { + "distanceFromTss": 10, + "targetId": "gene1", + "isEnsemblCanonical": True, + }, + ], + ) + ], + sample_variant_index_schema, + ), + _schema=VariantIndex.get_schema(), + ) From 3e61996a36fa9cd3d255692946ede3c30aa9915c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Fri, 25 Oct 2024 12:40:42 +0100 Subject: [PATCH 135/188] feat(l2g): normalise distance features (#878) * feat(l2g): normalise distance features * feat(l2g): normalise distance features * chore: fix tests --- src/gentropy/dataset/l2g_features/distance.py | 11 +++++++++-- tests/gentropy/dataset/test_l2g_feature.py | 8 ++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py index 8d42d30ed..2149dc339 100644 --- a/src/gentropy/dataset/l2g_features/distance.py +++ b/src/gentropy/dataset/l2g_features/distance.py @@ -64,7 +64,10 @@ def common_distance_feature_logic( on="variantId", how="inner", ) - .withColumn("distance_score", f.log10(distance_score_expr)) + .withColumn( + "distance_score", + 
f.log10(distance_score_expr) / f.log10(f.lit(genomic_window + 1)), + ) .groupBy("studyLocusId", "geneId") .agg(agg_expr.alias(feature_name)) ) @@ -105,7 +108,11 @@ def common_neighbourhood_distance_feature_logic( "regional_metric", f.mean(f.col(local_feature_name)).over(Window.partitionBy("studyLocusId")), ) - .withColumn(feature_name, f.col(local_feature_name) - f.col("regional_metric")) + .withColumn( + feature_name, + (f.col(local_feature_name) - f.col("regional_metric")) + / f.log10(f.lit(genomic_window + 1)), + ) .drop("regional_metric", local_feature_name) ) diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 5fc3e8eaa..1757b7f68 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -502,15 +502,15 @@ class TestCommonDistanceFeatureLogic: { "studyLocusId": "1", "geneId": "gene2", - "distanceSentinelTss": 0.95, + "distanceSentinelTss": 0.92, }, ], ), ( "distanceTssMean", [ - {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.09}, - {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.65}, + {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.08}, + {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.63}, ], ), ], @@ -565,7 +565,7 @@ def test_common_neighbourhood_distance_feature_logic( .orderBy(f.col(feature_name).asc()) ) expected_df = spark.createDataFrame( - (["1", "gene1", -0.48], ["1", "gene2", 0.48]), + (["1", "gene1", -0.44], ["1", "gene2", 0.44]), ["studyLocusId", "geneId", feature_name], ).orderBy(feature_name) assert ( From ee96c11c450557399d91897add73f7ceeb323f46 Mon Sep 17 00:00:00 2001 From: xyg123 <33658607+xyg123@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:04:26 +0100 Subject: [PATCH 136/188] feat: l2g feature to indicate if gene is protein-coding or not (#873) * feat: l2g feature to indicate if gene is protein-coding or not * chore: pre-commit auto fixes [...] 
* fix: remove print from tests * fix: remove print from tests * fix: correct window size for gene annotation * fix: test error with MkDocs * fix: rename feature to remove range --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../python_api/datasets/l2g_features/other.md | 2 + src/gentropy/config.py | 2 + src/gentropy/dataset/l2g_features/other.py | 85 +++++++++++++++++ src/gentropy/method/l2g/feature_factory.py | 3 + tests/gentropy/dataset/test_l2g_feature.py | 95 +++++++++++++++++++ 5 files changed, 187 insertions(+) diff --git a/docs/python_api/datasets/l2g_features/other.md b/docs/python_api/datasets/l2g_features/other.md index 6120f8b7a..a3d89c13b 100644 --- a/docs/python_api/datasets/l2g_features/other.md +++ b/docs/python_api/datasets/l2g_features/other.md @@ -6,8 +6,10 @@ title: Other features ::: gentropy.dataset.l2g_features.other.GeneCountFeature ::: gentropy.dataset.l2g_features.other.ProteinGeneCountFeature +::: gentropy.dataset.l2g_features.other.ProteinCodingFeature ::: gentropy.dataset.l2g_features.other.CredibleSetConfidenceFeature ## Common logic ::: gentropy.dataset.l2g_features.other.common_genecount_feature_logic +::: gentropy.dataset.l2g_features.other.is_protein_coding_feature_logic diff --git a/src/gentropy/config.py b/src/gentropy/config.py index b3fe73aa8..5533043d2 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -262,6 +262,7 @@ class LocusToGeneConfig(StepConfig): "geneCount500kb", "proteinGeneCount500kb", "credibleSetConfidence", + "isProteinCoding", ] ) hyperparameters: dict[str, Any] = field( @@ -333,6 +334,7 @@ class LocusToGeneFeatureMatrixConfig(StepConfig): "geneCount500kb", "proteinGeneCount500kb", "credibleSetConfidence", + "isProteinCoding", ] ) _target_: str = "gentropy.l2g.LocusToGeneFeatureMatrixStep" diff --git a/src/gentropy/dataset/l2g_features/other.py b/src/gentropy/dataset/l2g_features/other.py index 39348b00b..4c28c2a0c 100644 --- 
a/src/gentropy/dataset/l2g_features/other.py +++ b/src/gentropy/dataset/l2g_features/other.py @@ -82,6 +82,51 @@ def common_genecount_feature_logic( ) +def is_protein_coding_feature_logic( + study_loci_to_annotate: StudyLocus | L2GGoldStandard, + *, + gene_index: GeneIndex, + feature_name: str, + genomic_window: int, +) -> DataFrame: + """Computes the feature to indicate if a gene is protein-coding or not. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci + that will be used for annotation + gene_index (GeneIndex): Dataset containing information related to all genes in release. + feature_name (str): The name of the feature + genomic_window (int): The maximum window size to consider + + Returns: + DataFrame: Feature dataset, with 1 if the gene is protein-coding, 0 if not. + """ + study_loci_window = ( + study_loci_to_annotate.df.withColumn( + "window_start", f.col("position") - (genomic_window / 2) + ) + .withColumn("window_end", f.col("position") + (genomic_window / 2)) + .withColumnRenamed("chromosome", "SL_chromosome") + ) + return ( + study_loci_window.join( + gene_index.df.alias("genes"), + on=( + (f.col("SL_chromosome") == f.col("genes.chromosome")) + & (f.col("genes.tss") >= f.col("window_start")) + & (f.col("genes.tss") <= f.col("window_end")) + ), + how="inner", + ) + .withColumn( + feature_name, + f.when(f.col("biotype") == "protein_coding", f.lit(1)).otherwise(f.lit(0)), + ) + .select("studyLocusId", "geneId", feature_name) + .distinct() + ) + + class GeneCountFeature(L2GFeature): """Counts the number of genes within a specified window size from the study locus.""" @@ -163,6 +208,46 @@ def compute( ) +class ProteinCodingFeature(L2GFeature): + """Indicates whether a gene is protein-coding within a specified window size from the study locus.""" + + feature_dependency_type = GeneIndex + feature_name = "isProteinCoding" + + @classmethod + def compute( + cls: type[ProteinCodingFeature], + 
study_loci_to_annotate: StudyLocus | L2GGoldStandard, + feature_dependency: dict[str, Any], + ) -> ProteinCodingFeature: + """Computes the protein coding feature. + + Args: + study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation + feature_dependency (dict[str, Any]): Dictionary containing dependencies, including gene index + + Returns: + ProteinCodingFeature: Feature dataset with 1 if the gene is protein-coding, 0 otherwise + """ + genomic_window = 1000000 + protein_coding_df = is_protein_coding_feature_logic( + study_loci_to_annotate=study_loci_to_annotate, + feature_name=cls.feature_name, + genomic_window=genomic_window, + **feature_dependency, + ) + + return cls( + _df=convert_from_wide_to_long( + protein_coding_df, + id_vars=("studyLocusId", "geneId"), + var_name="featureName", + value_name="featureValue", + ), + _schema=cls.get_schema(), + ) + + class CredibleSetConfidenceFeature(L2GFeature): """Distance of the sentinel variant to gene TSS. 
This is not weighted by the causal probability.""" diff --git a/src/gentropy/method/l2g/feature_factory.py b/src/gentropy/method/l2g/feature_factory.py index ac3b5976e..fe792058a 100644 --- a/src/gentropy/method/l2g/feature_factory.py +++ b/src/gentropy/method/l2g/feature_factory.py @@ -1,3 +1,4 @@ +# isort: skip_file """Factory that computes features based on an input list.""" from __future__ import annotations @@ -33,6 +34,7 @@ CredibleSetConfidenceFeature, GeneCountFeature, ProteinGeneCountFeature, + ProteinCodingFeature, ) from gentropy.dataset.l2g_features.vep import ( VepMaximumFeature, @@ -126,6 +128,7 @@ class FeatureFactory: "vepMaximumNeighbourhood": VepMaximumNeighbourhoodFeature, "geneCount500kb": GeneCountFeature, "proteinGeneCount500kb": ProteinGeneCountFeature, + "isProteinCoding": ProteinCodingFeature, "credibleSetConfidence": CredibleSetConfidenceFeature, } diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 1757b7f68..c6019cefc 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -1,5 +1,6 @@ # pylint: disable=too-few-public-methods # isort: skip_file + """Test locus-to-gene feature generation.""" from __future__ import annotations @@ -61,9 +62,11 @@ ) from gentropy.dataset.l2g_features.other import ( common_genecount_feature_logic, + is_protein_coding_feature_logic, GeneCountFeature, ProteinGeneCountFeature, CredibleSetConfidenceFeature, + ProteinCodingFeature, ) from gentropy.dataset.study_index import StudyIndex from gentropy.dataset.study_locus import StudyLocus @@ -104,6 +107,7 @@ GeneCountFeature, ProteinGeneCountFeature, CredibleSetConfidenceFeature, + ProteinCodingFeature, ], ) def test_feature_factory_return_type( @@ -878,6 +882,7 @@ def test_common_genecount_feature_logic( .select("studyLocusId", "geneId", feature_name) .orderBy("studyLocusId", "geneId") ) + assert ( observed_df.collect() == expected_df.collect() ), f"Expected 
and observed dataframes do not match for feature {feature_name}." @@ -928,6 +933,96 @@ def _setup(self: TestCommonGeneCountFeatureLogic, spark: SparkSession) -> None: ) +class TestCommonProteinCodingFeatureLogic: + """Test the is_protein_coding_feature_logic function.""" + + @pytest.mark.parametrize( + ("expected_data"), + [ + ( + [ + {"studyLocusId": "1", "geneId": "gene1", "isProteinCoding500kb": 1}, + {"studyLocusId": "1", "geneId": "gene2", "isProteinCoding500kb": 1}, + {"studyLocusId": "1", "geneId": "gene3", "isProteinCoding500kb": 0}, + ] + ), + ], + ) + def test_is_protein_coding_feature_logic( + self: TestCommonProteinCodingFeatureLogic, + spark: SparkSession, + expected_data: list[dict[str, Any]], + ) -> None: + """Test the logic of the is_protein_coding_feature_logic function.""" + observed_df = ( + is_protein_coding_feature_logic( + study_loci_to_annotate=self.sample_study_locus, + gene_index=self.sample_gene_index, + feature_name="isProteinCoding500kb", + genomic_window=500000, + ) + .select("studyLocusId", "geneId", "isProteinCoding500kb") + .orderBy("studyLocusId", "geneId") + ) + + expected_df = ( + spark.createDataFrame(expected_data) + .select("studyLocusId", "geneId", "isProteinCoding500kb") + .orderBy("studyLocusId", "geneId") + ) + assert ( + observed_df.collect() == expected_df.collect() + ), "Expected and observed DataFrames do not match." 
+ + @pytest.fixture(autouse=True) + def _setup(self: TestCommonProteinCodingFeatureLogic, spark: SparkSession) -> None: + """Set up sample data for the test.""" + # Sample study locus data + self.sample_study_locus = StudyLocus( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "variantId": "var1", + "studyId": "study1", + "chromosome": "1", + "position": 1000000, + }, + ], + StudyLocus.get_schema(), + ), + _schema=StudyLocus.get_schema(), + ) + + # Sample gene index data with biotype + self.sample_gene_index = GeneIndex( + _df=spark.createDataFrame( + [ + { + "geneId": "gene1", + "chromosome": "1", + "tss": 950000, + "biotype": "protein_coding", + }, + { + "geneId": "gene2", + "chromosome": "1", + "tss": 1050000, + "biotype": "protein_coding", + }, + { + "geneId": "gene3", + "chromosome": "1", + "tss": 1010000, + "biotype": "non_coding", + }, + ], + GeneIndex.get_schema(), + ), + _schema=GeneIndex.get_schema(), + ) + + class TestCredibleSetConfidenceFeatureLogic: """Test the CredibleSetConfidenceFeature method.""" From 85be796c48a8c8afdd45cadde386d0cc76d3e692 Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Fri, 25 Oct 2024 13:18:07 +0100 Subject: [PATCH 137/188] feat: making credset qc have an option to coalsce and deduplicate credible sets without ld pruning (#877) * feat: making credset qc have an option to coalsce and deduplicate credible sets without ld pruning * fix: adding recursiveFileLookup for reading credible sets * Update src/gentropy/credible_set_qc.py Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> * Update src/gentropy/method/susie_inf.py Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> * Update src/gentropy/method/susie_inf.py Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> * chore: pre-commit auto fixes [...] 
* fix: few typos from review * fix: if statement tweaks --------- Co-authored-by: Yakov Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/gentropy/credible_set_qc.py | 30 ++++++++++++++------- src/gentropy/method/susie_inf.py | 45 ++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/gentropy/credible_set_qc.py b/src/gentropy/credible_set_qc.py index 31d298886..8ea9e06dd 100644 --- a/src/gentropy/credible_set_qc.py +++ b/src/gentropy/credible_set_qc.py @@ -16,11 +16,12 @@ def __init__( self, session: Session, credible_sets_path: str, - study_index_path: str, - ld_index_path: str, output_path: str, p_value_threshold: float = 1e-5, purity_min_r2: float = 0.01, + clump: bool = False, + ld_index_path: str | None = None, + study_index_path: str | None = None, ld_min_r2: float = 0.8, ) -> None: """Run credible set quality control step. @@ -28,23 +29,32 @@ def __init__( Args: session (Session): Session object. credible_sets_path (str): Path to credible sets file. - study_index_path (str): Path to study index file. - ld_index_path (str): Path to LD index file. output_path (str): Path to write the output file. p_value_threshold (float): P-value threshold for credible set quality control. Default is 1e-5. purity_min_r2 (float): Minimum R2 for purity estimation. Default is 0.01. + clump (bool): Whether to clump the credible sets by LD. Default is False. + ld_index_path (str | None): Path to LD index file. + study_index_path (str | None): Path to study index file. ld_min_r2 (float): Minimum R2 for LD estimation. Default is 0.8. 
""" - cred_sets = StudyLocus.from_parquet(session, credible_sets_path) - study_index = StudyIndex.from_parquet(session, study_index_path) - ld_index = LDIndex.from_parquet(session, ld_index_path) - + cred_sets = StudyLocus.from_parquet( + session, credible_sets_path, recursiveFileLookup=True + ).coalesce(200) + ld_index = ( + LDIndex.from_parquet(session, ld_index_path) if ld_index_path else None + ) + study_index = ( + StudyIndex.from_parquet(session, study_index_path) + if study_index_path + else None + ) cred_sets_clean = SUSIE_inf.credible_set_qc( cred_sets, - study_index, - ld_index, p_value_threshold, purity_min_r2, + clump, + ld_index, + study_index, ld_min_r2, ) diff --git a/src/gentropy/method/susie_inf.py b/src/gentropy/method/susie_inf.py index 4f75faad8..f90d15a14 100644 --- a/src/gentropy/method/susie_inf.py +++ b/src/gentropy/method/susie_inf.py @@ -9,6 +9,7 @@ import pyspark.sql.functions as f import scipy.linalg import scipy.special +from pyspark.sql.window import Window from scipy.optimize import minimize, minimize_scalar from scipy.special import logsumexp @@ -469,43 +470,59 @@ def cred_inf( @staticmethod def credible_set_qc( cred_sets: StudyLocus, - study_index: StudyIndex, - ld_index: LDIndex, p_value_threshold: float = 1e-5, purity_min_r2: float = 0.01, + clump: bool = False, + ld_index: LDIndex | None = None, + study_index: StudyIndex | None = None, ld_min_r2: float = 0.8, ) -> StudyLocus: """Filter credible sets by lead P-value and min-R2 purity, and performs LD clumping. 
+ In case of duplicated loci, the filtering retains the loci wth the highest credibleSetLog10BF + Args: cred_sets (StudyLocus): StudyLocus object with credible sets to filter/clump - study_index (StudyIndex): StudyIndex object - ld_index (LDIndex): LDIndex object p_value_threshold (float): p-value threshold for filtering credible sets, default is 1e-5 purity_min_r2 (float): min-R2 purity threshold for filtering credible sets, default is 0.01 + clump (bool): Whether to clump the credible sets by LD, default is False + ld_index (LDIndex | None): LDIndex object + study_index (StudyIndex | None): StudyIndex object ld_min_r2 (float): LD R2 threshold for clumping, default is 0.8 Returns: StudyLocus: Credible sets which pass filters and LD clumping. """ - df = ( + cred_sets.df = ( cred_sets.df.withColumn( "pValue", f.col("pValueMantissa") * f.pow(10, f.col("pValueExponent")) ) .filter(f.col("pValue") <= p_value_threshold) .filter(f.col("purityMinR2") >= purity_min_r2) .drop("pValue") + .withColumn( + "rn", + f.row_number().over( + Window.partitionBy("studyLocusId").orderBy( + f.desc("credibleSetLog10BF") + ) + ), + ) + .filter(f.col("rn") == 1) + .drop("rn") ) - cred_sets.df = df - cred_sets = ( - cred_sets.annotate_ld(study_index, ld_index, ld_min_r2) - .clump() - .filter( - ~f.array_contains( - f.col("qualityControls"), - "Explained by a more significant variant in high LD (clumped)", + if clump: + assert study_index, "Running in clump mode, which requires study_index." + assert ld_index, "Running in clump mode, which requires ld_index." 
+ cred_sets = ( + cred_sets.annotate_ld(study_index, ld_index, ld_min_r2) + .clump() + .filter( + ~f.array_contains( + f.col("qualityControls"), + "Explained by a more significant variant in high LD (clumped)", + ) ) ) - ) return cred_sets From 5d9d7cbb6487ec3cc4d7d4152f23c1b62a4ba0ab Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:06:55 +0100 Subject: [PATCH 138/188] test(study_locus) credible_set_qc step integration test (#880) * test(credible_set_qc_step): added main functionality test * chore: moved default values to step config * chore: ensure to save with study locus schema * chore: enhanced docs * docs(credible_set_qc_ste): docs page * chore: typo --------- Co-authored-by: Szymon Szyszkowski --- docs/python_api/steps/credible_set_qc_step.md | 7 + src/gentropy/config.py | 11 +- src/gentropy/credible_set_qc.py | 46 ++++-- src/gentropy/method/susie_inf.py | 12 +- tests/gentropy/step/test_credible_set_qc.py | 151 ++++++++++++++++++ 5 files changed, 203 insertions(+), 24 deletions(-) create mode 100644 docs/python_api/steps/credible_set_qc_step.md create mode 100644 tests/gentropy/step/test_credible_set_qc.py diff --git a/docs/python_api/steps/credible_set_qc_step.md b/docs/python_api/steps/credible_set_qc_step.md new file mode 100644 index 000000000..2999115e7 --- /dev/null +++ b/docs/python_api/steps/credible_set_qc_step.md @@ -0,0 +1,7 @@ +--- +title: credible_set_qc +--- + +::: gentropy.credible_set_qc.CredibleSetQCStep + +::: gentropy.config.CredibleSetQCStepConfig diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 5533043d2..c5889dbab 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -589,16 +589,18 @@ class SummaryStatisticsQCStepConfig(StepConfig): @dataclass -class CredibleSetQCConfig(StepConfig): +class CredibleSetQCStepConfig(StepConfig): """Credible set quality control step configuration.""" credible_sets_path: str = MISSING - 
study_index_path: str = MISSING - ld_index_path: str = MISSING output_path: str = MISSING p_value_threshold: float = 1e-5 purity_min_r2: float = 0.01 - ld_min_r2: float = 0.8 + clump: bool = False + ld_index_path: str | None = None + study_index_path: str | None = None + ld_min_r2: float | None = 0.8 + n_partitions: int | None = 200 _target_: str = "gentropy.credible_set_qc.CredibleSetQCStep" @@ -732,3 +734,4 @@ def register_config() -> None: node=LocusToGeneEvidenceStepConfig, ) cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig) + cs.store(group="step", name="credible_set_qc", node=CredibleSetQCStepConfig) diff --git a/src/gentropy/credible_set_qc.py b/src/gentropy/credible_set_qc.py index 8ea9e06dd..5b89faf59 100644 --- a/src/gentropy/credible_set_qc.py +++ b/src/gentropy/credible_set_qc.py @@ -17,29 +17,38 @@ def __init__( session: Session, credible_sets_path: str, output_path: str, - p_value_threshold: float = 1e-5, - purity_min_r2: float = 0.01, - clump: bool = False, - ld_index_path: str | None = None, - study_index_path: str | None = None, - ld_min_r2: float = 0.8, + p_value_threshold: float, + purity_min_r2: float, + clump: bool, + ld_index_path: str | None, + study_index_path: str | None, + ld_min_r2: float | None, + n_partitions: int | None, ) -> None: """Run credible set quality control step. + Check defaults used by steps in hydra configuration `gentropy.config.CredibleSetQCStepConfig` + + Due to the large number of partitions at the input credible_set_path after finemapping, the + best strategy it is to repartition and save the dataset after deduplication. + + The `clump` mode will perform additional LD based clumping on the input credible sets. + Enabling `clump` mode requires providing `ld_index_path`, `study_index_path` and `ld_min_r2`. + Args: session (Session): Session object. credible_sets_path (str): Path to credible sets file. output_path (str): Path to write the output file. 
- p_value_threshold (float): P-value threshold for credible set quality control. Default is 1e-5. - purity_min_r2 (float): Minimum R2 for purity estimation. Default is 0.01. - clump (bool): Whether to clump the credible sets by LD. Default is False. + p_value_threshold (float): P-value threshold for credible set quality control. + purity_min_r2 (float): Minimum R2 for purity estimation. + clump (bool): Whether to clump the credible sets by LD. ld_index_path (str | None): Path to LD index file. study_index_path (str | None): Path to study index file. - ld_min_r2 (float): Minimum R2 for LD estimation. Default is 0.8. + ld_min_r2 (float | None): Minimum R2 for LD estimation. + n_partitions (int | None): Number of partitions to coalesce the dataset after reading. Defaults to 200 """ - cred_sets = StudyLocus.from_parquet( - session, credible_sets_path, recursiveFileLookup=True - ).coalesce(200) + n_partitions = n_partitions or 200 + ld_index = ( LDIndex.from_parquet(session, ld_index_path) if ld_index_path else None ) @@ -48,6 +57,11 @@ def __init__( if study_index_path else None ) + + cred_sets = StudyLocus.from_parquet( + session, credible_sets_path, recursiveFileLookup=True + ).coalesce(n_partitions) + cred_sets_clean = SUSIE_inf.credible_set_qc( cred_sets, p_value_threshold, @@ -57,5 +71,7 @@ def __init__( study_index, ld_min_r2, ) - - cred_sets_clean.df.write.mode(session.write_mode).parquet(output_path) + # ensure the saved object is still a valid StudyLocus + StudyLocus( + _df=cred_sets_clean.df, _schema=StudyLocus.get_schema() + ).df.write.mode(session.write_mode).parquet(output_path) diff --git a/src/gentropy/method/susie_inf.py b/src/gentropy/method/susie_inf.py index f90d15a14..e8a4a57b1 100644 --- a/src/gentropy/method/susie_inf.py +++ b/src/gentropy/method/susie_inf.py @@ -15,7 +15,7 @@ from gentropy.dataset.ld_index import LDIndex from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus +from 
gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck @dataclass @@ -475,11 +475,12 @@ def credible_set_qc( clump: bool = False, ld_index: LDIndex | None = None, study_index: StudyIndex | None = None, - ld_min_r2: float = 0.8, + ld_min_r2: float | None = 0.8, ) -> StudyLocus: """Filter credible sets by lead P-value and min-R2 purity, and performs LD clumping. - In case of duplicated loci, the filtering retains the loci wth the highest credibleSetLog10BF + In case of duplicated loci, the filtering retains the loci wth the highest credibleSetlog10BF. + Args: cred_sets (StudyLocus): StudyLocus object with credible sets to filter/clump @@ -488,7 +489,7 @@ def credible_set_qc( clump (bool): Whether to clump the credible sets by LD, default is False ld_index (LDIndex | None): LDIndex object study_index (StudyIndex | None): StudyIndex object - ld_min_r2 (float): LD R2 threshold for clumping, default is 0.8 + ld_min_r2 (float | None): LD R2 threshold for clumping, default is 0.8 Returns: StudyLocus: Credible sets which pass filters and LD clumping. @@ -514,13 +515,14 @@ def credible_set_qc( if clump: assert study_index, "Running in clump mode, which requires study_index." assert ld_index, "Running in clump mode, which requires ld_index." + assert ld_min_r2, "Running in clump mode, which requires ld_min_r2 value." 
cred_sets = ( cred_sets.annotate_ld(study_index, ld_index, ld_min_r2) .clump() .filter( ~f.array_contains( f.col("qualityControls"), - "Explained by a more significant variant in high LD (clumped)", + StudyLocusQualityCheck.LD_CLUMPED.value, ) ) ) diff --git a/tests/gentropy/step/test_credible_set_qc.py b/tests/gentropy/step/test_credible_set_qc.py new file mode 100644 index 000000000..c7fb58c8c --- /dev/null +++ b/tests/gentropy/step/test_credible_set_qc.py @@ -0,0 +1,151 @@ +"""Test credible set qc step.""" + +from pathlib import Path + +import pytest +from pyspark.sql import functions as f +from pyspark.sql import types as t + +from gentropy.common.session import Session +from gentropy.credible_set_qc import CredibleSetQCStep +from gentropy.dataset.study_locus import StudyLocus + + +@pytest.mark.step_test +class TestCredibleSetQCStep: + """Test credible set qc.""" + + @pytest.fixture(autouse=True) + def _setup(self, session: Session, tmp_path: Path) -> None: + """Setup StudyLocus for testing.""" + # NOTE: About the input dataset for tests + # Entry dataset contains 6 loci (3 of them contains duplicated studyLocusId, 2 contains the same studyId) + # The step is expected to remove the duplicates of the studyLocus (1 row) + # THe step is expected to remove rows which pValue <= p_val_threshold (1 row) + # The step is expected to remove rows which purityMinR2 >= to purity_min_r2 (1 row) + # at the end we should end up with 2 non duplicated loci + self.purity_min_r2 = 0.01 + self.p_value_threshold = 1e-5 + self.n_partitions = 1 + credible_set_data = [ + ( + "A", # duplicated credibleSetId + "1_100_G_GA", # variantId + "GCST1", # duplicated studyId + 1.0, # pValMantissa + -6, # pValExponent + 1.0, # credibleSetlog10BF -> should be skipped due to the lowest Log10BF + 1.0, # purityMinR2 + ), + ( + "A", # duplicated credibleSetId + "1_100_G_GA", # variantId + "GCST1", # duplicated studyId + 1.0, # pValMantissa + -6, # pValExponent + 2.0, # credibleSetlog10BF -> highest 
log10BF within duplicates considering single study + 1.0, # purityMinR2 + ), + ( + "A", # duplicated credibleSetId + "1_100_G_GA", # variantId + "GCST2", # studyId + 1.0, # pValMantissa + -6, # pValExponent + 3.0, # credibleSetlog10BF -> highest log10BF within duplicates + 1.0, # purityMinR2 + ), + ( + "B", # credibleSetId + "1_200_G_GA", # variantId + "GCST3", # studyId + 1.0, # pValMantissa + -4, # too high pValExponent => pVal = 1.0e-4 < p_val_threshold + 1.0, # credibleSetlog10BF + 1.0, # purityMinR2 + ), + ( + "C", # credibleSetId + "1_300_G_GA", # variantId + "GCST3", # studyId + 1.0, # pValMantissa + -6, # pValExponent + 1.0, # credibleSetlog10BF + 0.001, # purityMinR2 < purity_min_r2 + ), + ( + # full row OK! + "D", # credibleSetId + "1_400_G_GA", # variantId + "GCST3", # studyId + 1.0, # pValMantissa + -6, # pValExponent + 1.0, # credibleSetlog10BF + 1.0, # purityMinR2 + ), + ] + cs_schema = t.StructType( + [ + t.StructField("studyLocusId", t.StringType(), True), + t.StructField("variantId", t.StringType(), True), + t.StructField("studyId", t.StringType(), True), + t.StructField("pValueMantissa", t.FloatType(), True), + t.StructField("pValueExponent", t.IntegerType(), True), + t.StructField("credibleSetlog10BF", t.DoubleType(), True), + t.StructField("purityMinR2", t.DoubleType(), True), + ] + ) + + # NOTE: Use proper input! + # Ensure the input dataset is saved per studyLocusId in recursive manner. + # This mimics the dataset with multiple loci evaluated separately. 
+ self.credible_set_path = str(tmp_path / "credible_set_datasets") + cs_df = session.spark.createDataFrame(credible_set_data, schema=cs_schema) + cs_path = tmp_path / "credible_set_dataset" + loci_ids = {row["studyLocusId"] for row in cs_df.collect()} + for loci_id in loci_ids: + loci_path = str(cs_path / loci_id) + cs_df.filter(f.col("studyLocusId") == loci_id).write.parquet(loci_path) + self.input_cs_df = cs_df + self.cs_path = str(cs_path) + self.output_path = str(tmp_path / "clean_credible_sets") + + def test_step(self, session: Session) -> None: + """Invoke the step to check if it works correctly.""" + assert not Path(self.output_path).exists(), "Input for qc does not exists." + assert Path(self.cs_path).exists(), "Output of qc is not emptied before test." + assert self.input_cs_df.count() == 6, "Incorrect number of rows." + CredibleSetQCStep( + session=session, + credible_sets_path=self.cs_path, + output_path=self.output_path, + p_value_threshold=self.p_value_threshold, + purity_min_r2=self.purity_min_r2, + clump=False, + ld_index_path=None, + study_index_path=None, + ld_min_r2=None, + n_partitions=self.n_partitions, + ) + + assert Path(self.output_path).exists(), "Output of qc does not exists." + # check the number of partitions + partitions = [ + str(p) + for p in Path(self.output_path).iterdir() + if str(p).endswith(".parquet") + ] + assert ( + len(partitions) == self.n_partitions + ), "Incorrect number of partitions in the output." 
+ cs = StudyLocus.from_parquet( + session, self.output_path, recursiveFileLookup=True + ) + assert cs.df.count() == 2 # Row A where LogBF == 3.0 and row D + assert cs.df.rdd.getNumPartitions() == self.n_partitions + data = { + row["studyLocusId"]: row["credibleSetlog10BF"] for row in cs.df.collect() + } + assert sorted(data.keys()) == ["A", "D"] + # ensure the Locus A with highest credibleSetlog10BF was chosen + assert data["A"] == 3.0 From cbbf3c5d39f685ea8adeae70f3e4591294e8f9a3 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Sat, 26 Oct 2024 18:07:07 +0100 Subject: [PATCH 139/188] feat: flagging duplicated entries while keeping one of the duplicates (#876) * feat: flagging duplicated entries while keeping one of the duplicates * feat: turning validation on for validation step * fix: test for duplication flagging * fix: test for duplication flagging --------- Co-authored-by: Yakov --- src/gentropy/dataset/dataset.py | 11 ++--- src/gentropy/study_locus_validation.py | 2 + tests/gentropy/dataset/test_study_index.py | 8 ++-- tests/gentropy/dataset/test_study_locus.py | 55 ++++++++++++++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/src/gentropy/dataset/dataset.py b/src/gentropy/dataset/dataset.py index 779faefe2..67fe05eaf 100644 --- a/src/gentropy/dataset/dataset.py +++ b/src/gentropy/dataset/dataset.py @@ -322,7 +322,9 @@ def update_quality_flag( @staticmethod def flag_duplicates(test_column: Column) -> Column: - """Return True for duplicated values in column. + """Return True for rows, where the value was already seen in column. + + This implementation allows keeping the first occurrence of the value. 
Args: test_column (Column): Column to check for duplicates @@ -331,12 +333,7 @@ def flag_duplicates(test_column: Column) -> Column: Column: Column with a boolean flag for duplicates """ return ( - f.count(test_column).over( - Window.partitionBy(test_column).rowsBetween( - Window.unboundedPreceding, Window.unboundedFollowing - ) - ) - > 1 + f.row_number().over(Window.partitionBy(test_column).orderBy(f.rand())) > 1 ) @staticmethod diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 0be046a67..bca6b8e11 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -49,6 +49,8 @@ def __init__( .filter_credible_set(credible_interval=CredibleInterval.IS99) # Annotate credible set confidence: .assign_confidence() + # Flagging credible sets that are duplicated: + .validate_unique_study_locus_id() ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( diff --git a/tests/gentropy/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py index 4bfede7d9..05b652752 100644 --- a/tests/gentropy/dataset/test_study_index.py +++ b/tests/gentropy/dataset/test_study_index.py @@ -313,7 +313,7 @@ class TestUniquenessValidation: STUDY_DATA = [ # This is the only study to be flagged: ("s1", "eqtl", "p"), - ("s1", "eqtl", "p"), + ("s1", "eqtl", "p"), # Duplicate -> one should be flagged ("s3", "gwas", "p"), ("s4", "gwas", "p"), ] @@ -337,8 +337,8 @@ def test_uniqueness_correct_data(self: TestUniquenessValidation) -> None: """Testing if the function returns the right type.""" validated = self.study_index.validate_unique_study_id().persist() - # We have more than one flagged studies: - assert validated.df.filter(f.size(f.col("qualityControls")) > 0).count() > 1 + # We have only one flagged study: + assert validated.df.filter(f.size(f.col("qualityControls")) > 0).count() == 1 # The flagged study identifiers are 
found more than once: flagged_ids = { @@ -350,7 +350,7 @@ def test_uniqueness_correct_data(self: TestUniquenessValidation) -> None: } for _, count in flagged_ids.items(): - assert count > 1 + assert count == 1 # the right study is found: assert "s1" in flagged_ids diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index eaee0ebf3..3cbaf6866 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -1121,3 +1121,58 @@ def test_qc_valid_chromosomes( StudyLocusQualityCheck.INVALID_CHROMOSOME.value in row["qualityControls"] ) + + +class TestStudyLocusDuplicationFlagging: + """Collection of tests related to flagging redundant credible sets.""" + + STUDY_LOCUS_DATA = [ + # Non-duplicated: + ("1", "v1", "s1", "pics"), + # Triplicate: + ("3", "v3", "s1", "pics"), + ("3", "v3", "s1", "pics"), + ("3", "v3", "s1", "pics"), + ] + + STUDY_LOCUS_SCHEMA = t.StructType( + [ + t.StructField("studyLocusId", t.StringType(), False), + t.StructField("variantId", t.StringType(), False), + t.StructField("studyId", t.StringType(), False), + t.StructField("finemappingMethod", t.StringType(), False), + ] + ) + + @pytest.fixture(autouse=True) + def _setup(self: TestStudyLocusDuplicationFlagging, spark: SparkSession) -> None: + """Setup study locus for testing.""" + self.study_locus = StudyLocus( + _df=spark.createDataFrame( + self.STUDY_LOCUS_DATA, schema=self.STUDY_LOCUS_SCHEMA + ).withColumn( + "qualityControls", f.array().cast(t.ArrayType(t.StringType())) + ), + _schema=StudyLocus.get_schema(), + ) + + # Run validation: + self.validated = self.study_locus.validate_unique_study_locus_id() + + def test_duplication_flag_type(self: TestStudyLocusDuplicationFlagging) -> None: + """Test duplication flagging return type.""" + assert isinstance(self.validated, StudyLocus) + + def test_duplication_flag_no_data_loss( + self: TestStudyLocusDuplicationFlagging, + ) -> None: + """Test duplication 
flagging no data loss.""" + assert self.validated.df.count() == self.study_locus.df.count() + + def test_duplication_flag_correctness( + self: TestStudyLocusDuplicationFlagging, + ) -> None: + """Make sure that the end, there are two study loci that pass the validation.""" + assert self.validated.df.filter(f.size("qualityControls") == 0).count() == 2 + + assert self.validated.df.filter(f.size("qualityControls") > 0).count() == 2 From d12d65d849b4ca518b5a66a6460f677adffa1d35 Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: Mon, 28 Oct 2024 09:54:03 +0000 Subject: [PATCH 140/188] feat: flag and filter credible sets (#879) * feat(flag_and_filter_credible_sets): add code for identifying abnormal credible sets * feat(flag_and_filter_credible_sets): restructure tests * chore(flag_and_filter_credible_sets): update schema for spark loading * feat(flag_and_filter_credible_sets): first comple draft filtering abnormal pips code * fix(flag_and_filter_credible_sets): tweak broken code and unresolved merge * fix(flag_and_filter_credible_sets): amend test logic * fix(flag_and_filter_credible_sets): modify logic to simplify and account for floating point errors * chore(flag_and_filter_credible_sets): simplify logic to boolean --- src/gentropy/dataset/study_locus.py | 51 +++ src/gentropy/study_locus_validation.py | 2 + tests/gentropy/dataset/test_study_locus.py | 490 +++++++++++---------- 3 files changed, 322 insertions(+), 221 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index a0a231cfa..e685d828f 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -82,6 +82,7 @@ class StudyLocusQualityCheck(Enum): IN_MHC (str): Flagging study loci in the MHC region REDUNDANT_PICS_TOP_HIT (str): Flagging study loci in studies with PICS results from summary statistics EXPLAINED_BY_SUSIE (str): Study locus in region explained by a SuSiE credible set + ABNORMAL_PIPS (str): Flagging study loci with a sum 
of PIPs that are not in [0.99,1] OUT_OF_SAMPLE_LD (str): Study locus finemapped without in-sample LD reference INVALID_CHROMOSOME (str): Chromosome not in 1:22, X, Y, XY or MT """ @@ -113,6 +114,7 @@ class StudyLocusQualityCheck(Enum): TOP_HIT = "Study locus from curated top hit" EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set" OUT_OF_SAMPLE_LD = "Study locus finemapped without in-sample LD reference" + ABNORMAL_PIPS = "Study locus with a sum of PIPs that not in the expected range [0.99,1]" INVALID_CHROMOSOME = "Chromosome not in 1:22, X, Y, XY or MT" @@ -391,6 +393,55 @@ def _qc_subsignificant_associations( StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG, ) + def qc_abnormal_pips( + self: StudyLocus, + sum_pips_lower_threshold: float = 0.99, + sum_pips_upper_threshold: float = 1.0001, # Set slightly above 1 to account for floating point errors + ) -> StudyLocus: + """Filter study-locus by sum of posterior inclusion probabilities to ensure that the sum of PIPs is within a given range. + + Args: + sum_pips_lower_threshold (float): Lower threshold for the sum of PIPs. + sum_pips_upper_threshold (float): Upper threshold for the sum of PIPs. + + Returns: + StudyLocus: Filtered study-locus dataset. 
+ """ + # QC column might not be present so we have to be ready to handle it: + qc_select_expression = ( + f.col("qualityControls") + if "qualityControls" in self.df.columns + else f.lit(None).cast(ArrayType(StringType())) + ) + + flag = (self.df.withColumn( + "sumPosteriorProbability", + f.aggregate( + f.col("locus"), + f.lit(0.0), + lambda acc, x: acc + x["posteriorProbability"] + )).withColumn( + "pipOutOfRange", + f.when( + (f.col("sumPosteriorProbability") < sum_pips_lower_threshold) | + (f.col("sumPosteriorProbability") > sum_pips_upper_threshold), + True + ).otherwise(False))) + + return StudyLocus( + _df=(flag + # Flagging loci with failed studies: + .withColumn( + "qualityControls", + self.update_quality_flag( + qc_select_expression, + f.col("pipOutOfRange"), + StudyLocusQualityCheck.ABNORMAL_PIPS + ), + ).drop("sumPosteriorProbability", "pipOutOfRange")), + _schema=self.get_schema() + ) + @staticmethod def _overlapping_peaks( credset_to_overlap: DataFrame, intra_study_overlap: bool = False diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index bca6b8e11..1c8ae161c 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -45,6 +45,8 @@ def __init__( .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics .qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE + # Flagging credible sets with PIP > 1 or PIP < 0.99 + .qc_abnormal_pips(sum_pips_lower_threshold=0.99,sum_pips_upper_threshold=1.0001) # Annotates credible intervals and filter to only keep 99% credible sets .filter_credible_set(credible_interval=CredibleInterval.IS99) # Annotate credible set confidence: diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 3cbaf6866..7f15a11a6 100644 --- a/tests/gentropy/dataset/test_study_locus.py 
+++ b/tests/gentropy/dataset/test_study_locus.py @@ -203,72 +203,84 @@ def test_filter_credible_set(mock_study_locus: StudyLocus) -> None: ) -@pytest.mark.parametrize( - ("observed", "expected"), +def test_qc_abnormal_pips(mock_study_locus: StudyLocus) -> None: + """Test that the qc_abnormal_pips method returns a StudyLocus object.""" + assert isinstance(mock_study_locus.qc_abnormal_pips(0.99, 1), StudyLocus) + + +# Used primarily for test_unique_variants_in_locus but also for other tests +test_unique_variants_in_locus_test_data = [ + ( + # Locus is not null, should return union between variants in locus and lead variant + [ + ( + "1", + "traitA", + "22_varA", + [ + {"variantId": "22_varA", "posteriorProbability": 0.44}, + {"variantId": "22_varB", "posteriorProbability": 0.015}, + ], + ), + ], + [ + ( + "22_varA", + "22", + ), + ( + "22_varB", + "22", + ), + ], + ), + ( + # locus is null, should return lead variant + [ + ("1", "traitA", "22_varA", None), + ], + [ + ( + "22_varA", + "22", + ), + ], + ), +] + +test_unique_variants_in_locus_test_schema = StructType( [ - ( - # Locus is not null, should return union between variants in locus and lead variant - [ - ( - "1", - "traitA", - "22_varA", + StructField("studyLocusId", StringType(), True), + StructField("studyId", StringType(), True), + StructField("variantId", StringType(), True), + StructField( + "locus", + ArrayType( + StructType( [ - {"variantId": "22_varA", "posteriorProbability": 0.44}, - {"variantId": "22_varB", "posteriorProbability": 0.015}, - ], - ), - ], - [ - ( - "22_varA", - "22", - ), - ( - "22_varB", - "22", - ), - ], - ), - ( - # locus is null, should return lead variant - [ - ("1", "traitA", "22_varA", None), - ], - [ - ( - "22_varA", - "22", - ), - ], + StructField("variantId", StringType(), True), + StructField("posteriorProbability", DoubleType(), True), + ] + ) + ), + True, ), - ], + ] +) + + +@pytest.mark.parametrize( + ("observed", "expected"), + test_unique_variants_in_locus_test_data, ) 
def test_unique_variants_in_locus( spark: SparkSession, observed: list[Any], expected: list[Any] ) -> None: """Test unique variants in locus.""" # assert isinstance(mock_study_locus.test_unique_variants_in_locus(), DataFrame) - schema = StructType( - [ - StructField("studyLocusId", StringType(), True), - StructField("studyId", StringType(), True), - StructField("variantId", StringType(), True), - StructField( - "locus", - ArrayType( - StructType( - [ - StructField("variantId", StringType(), True), - ] - ) - ), - True, - ), - ] - ) data_sl = StudyLocus( - _df=spark.createDataFrame(observed, schema), _schema=StudyLocus.get_schema() + _df=spark.createDataFrame(observed, test_unique_variants_in_locus_test_schema), + _schema=StudyLocus.get_schema(), ) expected_df = spark.createDataFrame( expected, schema="variantId: string, chromosome: string" @@ -286,187 +298,223 @@ def test_clump(mock_study_locus: StudyLocus) -> None: assert isinstance(mock_study_locus.clump(), StudyLocus) -@pytest.mark.parametrize( - ("observed", "expected"), +# Used primarily for test_annotate_credible_sets but also for other tests +test_annotate_credible_sets_test_data = [ + ( + # Simple case + [ + # Observed + ( + "1", + "traitA", + "leadB", + [{"variantId": "tagVariantA", "posteriorProbability": 1.0}], + ), + ], + [ + # Expected + ( + "1", + "traitA", + "leadB", + [ + { + "variantId": "tagVariantA", + "posteriorProbability": 1.0, + "is95CredibleSet": True, + "is99CredibleSet": True, + } + ], + ) + ], + ), + ( + # Unordered credible set + [ + # Observed + ( + "1", + "traitA", + "leadA", + [ + {"variantId": "tagVariantA", "posteriorProbability": 0.44}, + {"variantId": "tagVariantB", "posteriorProbability": 0.015}, + {"variantId": "tagVariantC", "posteriorProbability": 0.04}, + {"variantId": "tagVariantD", "posteriorProbability": 0.005}, + {"variantId": "tagVariantE", "posteriorProbability": 0.5}, + {"variantId": "tagVariantNull", "posteriorProbability": None}, + {"variantId": "tagVariantNull", 
"posteriorProbability": None}, + ], + ) + ], + [ + # Expected + ( + "1", + "traitA", + "leadA", + [ + { + "variantId": "tagVariantE", + "posteriorProbability": 0.5, + "is95CredibleSet": True, + "is99CredibleSet": True, + }, + { + "variantId": "tagVariantA", + "posteriorProbability": 0.44, + "is95CredibleSet": True, + "is99CredibleSet": True, + }, + { + "variantId": "tagVariantC", + "posteriorProbability": 0.04, + "is95CredibleSet": True, + "is99CredibleSet": True, + }, + { + "variantId": "tagVariantB", + "posteriorProbability": 0.015, + "is95CredibleSet": False, + "is99CredibleSet": True, + }, + { + "variantId": "tagVariantD", + "posteriorProbability": 0.005, + "is95CredibleSet": False, + "is99CredibleSet": False, + }, + { + "variantId": "tagVariantNull", + "posteriorProbability": None, + "is95CredibleSet": False, + "is99CredibleSet": False, + }, + { + "variantId": "tagVariantNull", + "posteriorProbability": None, + "is95CredibleSet": False, + "is99CredibleSet": False, + }, + ], + ) + ], + ), + ( + # Null credible set + [ + # Observed + ( + "1", + "traitA", + "leadB", + None, + ), + ], + [ + # Expected + ( + "1", + "traitA", + "leadB", + None, + ) + ], + ), + ( + # Empty credible set + [ + # Observed + ( + "1", + "traitA", + "leadB", + [], + ), + ], + [ + # Expected + ( + "1", + "traitA", + "leadB", + None, + ) + ], + ), +] +test_annotate_credible_sets_test_schema = StructType( [ - ( - # Simple case - [ - # Observed - ( - "1", - "traitA", - "leadB", - [{"variantId": "tagVariantA", "posteriorProbability": 1.0}], - ), - ], - [ - # Expected - ( - "1", - "traitA", - "leadB", - [ - { - "variantId": "tagVariantA", - "posteriorProbability": 1.0, - "is95CredibleSet": True, - "is99CredibleSet": True, - } - ], - ) - ], - ), - ( - # Unordered credible set - [ - # Observed - ( - "1", - "traitA", - "leadA", - [ - {"variantId": "tagVariantA", "posteriorProbability": 0.44}, - {"variantId": "tagVariantB", "posteriorProbability": 0.015}, - {"variantId": "tagVariantC", 
"posteriorProbability": 0.04}, - {"variantId": "tagVariantD", "posteriorProbability": 0.005}, - {"variantId": "tagVariantE", "posteriorProbability": 0.5}, - {"variantId": "tagVariantNull", "posteriorProbability": None}, - {"variantId": "tagVariantNull", "posteriorProbability": None}, - ], - ) - ], - [ - # Expected - ( - "1", - "traitA", - "leadA", + StructField("studyLocusId", StringType(), True), + StructField("studyId", StringType(), True), + StructField("variantId", StringType(), True), + StructField( + "locus", + ArrayType( + StructType( [ - { - "variantId": "tagVariantE", - "posteriorProbability": 0.5, - "is95CredibleSet": True, - "is99CredibleSet": True, - }, - { - "variantId": "tagVariantA", - "posteriorProbability": 0.44, - "is95CredibleSet": True, - "is99CredibleSet": True, - }, - { - "variantId": "tagVariantC", - "posteriorProbability": 0.04, - "is95CredibleSet": True, - "is99CredibleSet": True, - }, - { - "variantId": "tagVariantB", - "posteriorProbability": 0.015, - "is95CredibleSet": False, - "is99CredibleSet": True, - }, - { - "variantId": "tagVariantD", - "posteriorProbability": 0.005, - "is95CredibleSet": False, - "is99CredibleSet": False, - }, - { - "variantId": "tagVariantNull", - "posteriorProbability": None, - "is95CredibleSet": False, - "is99CredibleSet": False, - }, - { - "variantId": "tagVariantNull", - "posteriorProbability": None, - "is95CredibleSet": False, - "is99CredibleSet": False, - }, - ], + StructField("variantId", StringType(), True), + StructField("posteriorProbability", DoubleType(), True), + StructField("is95CredibleSet", BooleanType(), True), + StructField("is99CredibleSet", BooleanType(), True), + ] ) - ], - ), - ( - # Null credible set - [ - # Observed - ( - "1", - "traitA", - "leadB", - None, - ), - ], - [ - # Expected - ( - "1", - "traitA", - "leadB", - None, - ) - ], - ), - ( - # Empty credible set - [ - # Observed - ( - "1", - "traitA", - "leadB", - [], - ), - ], - [ - # Expected - ( - "1", - "traitA", - "leadB", - None, - 
) - ], + ), + True, ), - ], + ] +) + + +@pytest.mark.parametrize( + ("observed", "expected"), + test_annotate_credible_sets_test_data, ) def test_annotate_credible_sets( spark: SparkSession, observed: list[Any], expected: list[Any] ) -> None: """Test annotate_credible_sets.""" - schema = StructType( - [ - StructField("studyLocusId", StringType(), True), - StructField("studyId", StringType(), True), - StructField("variantId", StringType(), True), - StructField( - "locus", - ArrayType( - StructType( - [ - StructField("variantId", StringType(), True), - StructField("posteriorProbability", DoubleType(), True), - StructField("is95CredibleSet", BooleanType(), True), - StructField("is99CredibleSet", BooleanType(), True), - ] - ) - ), - True, - ), - ] - ) data_sl = StudyLocus( - _df=spark.createDataFrame(observed, schema), _schema=StudyLocus.get_schema() + _df=spark.createDataFrame(observed, test_annotate_credible_sets_test_schema), + _schema=StudyLocus.get_schema(), ) expected_sl = StudyLocus( - _df=spark.createDataFrame(expected, schema), _schema=StudyLocus.get_schema() + _df=spark.createDataFrame(expected, test_annotate_credible_sets_test_schema), + _schema=StudyLocus.get_schema(), ) assert data_sl.annotate_credible_sets().df.collect() == expected_sl.df.collect() +def test_qc_abnormal_pips_good_locus(spark: SparkSession) -> None: + """Test qc_abnormal_pips with a well-behaving locus.""" + # Input data + sl = StudyLocus( + _df=spark.createDataFrame( + test_annotate_credible_sets_test_data[1][0], + test_annotate_credible_sets_test_schema, + ), + _schema=StudyLocus.get_schema(), + ) + assert ( + sl.qc_abnormal_pips().df.filter(f.size("qualityControls") > 0).count() == 0 + ), "Expected number of rows differ from observed." 
+ + +def test_qc_abnormal_pips_bad_locus(spark: SparkSession) -> None: + """Test qc_abnormal_pips with an abnormal locus.""" + # Input data + sl = StudyLocus( + _df=spark.createDataFrame( + test_unique_variants_in_locus_test_data[0][0], + test_unique_variants_in_locus_test_schema, + ), + _schema=StudyLocus.get_schema(), + ) + assert ( + sl.qc_abnormal_pips().df.filter(f.size("qualityControls") > 0).count() == 1 + ), "Expected number of rows differ from observed." + + def test_annotate_ld( mock_study_locus: StudyLocus, mock_study_index: StudyIndex, mock_ld_index: LDIndex ) -> None: From 1596c4ca6d5713ca748cdf3dbc637ead52660ebc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:33:16 +0000 Subject: [PATCH 141/188] build(deps-dev): bump ipython from 8.28.0 to 8.29.0 (#883) Bumps [ipython](https://github.com/ipython/ipython) from 8.28.0 to 8.29.0. - [Release notes](https://github.com/ipython/ipython/releases) - [Commits](https://github.com/ipython/ipython/compare/8.28.0...8.29.0) --- updated-dependencies: - dependency-name: ipython dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9ce219f6b..1ed436a34 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1899,13 +1899,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.28.0" +version = "8.29.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"}, - {file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"}, + {file = "ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8"}, + {file = "ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb"}, ] [package.dependencies] From 31d8716fa3ff0d4071829381876e8d3f5efac6cb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 11:40:44 +0000 Subject: [PATCH 142/188] build(deps-dev): bump mypy from 1.12.1 to 1.13.0 (#884) Bumps [mypy](https://github.com/python/mypy) from 1.12.1 to 1.13.0. - [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md) - [Commits](https://github.com/python/mypy/compare/v1.12.1...v1.13.0) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 69 +++++++++++++++++++++++++------------------------- pyproject.toml | 2 +- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1ed436a34..4da042c37 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2755,43 +2755,43 @@ files = [ [[package]] name = "mypy" -version = "1.12.1" +version = "1.13.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" files = [ - {file = "mypy-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3d7d4371829184e22fda4015278fbfdef0327a4b955a483012bd2d423a788801"}, - {file = "mypy-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f59f1dfbf497d473201356966e353ef09d4daec48caeacc0254db8ef633a28a5"}, - {file = "mypy-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b947097fae68004b8328c55161ac9db7d3566abfef72d9d41b47a021c2fba6b1"}, - {file = "mypy-1.12.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:96af62050971c5241afb4701c15189ea9507db89ad07794a4ee7b4e092dc0627"}, - {file = "mypy-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:d90da248f4c2dba6c44ddcfea94bb361e491962f05f41990ff24dbd09969ce20"}, - {file = "mypy-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1230048fec1380faf240be6385e709c8570604d2d27ec6ca7e573e3bc09c3735"}, - {file = "mypy-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02dcfe270c6ea13338210908f8cadc8d31af0f04cee8ca996438fe6a97b4ec66"}, - {file = "mypy-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a437c9102a6a252d9e3a63edc191a3aed5f2fcb786d614722ee3f4472e33f6"}, - {file = "mypy-1.12.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:186e0c8346efc027ee1f9acf5ca734425fc4f7dc2b60144f0fbe27cc19dc7931"}, - {file = "mypy-1.12.1-cp311-cp311-win_amd64.whl", hash 
= "sha256:673ba1140a478b50e6d265c03391702fa11a5c5aff3f54d69a62a48da32cb811"}, - {file = "mypy-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9fb83a7be97c498176fb7486cafbb81decccaef1ac339d837c377b0ce3743a7f"}, - {file = "mypy-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:389e307e333879c571029d5b93932cf838b811d3f5395ed1ad05086b52148fb0"}, - {file = "mypy-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:94b2048a95a21f7a9ebc9fbd075a4fcd310410d078aa0228dbbad7f71335e042"}, - {file = "mypy-1.12.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ee5932370ccf7ebf83f79d1c157a5929d7ea36313027b0d70a488493dc1b179"}, - {file = "mypy-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:19bf51f87a295e7ab2894f1d8167622b063492d754e69c3c2fed6563268cb42a"}, - {file = "mypy-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d34167d43613ffb1d6c6cdc0cc043bb106cac0aa5d6a4171f77ab92a3c758bcc"}, - {file = "mypy-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:427878aa54f2e2c5d8db31fa9010c599ed9f994b3b49e64ae9cd9990c40bd635"}, - {file = "mypy-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5fcde63ea2c9f69d6be859a1e6dd35955e87fa81de95bc240143cf00de1f7f81"}, - {file = "mypy-1.12.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d54d840f6c052929f4a3d2aab2066af0f45a020b085fe0e40d4583db52aab4e4"}, - {file = "mypy-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:20db6eb1ca3d1de8ece00033b12f793f1ea9da767334b7e8c626a4872090cf02"}, - {file = "mypy-1.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b16fe09f9c741d85a2e3b14a5257a27a4f4886c171d562bc5a5e90d8591906b8"}, - {file = "mypy-1.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0dcc1e843d58f444fce19da4cce5bd35c282d4bde232acdeca8279523087088a"}, - {file = "mypy-1.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:e10ba7de5c616e44ad21005fa13450cd0de7caaa303a626147d45307492e4f2d"}, - {file = "mypy-1.12.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e6fe449223fa59fbee351db32283838a8fee8059e0028e9e6494a03802b4004"}, - {file = "mypy-1.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:dc6e2a2195a290a7fd5bac3e60b586d77fc88e986eba7feced8b778c373f9afe"}, - {file = "mypy-1.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:de5b2a8988b4e1269a98beaf0e7cc71b510d050dce80c343b53b4955fff45f19"}, - {file = "mypy-1.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:843826966f1d65925e8b50d2b483065c51fc16dc5d72647e0236aae51dc8d77e"}, - {file = "mypy-1.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9fe20f89da41a95e14c34b1ddb09c80262edcc295ad891f22cc4b60013e8f78d"}, - {file = "mypy-1.12.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8135ffec02121a75f75dc97c81af7c14aa4ae0dda277132cfcd6abcd21551bfd"}, - {file = "mypy-1.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:a7b76fa83260824300cc4834a3ab93180db19876bce59af921467fd03e692810"}, - {file = "mypy-1.12.1-py3-none-any.whl", hash = "sha256:ce561a09e3bb9863ab77edf29ae3a50e65685ad74bba1431278185b7e5d5486e"}, - {file = "mypy-1.12.1.tar.gz", hash = "sha256:f5b3936f7a6d0e8280c9bdef94c7ce4847f5cdfc258fbb2c29a8c1711e8bb96d"}, + {file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"}, + {file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"}, + {file = "mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7"}, + {file = "mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f"}, + {file = 
"mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372"}, + {file = "mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d"}, + {file = "mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d"}, + {file = "mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b"}, + {file = "mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73"}, + {file = "mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca"}, + {file = "mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5"}, + {file = "mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e"}, + {file = "mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2"}, + {file = "mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0"}, + {file = "mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2"}, + {file = "mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7"}, + {file = "mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62"}, + {file = 
"mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8"}, + {file = "mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7"}, + {file = "mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc"}, + {file = "mypy-1.13.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:100fac22ce82925f676a734af0db922ecfea991e1d7ec0ceb1e115ebe501301a"}, + {file = "mypy-1.13.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bcb0bb7f42a978bb323a7c88f1081d1b5dee77ca86f4100735a6f541299d8fb"}, + {file = "mypy-1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bde31fc887c213e223bbfc34328070996061b0833b0a4cfec53745ed61f3519b"}, + {file = "mypy-1.13.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07de989f89786f62b937851295ed62e51774722e5444a27cecca993fc3f9cd74"}, + {file = "mypy-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:4bde84334fbe19bad704b3f5b78c4abd35ff1026f8ba72b29de70dda0916beb6"}, + {file = "mypy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0246bcb1b5de7f08f2826451abd947bf656945209b140d16ed317f65a17dc7dc"}, + {file = "mypy-1.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f5b7deae912cf8b77e990b9280f170381fdfbddf61b4ef80927edd813163732"}, + {file = "mypy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7029881ec6ffb8bc233a4fa364736789582c738217b133f1b55967115288a2bc"}, + {file = "mypy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3e38b980e5681f28f033f3be86b099a247b13c491f14bb8b1e1e134d23bb599d"}, + {file = "mypy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:a6789be98a2017c912ae6ccb77ea553bbaf13d27605d2ca20a76dfbced631b24"}, + {file = "mypy-1.13.0-py3-none-any.whl", hash = 
"sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a"}, + {file = "mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e"}, ] [package.dependencies] @@ -2801,6 +2801,7 @@ typing-extensions = ">=4.6.0" [package.extras] dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] install-types = ["pip"] mypyc = ["setuptools (>=50)"] reports = ["lxml"] @@ -5224,4 +5225,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "2dcd05168a809a2a9dc0728ee7ed1578ad008d0c3a290bf9a41895b10f2d849a" +content-hash = "9caf73143d9e3b6d71389d7fce3b554c619492bff8567de39847129ed7309af2" diff --git a/pyproject.toml b/pyproject.toml index fea630906..4985c6709 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ google-cloud-secret-manager = "^2.20.0" [tool.poetry.dev-dependencies] pre-commit = "^4.0.0" -mypy = "^1.12" +mypy = "^1.13" pep8-naming = "^0.14.1" interrogate = "^1.7.0" isort = "^5.13.2" From 759857eb9354f390ca089fb781979bed5b40065a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:56:25 +0000 Subject: [PATCH 143/188] feat(trainer): log model explanation with shap (#886) * build: add `shap` * feat(l2g): log feature contributions with shap * chore: update lock file * fix: ignore installed packages in `install_dependencies_on_cluster.sh` (problem with llvm) * build: add `matplotlib` * fix: pin `matplotlib` version to avoid mplDeprecation issue * fix(trainer): set default features_list + minor bugs * chore: pre-commit auto fixes [...] 
--- poetry.lock | 4122 +++++++++++++--------- pyproject.toml | 2 + src/gentropy/method/l2g/model.py | 19 + src/gentropy/method/l2g/trainer.py | 133 +- utils/install_dependencies_on_cluster.sh | 2 +- 5 files changed, 2561 insertions(+), 1717 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4da042c37..edba6ef99 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiodns" @@ -14,101 +14,128 @@ files = [ [package.dependencies] pycares = ">=3.0.0" +[[package]] +name = "aiohappyeyeballs" +version = "2.4.3" +description = "Happy Eyeballs for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, + {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"}, +] + [[package]] name = "aiohttp" -version = "3.9.5" +version = "3.10.10" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" files = [ - {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, - {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, - {file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"}, - {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"}, - {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"}, - {file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"}, - {file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"}, - {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"}, - {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"}, - {file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"}, - {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"}, - {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"}, - {file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"}, - {file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"}, - {file = 
"aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"}, - {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"}, - {file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"}, - {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"}, - {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"}, - {file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"}, - {file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"}, - {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"}, - {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"}, - {file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"}, - {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", 
hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"}, - {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"}, - {file = "aiohttp-3.9.5-cp38-cp38-win32.whl", hash = "sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"}, - {file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"}, - {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"}, - {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"}, - {file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"}, - {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", 
hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"}, - {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"}, - {file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = "sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"}, - {file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"}, - {file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"}, + {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, + {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b06b7843929e41a94ea09eb1ce3927865387e3e23ebe108e0d0d09b08d25be9"}, + {file = "aiohttp-3.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:333cf6cf8e65f6a1e06e9eb3e643a0c515bb850d470902274239fea02033e9a8"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:274cfa632350225ce3fdeb318c23b4a10ec25c0e2c880eff951a3842cf358ac1"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9e5e4a85bdb56d224f412d9c98ae4cbd032cc4f3161818f692cd81766eee65a"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2b606353da03edcc71130b52388d25f9a30a126e04caef1fd637e31683033abd"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab5a5a0c7a7991d90446a198689c0535be89bbd6b410a1f9a66688f0880ec026"}, + {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578a4b875af3e0daaf1ac6fa983d93e0bbfec3ead753b6d6f33d467100cdc67b"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8105fd8a890df77b76dd3054cddf01a879fc13e8af576805d667e0fa0224c35d"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3bcd391d083f636c06a68715e69467963d1f9600f85ef556ea82e9ef25f043f7"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fbc6264158392bad9df19537e872d476f7c57adf718944cc1e4495cbabf38e2a"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e48d5021a84d341bcaf95c8460b152cfbad770d28e5fe14a768988c461b821bc"}, + {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2609e9ab08474702cc67b7702dbb8a80e392c54613ebe80db7e8dbdb79837c68"}, + {file = "aiohttp-3.10.10-cp310-cp310-win32.whl", hash = "sha256:84afcdea18eda514c25bc68b9af2a2b1adea7c08899175a51fe7c4fb6d551257"}, + {file = "aiohttp-3.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:9c72109213eb9d3874f7ac8c0c5fa90e072d678e117d9061c06e30c85b4cf0e6"}, + {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c30a0eafc89d28e7f959281b58198a9fa5e99405f716c0289b7892ca345fe45f"}, + {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:258c5dd01afc10015866114e210fb7365f0d02d9d059c3c3415382ab633fcbcb"}, + {file = "aiohttp-3.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15ecd889a709b0080f02721255b3f80bb261c2293d3c748151274dfea93ac871"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:f3935f82f6f4a3820270842e90456ebad3af15810cf65932bd24da4463bc0a4c"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:413251f6fcf552a33c981c4709a6bba37b12710982fec8e558ae944bfb2abd38"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1720b4f14c78a3089562b8875b53e36b51c97c51adc53325a69b79b4b48ebcb"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:679abe5d3858b33c2cf74faec299fda60ea9de62916e8b67e625d65bf069a3b7"}, + {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79019094f87c9fb44f8d769e41dbb664d6e8fcfd62f665ccce36762deaa0e911"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2fb38c2ed905a2582948e2de560675e9dfbee94c6d5ccdb1301c6d0a5bf092"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a3f00003de6eba42d6e94fabb4125600d6e484846dbf90ea8e48a800430cc142"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbb122c557a16fafc10354b9d99ebf2f2808a660d78202f10ba9d50786384b9"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30ca7c3b94708a9d7ae76ff281b2f47d8eaf2579cd05971b5dc681db8caac6e1"}, + {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:df9270660711670e68803107d55c2b5949c2e0f2e4896da176e1ecfc068b974a"}, + {file = "aiohttp-3.10.10-cp311-cp311-win32.whl", hash = "sha256:aafc8ee9b742ce75044ae9a4d3e60e3d918d15a4c2e08a6c3c3e38fa59b92d94"}, + {file = "aiohttp-3.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:362f641f9071e5f3ee6f8e7d37d5ed0d95aae656adf4ef578313ee585b585959"}, + {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9294bbb581f92770e6ed5c19559e1e99255e4ca604a22c5c6397b2f9dd3ee42c"}, + {file = 
"aiohttp-3.10.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8fa23fe62c436ccf23ff930149c047f060c7126eae3ccea005f0483f27b2e28"}, + {file = "aiohttp-3.10.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c6a5b8c7926ba5d8545c7dd22961a107526562da31a7a32fa2456baf040939f"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:007ec22fbc573e5eb2fb7dec4198ef8f6bf2fe4ce20020798b2eb5d0abda6138"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9627cc1a10c8c409b5822a92d57a77f383b554463d1884008e051c32ab1b3742"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50edbcad60d8f0e3eccc68da67f37268b5144ecc34d59f27a02f9611c1d4eec7"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a45d85cf20b5e0d0aa5a8dca27cce8eddef3292bc29d72dcad1641f4ed50aa16"}, + {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b00807e2605f16e1e198f33a53ce3c4523114059b0c09c337209ae55e3823a8"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f2d4324a98062be0525d16f768a03e0bbb3b9fe301ceee99611dc9a7953124e6"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438cd072f75bb6612f2aca29f8bd7cdf6e35e8f160bc312e49fbecab77c99e3a"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:baa42524a82f75303f714108fea528ccacf0386af429b69fff141ffef1c534f9"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a7d8d14fe962153fc681f6366bdec33d4356f98a3e3567782aac1b6e0e40109a"}, + {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c1277cd707c465cd09572a774559a3cc7c7a28802eb3a2a9472588f062097205"}, + {file = "aiohttp-3.10.10-cp312-cp312-win32.whl", hash = 
"sha256:59bb3c54aa420521dc4ce3cc2c3fe2ad82adf7b09403fa1f48ae45c0cbde6628"}, + {file = "aiohttp-3.10.10-cp312-cp312-win_amd64.whl", hash = "sha256:0e1b370d8007c4ae31ee6db7f9a2fe801a42b146cec80a86766e7ad5c4a259cf"}, + {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ad7593bb24b2ab09e65e8a1d385606f0f47c65b5a2ae6c551db67d6653e78c28"}, + {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1eb89d3d29adaf533588f209768a9c02e44e4baf832b08118749c5fad191781d"}, + {file = "aiohttp-3.10.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3fe407bf93533a6fa82dece0e74dbcaaf5d684e5a51862887f9eaebe6372cd79"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aed5155f819873d23520919e16703fc8925e509abbb1a1491b0087d1cd969e"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f05e9727ce409358baa615dbeb9b969db94324a79b5a5cea45d39bdb01d82e6"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dffb610a30d643983aeb185ce134f97f290f8935f0abccdd32c77bed9388b42"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa6658732517ddabe22c9036479eabce6036655ba87a0224c612e1ae6af2087e"}, + {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:741a46d58677d8c733175d7e5aa618d277cd9d880301a380fd296975a9cdd7bc"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e00e3505cd80440f6c98c6d69269dcc2a119f86ad0a9fd70bccc59504bebd68a"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ffe595f10566f8276b76dc3a11ae4bb7eba1aac8ddd75811736a15b0d5311414"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdfcf6443637c148c4e1a20c48c566aa694fa5e288d34b20fcdc58507882fed3"}, + 
{file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d183cf9c797a5291e8301790ed6d053480ed94070637bfaad914dd38b0981f67"}, + {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:77abf6665ae54000b98b3c742bc6ea1d1fb31c394bcabf8b5d2c1ac3ebfe7f3b"}, + {file = "aiohttp-3.10.10-cp313-cp313-win32.whl", hash = "sha256:4470c73c12cd9109db8277287d11f9dd98f77fc54155fc71a7738a83ffcc8ea8"}, + {file = "aiohttp-3.10.10-cp313-cp313-win_amd64.whl", hash = "sha256:486f7aabfa292719a2753c016cc3a8f8172965cabb3ea2e7f7436c7f5a22a151"}, + {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1b66ccafef7336a1e1f0e389901f60c1d920102315a56df85e49552308fc0486"}, + {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:acd48d5b80ee80f9432a165c0ac8cbf9253eaddb6113269a5e18699b33958dbb"}, + {file = "aiohttp-3.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3455522392fb15ff549d92fbf4b73b559d5e43dc522588f7eb3e54c3f38beee7"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45c3b868724137f713a38376fef8120c166d1eadd50da1855c112fe97954aed8"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:da1dee8948d2137bb51fbb8a53cce6b1bcc86003c6b42565f008438b806cccd8"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5ce2ce7c997e1971b7184ee37deb6ea9922ef5163c6ee5aa3c274b05f9e12fa"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28529e08fde6f12eba8677f5a8608500ed33c086f974de68cc65ab218713a59d"}, + {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f7db54c7914cc99d901d93a34704833568d86c20925b2762f9fa779f9cd2e70f"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:03a42ac7895406220124c88911ebee31ba8b2d24c98507f4a8bf826b2937c7f2"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:7e338c0523d024fad378b376a79faff37fafb3c001872a618cde1d322400a572"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:038f514fe39e235e9fef6717fbf944057bfa24f9b3db9ee551a7ecf584b5b480"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:64f6c17757251e2b8d885d728b6433d9d970573586a78b78ba8929b0f41d045a"}, + {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:93429602396f3383a797a2a70e5f1de5df8e35535d7806c9f91df06f297e109b"}, + {file = "aiohttp-3.10.10-cp38-cp38-win32.whl", hash = "sha256:c823bc3971c44ab93e611ab1a46b1eafeae474c0c844aff4b7474287b75fe49c"}, + {file = "aiohttp-3.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:54ca74df1be3c7ca1cf7f4c971c79c2daf48d9aa65dea1a662ae18926f5bc8ce"}, + {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01948b1d570f83ee7bbf5a60ea2375a89dfb09fd419170e7f5af029510033d24"}, + {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9fc1500fd2a952c5c8e3b29aaf7e3cc6e27e9cfc0a8819b3bce48cc1b849e4cc"}, + {file = "aiohttp-3.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f614ab0c76397661b90b6851a030004dac502e48260ea10f2441abd2207fbcc7"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00819de9e45d42584bed046314c40ea7e9aea95411b38971082cad449392b08c"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05646ebe6b94cc93407b3bf34b9eb26c20722384d068eb7339de802154d61bc5"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998f3bd3cfc95e9424a6acd7840cbdd39e45bc09ef87533c006f94ac47296090"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d9010c31cd6fa59438da4e58a7f19e4753f7f264300cd152e7f90d4602449762"}, + {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ea7ffc6d6d6f8a11e6f40091a1040995cdff02cfc9ba4c2f30a516cb2633554"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ef9c33cc5cbca35808f6c74be11eb7f5f6b14d2311be84a15b594bd3e58b5527"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ce0cdc074d540265bfeb31336e678b4e37316849d13b308607efa527e981f5c2"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:597a079284b7ee65ee102bc3a6ea226a37d2b96d0418cc9047490f231dc09fe8"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:7789050d9e5d0c309c706953e5e8876e38662d57d45f936902e176d19f1c58ab"}, + {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e7f8b04d83483577fd9200461b057c9f14ced334dcb053090cea1da9c8321a91"}, + {file = "aiohttp-3.10.10-cp39-cp39-win32.whl", hash = "sha256:c02a30b904282777d872266b87b20ed8cc0d1501855e27f831320f471d54d983"}, + {file = "aiohttp-3.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:edfe3341033a6b53a5c522c802deb2079eee5cbfbb0af032a55064bd65c73a23"}, + {file = "aiohttp-3.10.10.tar.gz", hash = "sha256:0631dd7c9f0822cc61c88586ca76d5b5ada26538097d0f1df510b082bad3411a"}, ] [package.dependencies] +aiohappyeyeballs = ">=2.3.0" aiosignal = ">=1.1.2" async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.0,<2.0" +yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns", "brotlicffi"] +speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] [[package]] name = "aiosignal" @@ -176,22 +203,22 @@ files = [ [[package]] name = "attrs" -version = "23.2.0" +version = "24.2.0" description = "Classes Without Boilerplate" optional = false python-versions = 
">=3.7" files = [ - {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, - {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, + {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, + {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, ] [package.extras] -cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] -dev = ["attrs[tests]", "pre-commit"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] -tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] -tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] [[package]] name = "avro" @@ -220,13 +247,13 @@ files = [ [[package]] name = "azure-core" -version = "1.30.2" +version = "1.31.0" description = "Microsoft Azure Core Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = 
"azure-core-1.30.2.tar.gz", hash = "sha256:a14dc210efcd608821aa472d9fb8e8d035d29b68993819147bc290a8ac224472"}, - {file = "azure_core-1.30.2-py3-none-any.whl", hash = "sha256:cf019c1ca832e96274ae85abd3d9f752397194d9fea3b41487290562ac8abe4a"}, + {file = "azure_core-1.31.0-py3-none-any.whl", hash = "sha256:22954de3777e0250029360ef31d80448ef1be13b80a459bff80ba7073379e2cd"}, + {file = "azure_core-1.31.0.tar.gz", hash = "sha256:656a0dd61e1869b1506b7c6a3b31d62f15984b1a573d6326f6aa2f3e4123284b"}, ] [package.dependencies] @@ -239,20 +266,20 @@ aio = ["aiohttp (>=3.0)"] [[package]] name = "azure-identity" -version = "1.17.1" +version = "1.19.0" description = "Microsoft Azure Identity Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-identity-1.17.1.tar.gz", hash = "sha256:32ecc67cc73f4bd0595e4f64b1ca65cd05186f4fe6f98ed2ae9f1aa32646efea"}, - {file = "azure_identity-1.17.1-py3-none-any.whl", hash = "sha256:db8d59c183b680e763722bfe8ebc45930e6c57df510620985939f7f3191e0382"}, + {file = "azure_identity-1.19.0-py3-none-any.whl", hash = "sha256:e3f6558c181692d7509f09de10cca527c7dce426776454fb97df512a46527e81"}, + {file = "azure_identity-1.19.0.tar.gz", hash = "sha256:500144dc18197d7019b81501165d4fa92225f03778f17d7ca8a2a180129a9c83"}, ] [package.dependencies] -azure-core = ">=1.23.0" +azure-core = ">=1.31.0" cryptography = ">=2.5" -msal = ">=1.24.0" -msal-extensions = ">=0.3.0" +msal = ">=1.30.0" +msal-extensions = ">=1.2.0" typing-extensions = ">=4.0.0" [[package]] @@ -287,33 +314,33 @@ msrest = ">=0.6.21" [[package]] name = "azure-storage-blob" -version = "12.20.0" +version = "12.23.1" description = "Microsoft Azure Blob Storage Client Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-storage-blob-12.20.0.tar.gz", hash = "sha256:eeb91256e41d4b5b9bad6a87fd0a8ade07dd58aa52344e2c8d2746e27a017d3b"}, - {file = "azure_storage_blob-12.20.0-py3-none-any.whl", hash = 
"sha256:de6b3bf3a90e9341a6bcb96a2ebe981dffff993e9045818f6549afea827a52a9"}, + {file = "azure_storage_blob-12.23.1-py3-none-any.whl", hash = "sha256:1c2238aa841d1545f42714a5017c010366137a44a0605da2d45f770174bfc6b4"}, + {file = "azure_storage_blob-12.23.1.tar.gz", hash = "sha256:a587e54d4e39d2a27bd75109db164ffa2058fe194061e5446c5a89bca918272f"}, ] [package.dependencies] -azure-core = ">=1.28.0" +azure-core = ">=1.30.0" cryptography = ">=2.1.4" isodate = ">=0.6.1" typing-extensions = ">=4.6.0" [package.extras] -aio = ["azure-core[aio] (>=1.28.0)"] +aio = ["azure-core[aio] (>=1.30.0)"] [[package]] name = "babel" -version = "2.15.0" +version = "2.16.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" files = [ - {file = "Babel-2.15.0-py3-none-any.whl", hash = "sha256:08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb"}, - {file = "babel-2.15.0.tar.gz", hash = "sha256:8daf0e265d05768bc6c7a314cf1321e9a123afc328cc635c18622a2f30a04413"}, + {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, + {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, ] [package.extras] @@ -342,13 +369,13 @@ lxml = ["lxml"] [[package]] name = "bokeh" -version = "3.4.1" +version = "3.6.0" description = "Interactive plots and applications in the browser from Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" files = [ - {file = "bokeh-3.4.1-py3-none-any.whl", hash = "sha256:1e3c502a0a8205338fc74dadbfa321f8a0965441b39501e36796a47b4017b642"}, - {file = "bokeh-3.4.1.tar.gz", hash = "sha256:d824961e4265367b0750ce58b07e564ad0b83ca64b335521cd3421e9b9f10d89"}, + {file = "bokeh-3.6.0-py3-none-any.whl", hash = "sha256:699e0df76cdfe54b5f574738647bd0ce230fa44fa0fcda5923e1f0f550f83d74"}, + {file = "bokeh-3.6.0.tar.gz", hash = 
"sha256:0032dc1e76ad097b07626e51584685ff48c65481fbaaad105663b1046165867a"}, ] [package.dependencies] @@ -364,17 +391,17 @@ xyzservices = ">=2021.09.1" [[package]] name = "boto3" -version = "1.34.131" +version = "1.35.51" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.131-py3-none-any.whl", hash = "sha256:05e388cb937e82be70bfd7eb0c84cf8011ff35cf582a593873ac21675268683b"}, - {file = "boto3-1.34.131.tar.gz", hash = "sha256:dab8f72a6c4e62b4fd70da09e08a6b2a65ea2115b27dd63737142005776ef216"}, + {file = "boto3-1.35.51-py3-none-any.whl", hash = "sha256:c922f6a18958af9d8af0489d6d8503b517029d8159b26aa4859a8294561c72e9"}, + {file = "boto3-1.35.51.tar.gz", hash = "sha256:a57c6c7012ecb40c43e565a6f7a891f39efa990ff933eab63cd456f7501c2731"}, ] [package.dependencies] -botocore = ">=1.34.131,<1.35.0" +botocore = ">=1.35.51,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -383,13 +410,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.131" +version = "1.35.51" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.131-py3-none-any.whl", hash = "sha256:13b011d7b206ce00727dcee26548fa3b550db9046d5a0e90ac25a6e6c8fde6ef"}, - {file = "botocore-1.34.131.tar.gz", hash = "sha256:502ddafe1d627fcf1e4c007c86454e5dd011dba7c58bd8e8a5368a79f3e387dc"}, + {file = "botocore-1.35.51-py3-none-any.whl", hash = "sha256:4d65b00111bd12b98e9f920ecab602cf619cc6a6d0be6e5dd53f517e4b92901c"}, + {file = "botocore-1.35.51.tar.gz", hash = "sha256:a9b3d1da76b3e896ad74605c01d88f596324a3337393d4bfbfa0d6c35822ca9c"}, ] [package.dependencies] @@ -398,100 +425,115 @@ python-dateutil = ">=2.1,<3.0.0" urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} [package.extras] -crt = ["awscrt (==0.20.11)"] +crt = ["awscrt (==0.22.0)"] [[package]] name = "bracex" -version = "2.4" +version = "2.5.post1" description = "Bash style brace expander." optional = false python-versions = ">=3.8" files = [ - {file = "bracex-2.4-py3-none-any.whl", hash = "sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418"}, - {file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"}, + {file = "bracex-2.5.post1-py3-none-any.whl", hash = "sha256:13e5732fec27828d6af308628285ad358047cec36801598368cb28bc631dbaf6"}, + {file = "bracex-2.5.post1.tar.gz", hash = "sha256:12c50952415bfa773d2d9ccb8e79651b8cdb1f31a42f6091b804f6ba2b4a66b6"}, ] [[package]] name = "cachetools" -version = "5.3.3" +version = "5.5.0" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, - {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, + {file = "cachetools-5.5.0-py3-none-any.whl", hash = 
"sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292"}, + {file = "cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a"}, ] [[package]] name = "certifi" -version = "2024.6.2" +version = "2024.8.30" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, - {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] [[package]] name = "cffi" -version = "1.16.0" +version = "1.17.1" description = "Foreign Function Interface for Python calling C code." 
optional = false python-versions = ">=3.8" files = [ - {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, - {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, - {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, - {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, - {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, - {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, - {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, - {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, - {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, - {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, - {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, - {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, - {file = 
"cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, - {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, - {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, - {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, - {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, - {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, - {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, - {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, - {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, - {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, - {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, - {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, - {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, - {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, - {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, + {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"}, + {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"}, + {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"}, + {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"}, + {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"}, + {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"}, + {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"}, + {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"}, + {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"}, + {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"}, + {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"}, + {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"}, + {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"}, + {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"}, + {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"}, + {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"}, + {file = 
"cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"}, + {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"}, + {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"}, + {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"}, + {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"}, + {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"}, + {file = 
"cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"}, + {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"}, + {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"}, + {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"}, + {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"}, + {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"}, + {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"}, + {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"}, + {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, + {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] [package.dependencies] @@ -510,101 +552,116 @@ files = [ [[package]] name = "charset-normalizer" -version = "3.3.2" +version = "3.4.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7.0" files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = 
"charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, 
- {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, - {file 
= "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = 
"charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, - {file = 
"charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc"}, + {file = "charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc"}, + {file = 
"charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win32.whl", hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99"}, + {file = "charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1"}, + {file = 
"charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7"}, + {file = "charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dd4eda173a9fcccb5f2e2bd2a9f423d180194b1bf17cf59e3269899235b2a114"}, + {file = 
"charset_normalizer-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9e3c4c9e1ed40ea53acf11e2a386383c3304212c965773704e4603d589343ed"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92a7e36b000bf022ef3dbb9c46bfe2d52c047d5e3f3343f43204263c5addc250"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54b6a92d009cbe2fb11054ba694bc9e284dad30a26757b1e372a1fdddaf21920"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ffd9493de4c922f2a38c2bf62b831dcec90ac673ed1ca182fe11b4d8e9f2a64"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35c404d74c2926d0287fbd63ed5d27eb911eb9e4a3bb2c6d294f3cfd4a9e0c23"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4796efc4faf6b53a18e3d46343535caed491776a22af773f366534056c4e1fbc"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7fdd52961feb4c96507aa649550ec2a0d527c086d284749b2f582f2d40a2e0d"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:92db3c28b5b2a273346bebb24857fda45601aef6ae1c011c0a997106581e8a88"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ab973df98fc99ab39080bfb0eb3a925181454d7c3ac8a1e695fddfae696d9e90"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b67fdab07fdd3c10bb21edab3cbfe8cf5696f453afce75d815d9d7223fbe88b"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:aa41e526a5d4a9dfcfbab0716c7e8a1b215abd3f3df5a45cf18a12721d31cb5d"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:ffc519621dce0c767e96b9c53f09c5d215578e10b02c285809f76509a3931482"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-win32.whl", hash = "sha256:f19c1585933c82098c2a520f8ec1227f20e339e33aca8fa6f956f6691b784e67"}, + {file = "charset_normalizer-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:707b82d19e65c9bd28b81dde95249b07bf9f5b90ebe1ef17d9b57473f8a64b7b"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:dbe03226baf438ac4fda9e2d0715022fd579cb641c4cf639fa40d53b2fe6f3e2"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd9a8bd8900e65504a305bf8ae6fa9fbc66de94178c420791d0293702fce2df7"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8831399554b92b72af5932cdbbd4ddc55c55f631bb13ff8fe4e6536a06c5c51"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a14969b8691f7998e74663b77b4c36c0337cb1df552da83d5c9004a93afdb574"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaf7c1524c0542ee2fc82cc8ec337f7a9f7edee2532421ab200d2b920fc97cf"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425c5f215d0eecee9a56cdb703203dda90423247421bf0d67125add85d0c4455"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d5b054862739d276e09928de37c79ddeec42a6e1bfc55863be96a36ba22926f6"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:f3e73a4255342d4eb26ef6df01e3962e73aa29baa3124a8e824c5d3364a65748"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:2f6c34da58ea9c1a9515621f4d9ac379871a8f21168ba1b5e09d74250de5ad62"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_s390x.whl", hash = 
"sha256:f09cb5a7bbe1ecae6e87901a2eb23e0256bb524a79ccc53eb0b7629fbe7677c4"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0099d79bdfcf5c1f0c2c72f91516702ebf8b0b8ddd8905f97a8aecf49712c621"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-win32.whl", hash = "sha256:9c98230f5042f4945f957d006edccc2af1e03ed5e37ce7c373f00a5a4daa6149"}, + {file = "charset_normalizer-3.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:62f60aebecfc7f4b82e3f639a7d1433a20ec32824db2199a11ad4f5e146ef5ee"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:af73657b7a68211996527dbfeffbb0864e043d270580c5aef06dc4b659a4b578"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cab5d0b79d987c67f3b9e9c53f54a61360422a5a0bc075f43cab5621d530c3b6"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9289fd5dddcf57bab41d044f1756550f9e7cf0c8e373b8cdf0ce8773dc4bd417"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b493a043635eb376e50eedf7818f2f322eabbaa974e948bd8bdd29eb7ef2a51"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fa2566ca27d67c86569e8c85297aaf413ffab85a8960500f12ea34ff98e4c41"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8e538f46104c815be19c975572d74afb53f29650ea2025bbfaef359d2de2f7f"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fd30dc99682dc2c603c2b315bded2799019cea829f8bf57dc6b61efde6611c8"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2006769bd1640bdf4d5641c69a3d63b71b81445473cac5ded39740a226fa88ab"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:dc15e99b2d8a656f8e666854404f1ba54765871104e50c8e9813af8a7db07f12"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ab2e5bef076f5a235c3774b4f4028a680432cded7cad37bba0fd90d64b187d19"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:4ec9dd88a5b71abfc74e9df5ebe7921c35cbb3b641181a531ca65cdb5e8e4dea"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:43193c5cda5d612f247172016c4bb71251c784d7a4d9314677186a838ad34858"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:aa693779a8b50cd97570e5a0f343538a8dbd3e496fa5dcb87e29406ad0299654"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-win32.whl", hash = "sha256:7706f5850360ac01d80c89bcef1640683cc12ed87f42579dab6c5d3ed6888613"}, + {file = "charset_normalizer-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:c3e446d253bd88f6377260d07c895816ebf33ffffd56c1c792b13bff9c3e1ade"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:980b4f289d1d90ca5efcf07958d3eb38ed9c0b7676bf2831a54d4f66f9c27dfa"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f28f891ccd15c514a0981f3b9db9aa23d62fe1a99997512b0491d2ed323d229a"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8aacce6e2e1edcb6ac625fb0f8c3a9570ccc7bfba1f63419b3769ccf6a00ed0"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7af3717683bea4c87acd8c0d3d5b44d56120b26fd3f8a692bdd2d5260c620a"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ff2ed8194587faf56555927b3aa10e6fb69d931e33953943bc4f837dfee2242"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e91f541a85298cf35433bf66f3fab2a4a2cff05c127eeca4af174f6d497f0d4b"}, + {file = 
"charset_normalizer-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:309a7de0a0ff3040acaebb35ec45d18db4b28232f21998851cfa709eeff49d62"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:285e96d9d53422efc0d7a17c60e59f37fbf3dfa942073f666db4ac71e8d726d0"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5d447056e2ca60382d460a604b6302d8db69476fd2015c81e7c35417cfabe4cd"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:20587d20f557fe189b7947d8e7ec5afa110ccf72a3128d61a2a387c3313f46be"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:130272c698667a982a5d0e626851ceff662565379baf0ff2cc58067b81d4f11d"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:ab22fbd9765e6954bc0bcff24c25ff71dcbfdb185fcdaca49e81bac68fe724d3"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7782afc9b6b42200f7362858f9e73b1f8316afb276d316336c0ec3bd73312742"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-win32.whl", hash = "sha256:2de62e8801ddfff069cd5c504ce3bc9672b23266597d4e4f50eda28846c322f2"}, + {file = "charset_normalizer-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:95c3c157765b031331dd4db3c775e58deaee050a3042fcad72cbc4189d7c8dca"}, + {file = "charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079"}, + {file = "charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e"}, ] [[package]] @@ -621,6 +678,17 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "cloudpickle" +version = "3.1.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" +optional = 
false +python-versions = ">=3.8" +files = [ + {file = "cloudpickle-3.1.0-py3-none-any.whl", hash = "sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"}, + {file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -665,126 +733,157 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] [[package]] name = "contourpy" -version = "1.2.1" +version = "1.3.0" description = "Python library for calculating contours of 2D quadrilateral grids" optional = false python-versions = ">=3.9" files = [ - {file = "contourpy-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bd7c23df857d488f418439686d3b10ae2fbf9bc256cd045b37a8c16575ea1040"}, - {file = "contourpy-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5b9eb0ca724a241683c9685a484da9d35c872fd42756574a7cfbf58af26677fd"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c75507d0a55378240f781599c30e7776674dbaf883a46d1c90f37e563453480"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11959f0ce4a6f7b76ec578576a0b61a28bdc0696194b6347ba3f1c53827178b9"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb3315a8a236ee19b6df481fc5f997436e8ade24a9f03dfdc6bd490fea20c6da"}, - {file = "contourpy-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39f3ecaf76cd98e802f094e0d4fbc6dc9c45a8d0c4d185f0f6c2234e14e5f75b"}, - {file = "contourpy-1.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:94b34f32646ca0414237168d68a9157cb3889f06b096612afdd296003fdd32fd"}, - {file = "contourpy-1.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:457499c79fa84593f22454bbd27670227874cd2ff5d6c84e60575c8b50a69619"}, - {file = "contourpy-1.2.1-cp310-cp310-win32.whl", hash = 
"sha256:ac58bdee53cbeba2ecad824fa8159493f0bf3b8ea4e93feb06c9a465d6c87da8"}, - {file = "contourpy-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:9cffe0f850e89d7c0012a1fb8730f75edd4320a0a731ed0c183904fe6ecfc3a9"}, - {file = "contourpy-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6022cecf8f44e36af10bd9118ca71f371078b4c168b6e0fab43d4a889985dbb5"}, - {file = "contourpy-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef5adb9a3b1d0c645ff694f9bca7702ec2c70f4d734f9922ea34de02294fdf72"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6150ffa5c767bc6332df27157d95442c379b7dce3a38dff89c0f39b63275696f"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c863140fafc615c14a4bf4efd0f4425c02230eb8ef02784c9a156461e62c965"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:00e5388f71c1a0610e6fe56b5c44ab7ba14165cdd6d695429c5cd94021e390b2"}, - {file = "contourpy-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4492d82b3bc7fbb7e3610747b159869468079fe149ec5c4d771fa1f614a14df"}, - {file = "contourpy-1.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:49e70d111fee47284d9dd867c9bb9a7058a3c617274900780c43e38d90fe1205"}, - {file = "contourpy-1.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b59c0ffceff8d4d3996a45f2bb6f4c207f94684a96bf3d9728dbb77428dd8cb8"}, - {file = "contourpy-1.2.1-cp311-cp311-win32.whl", hash = "sha256:7b4182299f251060996af5249c286bae9361fa8c6a9cda5efc29fe8bfd6062ec"}, - {file = "contourpy-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2855c8b0b55958265e8b5888d6a615ba02883b225f2227461aa9127c578a4922"}, - {file = "contourpy-1.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:62828cada4a2b850dbef89c81f5a33741898b305db244904de418cc957ff05dc"}, - {file = "contourpy-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:309be79c0a354afff9ff7da4aaed7c3257e77edf6c1b448a779329431ee79d7e"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e785e0f2ef0d567099b9ff92cbfb958d71c2d5b9259981cd9bee81bd194c9a4"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cac0a8f71a041aa587410424ad46dfa6a11f6149ceb219ce7dd48f6b02b87a7"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af3f4485884750dddd9c25cb7e3915d83c2db92488b38ccb77dd594eac84c4a0"}, - {file = "contourpy-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ce6889abac9a42afd07a562c2d6d4b2b7134f83f18571d859b25624a331c90b"}, - {file = "contourpy-1.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a1eea9aecf761c661d096d39ed9026574de8adb2ae1c5bd7b33558af884fb2ce"}, - {file = "contourpy-1.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:187fa1d4c6acc06adb0fae5544c59898ad781409e61a926ac7e84b8f276dcef4"}, - {file = "contourpy-1.2.1-cp312-cp312-win32.whl", hash = "sha256:c2528d60e398c7c4c799d56f907664673a807635b857df18f7ae64d3e6ce2d9f"}, - {file = "contourpy-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:1a07fc092a4088ee952ddae19a2b2a85757b923217b7eed584fdf25f53a6e7ce"}, - {file = "contourpy-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bb6834cbd983b19f06908b45bfc2dad6ac9479ae04abe923a275b5f48f1a186b"}, - {file = "contourpy-1.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1d59e739ab0e3520e62a26c60707cc3ab0365d2f8fecea74bfe4de72dc56388f"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd3db01f59fdcbce5b22afad19e390260d6d0222f35a1023d9adc5690a889364"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a12a813949e5066148712a0626895c26b2578874e4cc63160bb007e6df3436fe"}, - {file = 
"contourpy-1.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe0ccca550bb8e5abc22f530ec0466136379c01321fd94f30a22231e8a48d985"}, - {file = "contourpy-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1d59258c3c67c865435d8fbeb35f8c59b8bef3d6f46c1f29f6123556af28445"}, - {file = "contourpy-1.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f32c38afb74bd98ce26de7cc74a67b40afb7b05aae7b42924ea990d51e4dac02"}, - {file = "contourpy-1.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d31a63bc6e6d87f77d71e1abbd7387ab817a66733734883d1fc0021ed9bfa083"}, - {file = "contourpy-1.2.1-cp39-cp39-win32.whl", hash = "sha256:ddcb8581510311e13421b1f544403c16e901c4e8f09083c881fab2be80ee31ba"}, - {file = "contourpy-1.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:10a37ae557aabf2509c79715cd20b62e4c7c28b8cd62dd7d99e5ed3ce28c3fd9"}, - {file = "contourpy-1.2.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a31f94983fecbac95e58388210427d68cd30fe8a36927980fab9c20062645609"}, - {file = "contourpy-1.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef2b055471c0eb466033760a521efb9d8a32b99ab907fc8358481a1dd29e3bd3"}, - {file = "contourpy-1.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b33d2bc4f69caedcd0a275329eb2198f560b325605810895627be5d4b876bf7f"}, - {file = "contourpy-1.2.1.tar.gz", hash = "sha256:4d8908b3bee1c889e547867ca4cdc54e5ab6be6d3e078556814a22457f49423c"}, + {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"}, + {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f8557cbb07415a4d6fa191f20fd9d2d9eb9c0b61d1b2f52a8926e43c6e9af7"}, + {file = 
"contourpy-1.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36f965570cff02b874773c49bfe85562b47030805d7d8360748f3eca570f4cab"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cacd81e2d4b6f89c9f8a5b69b86490152ff39afc58a95af002a398273e5ce589"}, + {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69375194457ad0fad3a839b9e29aa0b0ed53bb54db1bfb6c3ae43d111c31ce41"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a52040312b1a858b5e31ef28c2e865376a386c60c0e248370bbea2d3f3b760d"}, + {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3faeb2998e4fcb256542e8a926d08da08977f7f5e62cf733f3c211c2a5586223"}, + {file = "contourpy-1.3.0-cp310-cp310-win32.whl", hash = "sha256:36e0cff201bcb17a0a8ecc7f454fe078437fa6bda730e695a92f2d9932bd507f"}, + {file = "contourpy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:87ddffef1dbe5e669b5c2440b643d3fdd8622a348fe1983fad7a0f0ccb1cd67b"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"}, + {file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"}, + {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"}, + {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"}, + {file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"}, + {file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"}, + {file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"}, + {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"}, + {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"}, + {file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = 
"sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"}, + {file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"}, + {file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"}, + {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"}, + {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"}, + {file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"}, + {file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"}, + {file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"}, + {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"}, + {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a11077e395f67ffc2c44ec2418cfebed032cd6da3022a94fc227b6faf8e2acb8"}, + {file = "contourpy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8134301d7e204c88ed7ab50028ba06c683000040ede1d617298611f9dc6240c"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e12968fdfd5bb45ffdf6192a590bd8ddd3ba9e58360b29683c6bb71a7b41edca"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd2a0fc506eccaaa7595b7e1418951f213cf8255be2600f1ea1b61e46a60c55f"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfb5c62ce023dfc410d6059c936dcf96442ba40814aefbfa575425a3a7f19dc"}, + {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:68a32389b06b82c2fdd68276148d7b9275b5f5cf13e5417e4252f6d1a34f72a2"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94e848a6b83da10898cbf1311a815f770acc9b6a3f2d646f330d57eb4e87592e"}, + {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d78ab28a03c854a873787a0a42254a0ccb3cb133c672f645c9f9c8f3ae9d0800"}, + {file = "contourpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:81cb5ed4952aae6014bc9d0421dec7c5835c9c8c31cdf51910b708f548cf58e5"}, + {file = "contourpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:14e262f67bd7e6eb6880bc564dcda30b15e351a594657e55b7eec94b6ef72843"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe41b41505a5a33aeaed2a613dccaeaa74e0e3ead6dd6fd3a118fb471644fd6c"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eca7e17a65f72a5133bdbec9ecf22401c62bcf4821361ef7811faee695799779"}, + {file = "contourpy-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ec4dc6bf570f5b22ed0d7efba0dfa9c5b9e0431aeea7581aa217542d9e809a4"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:00ccd0dbaad6d804ab259820fa7cb0b8036bda0686ef844d24125d8287178ce0"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca947601224119117f7c19c9cdf6b3ab54c5726ef1d906aa4a69dfb6dd58102"}, + {file = "contourpy-1.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6ec93afeb848a0845a18989da3beca3eec2c0f852322efe21af1931147d12cb"}, + {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"}, ] [package.dependencies] -numpy = ">=1.20" +numpy = ">=1.23" [package.extras] bokeh = ["bokeh", "selenium"] docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] -mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.8.0)", "types-Pillow"] +mypy = ["contourpy[bokeh,docs]", 
"docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] -test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] +test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.5.4" +version = "7.6.4" description = "Code coverage measurement for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "coverage-7.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99"}, - {file = "coverage-7.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47"}, - {file = "coverage-7.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e"}, - {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d"}, - {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3"}, - {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016"}, - {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136"}, - {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9"}, - {file = "coverage-7.5.4-cp310-cp310-win32.whl", hash = "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8"}, - {file = "coverage-7.5.4-cp310-cp310-win_amd64.whl", hash = 
"sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f"}, - {file = "coverage-7.5.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5"}, - {file = "coverage-7.5.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba"}, - {file = "coverage-7.5.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b"}, - {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080"}, - {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c"}, - {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da"}, - {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0"}, - {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078"}, - {file = "coverage-7.5.4-cp311-cp311-win32.whl", hash = "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806"}, - {file = "coverage-7.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d"}, - {file = "coverage-7.5.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233"}, - {file = "coverage-7.5.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747"}, - {file = 
"coverage-7.5.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638"}, - {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e"}, - {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555"}, - {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f"}, - {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c"}, - {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805"}, - {file = "coverage-7.5.4-cp312-cp312-win32.whl", hash = "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b"}, - {file = "coverage-7.5.4-cp312-cp312-win_amd64.whl", hash = "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7"}, - {file = "coverage-7.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882"}, - {file = "coverage-7.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d"}, - {file = "coverage-7.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53"}, - {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4"}, - {file = 
"coverage-7.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4"}, - {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9"}, - {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f"}, - {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f"}, - {file = "coverage-7.5.4-cp38-cp38-win32.whl", hash = "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f"}, - {file = "coverage-7.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633"}, - {file = "coverage-7.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088"}, - {file = "coverage-7.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4"}, - {file = "coverage-7.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7"}, - {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8"}, - {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d"}, - {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029"}, - {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c"}, - {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7"}, - {file = "coverage-7.5.4-cp39-cp39-win32.whl", hash = "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace"}, - {file = "coverage-7.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d"}, - {file = "coverage-7.5.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5"}, - {file = "coverage-7.5.4.tar.gz", hash = "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353"}, + {file = "coverage-7.6.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f8ae553cba74085db385d489c7a792ad66f7f9ba2ee85bfa508aeb84cf0ba07"}, + {file = "coverage-7.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8165b796df0bd42e10527a3f493c592ba494f16ef3c8b531288e3d0d72c1f6f0"}, + {file = "coverage-7.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7c8b95bf47db6d19096a5e052ffca0a05f335bc63cef281a6e8fe864d450a72"}, + {file = "coverage-7.6.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ed9281d1b52628e81393f5eaee24a45cbd64965f41857559c2b7ff19385df51"}, + {file = "coverage-7.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0809082ee480bb8f7416507538243c8863ac74fd8a5d2485c46f0f7499f2b491"}, + {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d541423cdd416b78626b55f123412fcf979d22a2c39fce251b350de38c15c15b"}, + {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:58809e238a8a12a625c70450b48e8767cff9eb67c62e6154a642b21ddf79baea"}, + {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:c9b8e184898ed014884ca84c70562b4a82cbc63b044d366fedc68bc2b2f3394a"}, + {file = "coverage-7.6.4-cp310-cp310-win32.whl", hash = "sha256:6bd818b7ea14bc6e1f06e241e8234508b21edf1b242d49831831a9450e2f35fa"}, + {file = "coverage-7.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:06babbb8f4e74b063dbaeb74ad68dfce9186c595a15f11f5d5683f748fa1d172"}, + {file = "coverage-7.6.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:73d2b73584446e66ee633eaad1a56aad577c077f46c35ca3283cd687b7715b0b"}, + {file = "coverage-7.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51b44306032045b383a7a8a2c13878de375117946d68dcb54308111f39775a25"}, + {file = "coverage-7.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b3fb02fe73bed561fa12d279a417b432e5b50fe03e8d663d61b3d5990f29546"}, + {file = "coverage-7.6.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed8fe9189d2beb6edc14d3ad19800626e1d9f2d975e436f84e19efb7fa19469b"}, + {file = "coverage-7.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b369ead6527d025a0fe7bd3864e46dbee3aa8f652d48df6174f8d0bac9e26e0e"}, + {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ade3ca1e5f0ff46b678b66201f7ff477e8fa11fb537f3b55c3f0568fbfe6e718"}, + {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:27fb4a050aaf18772db513091c9c13f6cb94ed40eacdef8dad8411d92d9992db"}, + {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f704f0998911abf728a7783799444fcbbe8261c4a6c166f667937ae6a8aa522"}, + {file = "coverage-7.6.4-cp311-cp311-win32.whl", hash = "sha256:29155cd511ee058e260db648b6182c419422a0d2e9a4fa44501898cf918866cf"}, + {file = "coverage-7.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:8902dd6a30173d4ef09954bfcb24b5d7b5190cf14a43170e386979651e09ba19"}, + {file = "coverage-7.6.4-cp312-cp312-macosx_10_13_x86_64.whl", 
hash = "sha256:12394842a3a8affa3ba62b0d4ab7e9e210c5e366fbac3e8b2a68636fb19892c2"}, + {file = "coverage-7.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b6b4c83d8e8ea79f27ab80778c19bc037759aea298da4b56621f4474ffeb117"}, + {file = "coverage-7.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d5b8007f81b88696d06f7df0cb9af0d3b835fe0c8dbf489bad70b45f0e45613"}, + {file = "coverage-7.6.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b57b768feb866f44eeed9f46975f3d6406380275c5ddfe22f531a2bf187eda27"}, + {file = "coverage-7.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5915fcdec0e54ee229926868e9b08586376cae1f5faa9bbaf8faf3561b393d52"}, + {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b58c672d14f16ed92a48db984612f5ce3836ae7d72cdd161001cc54512571f2"}, + {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2fdef0d83a2d08d69b1f2210a93c416d54e14d9eb398f6ab2f0a209433db19e1"}, + {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8cf717ee42012be8c0cb205dbbf18ffa9003c4cbf4ad078db47b95e10748eec5"}, + {file = "coverage-7.6.4-cp312-cp312-win32.whl", hash = "sha256:7bb92c539a624cf86296dd0c68cd5cc286c9eef2d0c3b8b192b604ce9de20a17"}, + {file = "coverage-7.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:1032e178b76a4e2b5b32e19d0fd0abbce4b58e77a1ca695820d10e491fa32b08"}, + {file = "coverage-7.6.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:023bf8ee3ec6d35af9c1c6ccc1d18fa69afa1cb29eaac57cb064dbb262a517f9"}, + {file = "coverage-7.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0ac3d42cb51c4b12df9c5f0dd2f13a4f24f01943627120ec4d293c9181219ba"}, + {file = "coverage-7.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8fe4984b431f8621ca53d9380901f62bfb54ff759a1348cd140490ada7b693c"}, + {file = 
"coverage-7.6.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fbd612f8a091954a0c8dd4c0b571b973487277d26476f8480bfa4b2a65b5d06"}, + {file = "coverage-7.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dacbc52de979f2823a819571f2e3a350a7e36b8cb7484cdb1e289bceaf35305f"}, + {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dab4d16dfef34b185032580e2f2f89253d302facba093d5fa9dbe04f569c4f4b"}, + {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:862264b12ebb65ad8d863d51f17758b1684560b66ab02770d4f0baf2ff75da21"}, + {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5beb1ee382ad32afe424097de57134175fea3faf847b9af002cc7895be4e2a5a"}, + {file = "coverage-7.6.4-cp313-cp313-win32.whl", hash = "sha256:bf20494da9653f6410213424f5f8ad0ed885e01f7e8e59811f572bdb20b8972e"}, + {file = "coverage-7.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:182e6cd5c040cec0a1c8d415a87b67ed01193ed9ad458ee427741c7d8513d963"}, + {file = "coverage-7.6.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a181e99301a0ae128493a24cfe5cfb5b488c4e0bf2f8702091473d033494d04f"}, + {file = "coverage-7.6.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:df57bdbeffe694e7842092c5e2e0bc80fff7f43379d465f932ef36f027179806"}, + {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bcd1069e710600e8e4cf27f65c90c7843fa8edfb4520fb0ccb88894cad08b11"}, + {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99b41d18e6b2a48ba949418db48159d7a2e81c5cc290fc934b7d2380515bd0e3"}, + {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1e54712ba3474f34b7ef7a41e65bd9037ad47916ccb1cc78769bae324c01a"}, + {file 
= "coverage-7.6.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:53d202fd109416ce011578f321460795abfe10bb901b883cafd9b3ef851bacfc"}, + {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:c48167910a8f644671de9f2083a23630fbf7a1cb70ce939440cd3328e0919f70"}, + {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cc8ff50b50ce532de2fa7a7daae9dd12f0a699bfcd47f20945364e5c31799fef"}, + {file = "coverage-7.6.4-cp313-cp313t-win32.whl", hash = "sha256:b8d3a03d9bfcaf5b0141d07a88456bb6a4c3ce55c080712fec8418ef3610230e"}, + {file = "coverage-7.6.4-cp313-cp313t-win_amd64.whl", hash = "sha256:f3ddf056d3ebcf6ce47bdaf56142af51bb7fad09e4af310241e9db7a3a8022e1"}, + {file = "coverage-7.6.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cb7fa111d21a6b55cbf633039f7bc2749e74932e3aa7cb7333f675a58a58bf3"}, + {file = "coverage-7.6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11a223a14e91a4693d2d0755c7a043db43d96a7450b4f356d506c2562c48642c"}, + {file = "coverage-7.6.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a413a096c4cbac202433c850ee43fa326d2e871b24554da8327b01632673a076"}, + {file = "coverage-7.6.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00a1d69c112ff5149cabe60d2e2ee948752c975d95f1e1096742e6077affd376"}, + {file = "coverage-7.6.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f76846299ba5c54d12c91d776d9605ae33f8ae2b9d1d3c3703cf2db1a67f2c0"}, + {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fe439416eb6380de434886b00c859304338f8b19f6f54811984f3420a2e03858"}, + {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:0294ca37f1ba500667b1aef631e48d875ced93ad5e06fa665a3295bdd1d95111"}, + {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = 
"sha256:6f01ba56b1c0e9d149f9ac85a2f999724895229eb36bd997b61e62999e9b0901"}, + {file = "coverage-7.6.4-cp39-cp39-win32.whl", hash = "sha256:bc66f0bf1d7730a17430a50163bb264ba9ded56739112368ba985ddaa9c3bd09"}, + {file = "coverage-7.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:c481b47f6b5845064c65a7bc78bc0860e635a9b055af0df46fdf1c58cebf8e8f"}, + {file = "coverage-7.6.4-pp39.pp310-none-any.whl", hash = "sha256:3c65d37f3a9ebb703e710befdc489a38683a5b152242664b973a7b7b22348a4e"}, + {file = "coverage-7.6.4.tar.gz", hash = "sha256:29fc0f17b1d3fea332f8001d4558f8214af7f1d87a345f3a133c901d60347c73"}, ] [package.dependencies] @@ -795,49 +894,68 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "41.0.7" +version = "43.0.3" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:3c78451b78313fa81607fa1b3f1ae0a5ddd8014c38a02d9db0616133987b9cdf"}, - {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:928258ba5d6f8ae644e764d0f996d61a8777559f72dfeb2eea7e2fe0ad6e782d"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a1b41bc97f1ad230a41657d9155113c7521953869ae57ac39ac7f1bb471469a"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:841df4caa01008bad253bce2a6f7b47f86dc9f08df4b433c404def869f590a15"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5429ec739a29df2e29e15d082f1d9ad683701f0ec7709ca479b3ff2708dae65a"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:43f2552a2378b44869fe8827aa19e69512e3245a219104438692385b0ee119d1"}, - {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = 
"sha256:af03b32695b24d85a75d40e1ba39ffe7db7ffcb099fe507b39fd41a565f1b157"}, - {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:49f0805fc0b2ac8d4882dd52f4a3b935b210935d500b6b805f321addc8177406"}, - {file = "cryptography-41.0.7-cp37-abi3-win32.whl", hash = "sha256:f983596065a18a2183e7f79ab3fd4c475205b839e02cbc0efbbf9666c4b3083d"}, - {file = "cryptography-41.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:90452ba79b8788fa380dfb587cca692976ef4e757b194b093d845e8d99f612f2"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:079b85658ea2f59c4f43b70f8119a52414cdb7be34da5d019a77bf96d473b960"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b640981bf64a3e978a56167594a0e97db71c89a479da8e175d8bb5be5178c003"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e3114da6d7f95d2dee7d3f4eec16dacff819740bbab931aff8648cb13c5ff5e7"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d5ec85080cce7b0513cfd233914eb8b7bbd0633f1d1703aa28d1dd5a72f678ec"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a698cb1dac82c35fcf8fe3417a3aaba97de16a01ac914b89a0889d364d2f6be"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:37a138589b12069efb424220bf78eac59ca68b95696fc622b6ccc1c0a197204a"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:68a2dec79deebc5d26d617bfdf6e8aab065a4f34934b22d3b5010df3ba36612c"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:09616eeaef406f99046553b8a40fbf8b1e70795a91885ba4c96a70793de5504a"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48a0476626da912a44cc078f9893f292f0b3e4c739caf289268168d8f4702a39"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = 
"sha256:c7f3201ec47d5207841402594f1d7950879ef890c0c495052fa62f58283fde1a"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c5ca78485a255e03c32b513f8c2bc39fedb7f5c5f8535545bdc223a03b24f248"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6c391c021ab1f7a82da5d8d0b3cee2f4b2c455ec86c8aebbc84837a631ff309"}, - {file = "cryptography-41.0.7.tar.gz", hash = "sha256:13f93ce9bea8016c253b34afc6bd6a75993e5c40672ed5405a9c832f0d4a00bc"}, + {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e1ce50266f4f70bf41a2c6dc4358afadae90e2a1e5342d3c08883df1675374f"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:443c4a81bb10daed9a8f334365fe52542771f25aedaf889fd323a853ce7377d6"}, + {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:74f57f24754fe349223792466a709f8e0c093205ff0dca557af51072ff47ab18"}, + {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9762ea51a8fc2a88b70cf2995e5675b38d93bf36bd67d91721c309df184f49bd"}, + {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:81ef806b1fef6b06dcebad789f988d3b37ccaee225695cf3e07648eee0fc6b73"}, + {file = "cryptography-43.0.3-cp37-abi3-win32.whl", hash = "sha256:cbeb489927bd7af4aa98d4b261af9a5bc025bd87f0e3547e11584be9e9427be2"}, + {file = "cryptography-43.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:f46304d6f0c6ab8e52770addfa2fc41e6629495548862279641972b6215451cd"}, + {file = "cryptography-43.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = 
"sha256:8ac43ae87929a5982f5948ceda07001ee5e83227fd69cf55b109144938d96984"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:846da004a5804145a5f441b8530b4bf35afbf7da70f82409f151695b127213d5"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f996e7268af62598f2fc1204afa98a3b5712313a55c4c9d434aef49cadc91d4"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f7b178f11ed3664fd0e995a47ed2b5ff0a12d893e41dd0494f406d1cf555cab7"}, + {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c2e6fc39c4ab499049df3bdf567f768a723a5e8464816e8f009f121a5a9f4405"}, + {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e1be4655c7ef6e1bbe6b5d0403526601323420bcf414598955968c9ef3eb7d16"}, + {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:df6b6c6d742395dd77a23ea3728ab62f98379eff8fb61be2744d4679ab678f73"}, + {file = "cryptography-43.0.3-cp39-abi3-win32.whl", hash = "sha256:d56e96520b1020449bbace2b78b603442e7e378a9b3bd68de65c782db1507995"}, + {file = "cryptography-43.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:0c580952eef9bf68c4747774cde7ec1d85a6e61de97281f2dba83c7d2c806362"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d03b5621a135bffecad2c73e9f4deb1a0f977b9a8ffe6f8e002bf6c9d07b918c"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a2a431ee15799d6db9fe80c82b055bae5a752bef645bba795e8e52687c69efe3"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:281c945d0e28c92ca5e5930664c1cefd85efe80e5c0d2bc58dd63383fda29f83"}, + {file = "cryptography-43.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f18c716be16bc1fea8e95def49edf46b82fccaa88587a45f8dc0ff6ab5d8e0a7"}, + {file = 
"cryptography-43.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4a02ded6cd4f0a5562a8887df8b3bd14e822a90f97ac5e544c162899bc467664"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53a583b6637ab4c4e3591a15bc9db855b8d9dee9a669b550f311480acab6eb08"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1ec0bcf7e17c0c5669d881b1cd38c4972fade441b27bda1051665faaa89bdcaa"}, + {file = "cryptography-43.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ce6fae5bdad59577b44e4dfed356944fbf1d925269114c28be377692643b4ff"}, + {file = "cryptography-43.0.3.tar.gz", hash = "sha256:315b9001266a492a6ff443b61238f956b214dbec9910a081ba5b6646a055a805"}, ] [package.dependencies] -cffi = ">=1.12" +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] nox = ["nox"] -pep8test = ["black", "check-sdist", "mypy", "ruff"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", 
"numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + [[package]] name = "darglint" version = "1.8.1" @@ -851,13 +969,13 @@ files = [ [[package]] name = "db-dtypes" -version = "1.2.0" +version = "1.3.0" description = "Pandas Data Types for SQL systems (BigQuery, Spanner)" optional = false python-versions = ">=3.7" files = [ - {file = "db-dtypes-1.2.0.tar.gz", hash = "sha256:3531bb1fb8b5fbab33121fe243ccc2ade16ab2524f4c113b05cc702a1908e6ea"}, - {file = "db_dtypes-1.2.0-py2.py3-none-any.whl", hash = "sha256:6320bddd31d096447ef749224d64aab00972ed20e4392d86f7d8b81ad79f7ff0"}, + {file = "db_dtypes-1.3.0-py2.py3-none-any.whl", hash = "sha256:7e65c59f849ccbe6f7bc4d0253edcc212a7907662906921caba3e4aadd0bc277"}, + {file = "db_dtypes-1.3.0.tar.gz", hash = "sha256:7bcbc8858b07474dc85b77bb2f3ae488978d1336f5ea73b58c39d9118bc3e91b"}, ] [package.dependencies] @@ -868,44 +986,48 @@ pyarrow = ">=3.0.0" [[package]] name = "dbldatagen" -version = "0.4.0" +version = "0.4.0.post1" description = "Databricks Labs - PySpark Synthetic Data Generator" optional = false python-versions = ">=3.8.10" files = [ - {file = "dbldatagen-0.4.0-py3-none-any.whl", hash = "sha256:e60b0089b0600239d8f291b2b4dc76aafd57708da2eeb58f1a28c5ab13fdced0"}, - {file = "dbldatagen-0.4.0.tar.gz", hash = "sha256:0003ed8d85399a995e178ffea17600f23720ddeb7374ea00afb8a13941859c51"}, + {file = "dbldatagen-0.4.0.post1-py3-none-any.whl", hash = "sha256:b94b5fcf2bf5113fe789f5cdf92b50eb62b5e6c25fc867b634d6543cc1e79d40"}, + {file = "dbldatagen-0.4.0.post1.tar.gz", hash = "sha256:a254fba2a6384c75e2dfb38b1e8cdc1c52b417c59fd6ec977e11175ad7567f34"}, ] [[package]] name = "debugpy" -version = "1.8.1" +version = "1.8.7" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" files = [ - {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"}, - {file = 
"debugpy-1.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dda73bf69ea479c8577a0448f8c707691152e6c4de7f0c4dec5a4bc11dee516e"}, - {file = "debugpy-1.8.1-cp310-cp310-win32.whl", hash = "sha256:3a79c6f62adef994b2dbe9fc2cc9cc3864a23575b6e387339ab739873bea53d0"}, - {file = "debugpy-1.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:7eb7bd2b56ea3bedb009616d9e2f64aab8fc7000d481faec3cd26c98a964bcdd"}, - {file = "debugpy-1.8.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:016a9fcfc2c6b57f939673c874310d8581d51a0fe0858e7fac4e240c5eb743cb"}, - {file = "debugpy-1.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd97ed11a4c7f6d042d320ce03d83b20c3fb40da892f994bc041bbc415d7a099"}, - {file = "debugpy-1.8.1-cp311-cp311-win32.whl", hash = "sha256:0de56aba8249c28a300bdb0672a9b94785074eb82eb672db66c8144fff673146"}, - {file = "debugpy-1.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:1a9fe0829c2b854757b4fd0a338d93bc17249a3bf69ecf765c61d4c522bb92a8"}, - {file = "debugpy-1.8.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3ebb70ba1a6524d19fa7bb122f44b74170c447d5746a503e36adc244a20ac539"}, - {file = "debugpy-1.8.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2e658a9630f27534e63922ebf655a6ab60c370f4d2fc5c02a5b19baf4410ace"}, - {file = "debugpy-1.8.1-cp312-cp312-win32.whl", hash = "sha256:caad2846e21188797a1f17fc09c31b84c7c3c23baf2516fed5b40b378515bbf0"}, - {file = "debugpy-1.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:edcc9f58ec0fd121a25bc950d4578df47428d72e1a0d66c07403b04eb93bcf98"}, - {file = "debugpy-1.8.1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:7a3afa222f6fd3d9dfecd52729bc2e12c93e22a7491405a0ecbf9e1d32d45b39"}, - {file = "debugpy-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d915a18f0597ef685e88bb35e5d7ab968964b7befefe1aaea1eb5b2640b586c7"}, - {file = "debugpy-1.8.1-cp38-cp38-win32.whl", hash 
= "sha256:92116039b5500633cc8d44ecc187abe2dfa9b90f7a82bbf81d079fcdd506bae9"}, - {file = "debugpy-1.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:e38beb7992b5afd9d5244e96ad5fa9135e94993b0c551ceebf3fe1a5d9beb234"}, - {file = "debugpy-1.8.1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:bfb20cb57486c8e4793d41996652e5a6a885b4d9175dd369045dad59eaacea42"}, - {file = "debugpy-1.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd3fdd3f67a7e576dd869c184c5dd71d9aaa36ded271939da352880c012e703"}, - {file = "debugpy-1.8.1-cp39-cp39-win32.whl", hash = "sha256:58911e8521ca0c785ac7a0539f1e77e0ce2df753f786188f382229278b4cdf23"}, - {file = "debugpy-1.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:6df9aa9599eb05ca179fb0b810282255202a66835c6efb1d112d21ecb830ddd3"}, - {file = "debugpy-1.8.1-py2.py3-none-any.whl", hash = "sha256:28acbe2241222b87e255260c76741e1fbf04fdc3b6d094fcf57b6c6f75ce1242"}, - {file = "debugpy-1.8.1.zip", hash = "sha256:f696d6be15be87aef621917585f9bb94b1dc9e8aced570db1b8a6fc14e8f9b42"}, + {file = "debugpy-1.8.7-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:95fe04a573b8b22896c404365e03f4eda0ce0ba135b7667a1e57bd079793b96b"}, + {file = "debugpy-1.8.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:628a11f4b295ffb4141d8242a9bb52b77ad4a63a2ad19217a93be0f77f2c28c9"}, + {file = "debugpy-1.8.7-cp310-cp310-win32.whl", hash = "sha256:85ce9c1d0eebf622f86cc68618ad64bf66c4fc3197d88f74bb695a416837dd55"}, + {file = "debugpy-1.8.7-cp310-cp310-win_amd64.whl", hash = "sha256:29e1571c276d643757ea126d014abda081eb5ea4c851628b33de0c2b6245b037"}, + {file = "debugpy-1.8.7-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:caf528ff9e7308b74a1749c183d6808ffbedbb9fb6af78b033c28974d9b8831f"}, + {file = "debugpy-1.8.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:cba1d078cf2e1e0b8402e6bda528bf8fda7ccd158c3dba6c012b7897747c41a0"}, + {file = "debugpy-1.8.7-cp311-cp311-win32.whl", hash = "sha256:171899588bcd412151e593bd40d9907133a7622cd6ecdbdb75f89d1551df13c2"}, + {file = "debugpy-1.8.7-cp311-cp311-win_amd64.whl", hash = "sha256:6e1c4ffb0c79f66e89dfd97944f335880f0d50ad29525dc792785384923e2211"}, + {file = "debugpy-1.8.7-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:4d27d842311353ede0ad572600c62e4bcd74f458ee01ab0dd3a1a4457e7e3706"}, + {file = "debugpy-1.8.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:703c1fd62ae0356e194f3e7b7a92acd931f71fe81c4b3be2c17a7b8a4b546ec2"}, + {file = "debugpy-1.8.7-cp312-cp312-win32.whl", hash = "sha256:2f729228430ef191c1e4df72a75ac94e9bf77413ce5f3f900018712c9da0aaca"}, + {file = "debugpy-1.8.7-cp312-cp312-win_amd64.whl", hash = "sha256:45c30aaefb3e1975e8a0258f5bbd26cd40cde9bfe71e9e5a7ac82e79bad64e39"}, + {file = "debugpy-1.8.7-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:d050a1ec7e925f514f0f6594a1e522580317da31fbda1af71d1530d6ea1f2b40"}, + {file = "debugpy-1.8.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f4349a28e3228a42958f8ddaa6333d6f8282d5edaea456070e48609c5983b7"}, + {file = "debugpy-1.8.7-cp313-cp313-win32.whl", hash = "sha256:11ad72eb9ddb436afb8337891a986302e14944f0f755fd94e90d0d71e9100bba"}, + {file = "debugpy-1.8.7-cp313-cp313-win_amd64.whl", hash = "sha256:2efb84d6789352d7950b03d7f866e6d180284bc02c7e12cb37b489b7083d81aa"}, + {file = "debugpy-1.8.7-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:4b908291a1d051ef3331484de8e959ef3e66f12b5e610c203b5b75d2725613a7"}, + {file = "debugpy-1.8.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da8df5b89a41f1fd31503b179d0a84a5fdb752dddd5b5388dbd1ae23cda31ce9"}, + {file = "debugpy-1.8.7-cp38-cp38-win32.whl", hash = 
"sha256:b12515e04720e9e5c2216cc7086d0edadf25d7ab7e3564ec8b4521cf111b4f8c"}, + {file = "debugpy-1.8.7-cp38-cp38-win_amd64.whl", hash = "sha256:93176e7672551cb5281577cdb62c63aadc87ec036f0c6a486f0ded337c504596"}, + {file = "debugpy-1.8.7-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:90d93e4f2db442f8222dec5ec55ccfc8005821028982f1968ebf551d32b28907"}, + {file = "debugpy-1.8.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6db2a370e2700557a976eaadb16243ec9c91bd46f1b3bb15376d7aaa7632c81"}, + {file = "debugpy-1.8.7-cp39-cp39-win32.whl", hash = "sha256:a6cf2510740e0c0b4a40330640e4b454f928c7b99b0c9dbf48b11efba08a8cda"}, + {file = "debugpy-1.8.7-cp39-cp39-win_amd64.whl", hash = "sha256:6a9d9d6d31846d8e34f52987ee0f1a904c7baa4912bf4843ab39dadf9b8f3e0d"}, + {file = "debugpy-1.8.7-py2.py3-none-any.whl", hash = "sha256:57b00de1c8d2c84a61b90880f7e5b6deaf4c312ecbde3a0e8912f2a56c4ac9ae"}, + {file = "debugpy-1.8.7.zip", hash = "sha256:18b8f731ed3e2e1df8e9cdaa23fb1fc9c24e570cd0081625308ec51c82efe42e"}, ] [[package]] @@ -964,13 +1086,13 @@ tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} [[package]] name = "dill" -version = "0.3.8" +version = "0.3.9" description = "serialize all of Python" optional = false python-versions = ">=3.8" files = [ - {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, - {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, + {file = "dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a"}, + {file = "dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c"}, ] [package.extras] @@ -979,13 +1101,13 @@ profile = ["gprof2dot (>=2022.7.29)"] [[package]] name = "distlib" -version = "0.3.8" +version = "0.3.9" description = "Distribution utilities" 
optional = false python-versions = "*" files = [ - {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, - {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, + {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, + {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, ] [[package]] @@ -1004,24 +1126,24 @@ six = ">=1.4.0" [[package]] name = "docstring-parser-fork" -version = "0.0.8" +version = "0.0.9" description = "Parse Python docstrings in reST, Google and Numpydoc format" optional = false python-versions = "<4.0,>=3.7" files = [ - {file = "docstring_parser_fork-0.0.8-py3-none-any.whl", hash = "sha256:88098ae01b0909b241954ad2c50c0c29ec2292223366a540bfd68332be8fd595"}, - {file = "docstring_parser_fork-0.0.8.tar.gz", hash = "sha256:59d3b00d42ba9f4e229a7df7e1f6fc742845f88a1190973cc33ba336a5405425"}, + {file = "docstring_parser_fork-0.0.9-py3-none-any.whl", hash = "sha256:0be85ad00cb25bf5beeb673e46e777facf0f47552fa3a7570d120ef7e3374401"}, + {file = "docstring_parser_fork-0.0.9.tar.gz", hash = "sha256:95b23cc5092af85080c716a6da68360f5ae4fcffa75f4a3aca5e539783cbcc3d"}, ] [[package]] name = "exceptiongroup" -version = "1.2.1" +version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = 
"sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] [package.extras] @@ -1043,13 +1165,13 @@ testing = ["hatch", "pre-commit", "pytest", "tox"] [[package]] name = "executing" -version = "2.0.1" +version = "2.1.0" description = "Get the currently executing AST node of a frame, and other information" optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" files = [ - {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, - {file = "executing-2.0.1.tar.gz", hash = "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147"}, + {file = "executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf"}, + {file = "executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab"}, ] [package.extras] @@ -1057,29 +1179,29 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth [[package]] name = "filelock" -version = "3.15.4" +version = "3.16.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, - {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, + {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, + {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, ] [package.extras] -docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] -typing = ["typing-extensions (>=4.8)"] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] +typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "flake8" -version = "7.1.0" +version = "7.1.1" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" files = [ - {file = "flake8-7.1.0-py2.py3-none-any.whl", hash = "sha256:2e416edcc62471a64cea09353f4e7bdba32aeb079b6e360554c659a122b1bc6a"}, - {file = "flake8-7.1.0.tar.gz", hash = "sha256:48a07b626b55236e0fb4784ee69a465fbf59d79eec1f5b4785c3d3bc57d17aa5"}, + {file = "flake8-7.1.1-py2.py3-none-any.whl", hash = "sha256:597477df7860daa5aa0fdd84bf5208a043ab96b8e96ab708770ae0364dd03213"}, + {file = "flake8-7.1.1.tar.gz", hash = "sha256:049d058491e228e03e67b390f311bbf88fce2dbaa8fa673e7aea87b7198b8d38"}, ] [package.dependencies] @@ -1087,101 
+1209,187 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" +[[package]] +name = "fonttools" +version = "4.54.1" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.54.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ed7ee041ff7b34cc62f07545e55e1468808691dddfd315d51dd82a6b37ddef2"}, + {file = "fonttools-4.54.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41bb0b250c8132b2fcac148e2e9198e62ff06f3cc472065dff839327945c5882"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7965af9b67dd546e52afcf2e38641b5be956d68c425bef2158e95af11d229f10"}, + {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278913a168f90d53378c20c23b80f4e599dca62fbffae4cc620c8eed476b723e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0e88e3018ac809b9662615072dcd6b84dca4c2d991c6d66e1970a112503bba7e"}, + {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4aa4817f0031206e637d1e685251ac61be64d1adef111060df84fdcbc6ab6c44"}, + {file = "fonttools-4.54.1-cp310-cp310-win32.whl", hash = "sha256:7e3b7d44e18c085fd8c16dcc6f1ad6c61b71ff463636fcb13df7b1b818bd0c02"}, + {file = "fonttools-4.54.1-cp310-cp310-win_amd64.whl", hash = "sha256:dd9cc95b8d6e27d01e1e1f1fae8559ef3c02c76317da650a19047f249acd519d"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5419771b64248484299fa77689d4f3aeed643ea6630b2ea750eeab219588ba20"}, + {file = "fonttools-4.54.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:301540e89cf4ce89d462eb23a89464fef50915255ece765d10eee8b2bf9d75b2"}, + {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76ae5091547e74e7efecc3cbf8e75200bc92daaeb88e5433c5e3e95ea8ce5aa7"}, + {file = 
"fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82834962b3d7c5ca98cb56001c33cf20eb110ecf442725dc5fdf36d16ed1ab07"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d26732ae002cc3d2ecab04897bb02ae3f11f06dd7575d1df46acd2f7c012a8d8"}, + {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58974b4987b2a71ee08ade1e7f47f410c367cdfc5a94fabd599c88165f56213a"}, + {file = "fonttools-4.54.1-cp311-cp311-win32.whl", hash = "sha256:ab774fa225238986218a463f3fe151e04d8c25d7de09df7f0f5fce27b1243dbc"}, + {file = "fonttools-4.54.1-cp311-cp311-win_amd64.whl", hash = "sha256:07e005dc454eee1cc60105d6a29593459a06321c21897f769a281ff2d08939f6"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:54471032f7cb5fca694b5f1a0aaeba4af6e10ae989df408e0216f7fd6cdc405d"}, + {file = "fonttools-4.54.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fa92cb248e573daab8d032919623cc309c005086d743afb014c836636166f08"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a911591200114969befa7f2cb74ac148bce5a91df5645443371aba6d222e263"}, + {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93d458c8a6a354dc8b48fc78d66d2a8a90b941f7fec30e94c7ad9982b1fa6bab"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5eb2474a7c5be8a5331146758debb2669bf5635c021aee00fd7c353558fc659d"}, + {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9c563351ddc230725c4bdf7d9e1e92cbe6ae8553942bd1fb2b2ff0884e8b714"}, + {file = "fonttools-4.54.1-cp312-cp312-win32.whl", hash = "sha256:fdb062893fd6d47b527d39346e0c5578b7957dcea6d6a3b6794569370013d9ac"}, + {file = "fonttools-4.54.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4564cf40cebcb53f3dc825e85910bf54835e8a8b6880d59e5159f0f325e637e"}, + 
{file = "fonttools-4.54.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6e37561751b017cf5c40fce0d90fd9e8274716de327ec4ffb0df957160be3bff"}, + {file = "fonttools-4.54.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:357cacb988a18aace66e5e55fe1247f2ee706e01debc4b1a20d77400354cddeb"}, + {file = "fonttools-4.54.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e953cc0bddc2beaf3a3c3b5dd9ab7554677da72dfaf46951e193c9653e515a"}, + {file = "fonttools-4.54.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58d29b9a294573d8319f16f2f79e42428ba9b6480442fa1836e4eb89c4d9d61c"}, + {file = "fonttools-4.54.1-cp313-cp313-win32.whl", hash = "sha256:9ef1b167e22709b46bf8168368b7b5d3efeaaa746c6d39661c1b4405b6352e58"}, + {file = "fonttools-4.54.1-cp313-cp313-win_amd64.whl", hash = "sha256:262705b1663f18c04250bd1242b0515d3bbae177bee7752be67c979b7d47f43d"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ed2f80ca07025551636c555dec2b755dd005e2ea8fbeb99fc5cdff319b70b23b"}, + {file = "fonttools-4.54.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9dc080e5a1c3b2656caff2ac2633d009b3a9ff7b5e93d0452f40cd76d3da3b3c"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d152d1be65652fc65e695e5619e0aa0982295a95a9b29b52b85775243c06556"}, + {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8583e563df41fdecef31b793b4dd3af8a9caa03397be648945ad32717a92885b"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d1d353ef198c422515a3e974a1e8d5b304cd54a4c2eebcae708e37cd9eeffb1"}, + {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fda582236fee135d4daeca056c8c88ec5f6f6d88a004a79b84a02547c8f57386"}, + {file = "fonttools-4.54.1-cp38-cp38-win32.whl", hash = "sha256:e7d82b9e56716ed32574ee106cabca80992e6bbdcf25a88d97d21f73a0aae664"}, 
+ {file = "fonttools-4.54.1-cp38-cp38-win_amd64.whl", hash = "sha256:ada215fd079e23e060157aab12eba0d66704316547f334eee9ff26f8c0d7b8ab"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f5b8a096e649768c2f4233f947cf9737f8dbf8728b90e2771e2497c6e3d21d13"}, + {file = "fonttools-4.54.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e10d2e0a12e18f4e2dd031e1bf7c3d7017be5c8dbe524d07706179f355c5dac"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31c32d7d4b0958600eac75eaf524b7b7cb68d3a8c196635252b7a2c30d80e986"}, + {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c39287f5c8f4a0c5a55daf9eaf9ccd223ea59eed3f6d467133cc727d7b943a55"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a7a310c6e0471602fe3bf8efaf193d396ea561486aeaa7adc1f132e02d30c4b9"}, + {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d3b659d1029946f4ff9b6183984578041b520ce0f8fb7078bb37ec7445806b33"}, + {file = "fonttools-4.54.1-cp39-cp39-win32.whl", hash = "sha256:e96bc94c8cda58f577277d4a71f51c8e2129b8b36fd05adece6320dd3d57de8a"}, + {file = "fonttools-4.54.1-cp39-cp39-win_amd64.whl", hash = "sha256:e8a4b261c1ef91e7188a30571be6ad98d1c6d9fa2427244c545e2fa0a2494dd7"}, + {file = "fonttools-4.54.1-py3-none-any.whl", hash = "sha256:37cddd62d83dc4f72f7c3f3c2bcf2697e89a30efb152079896544a93907733bd"}, + {file = "fonttools-4.54.1.tar.gz", hash = "sha256:957f669d4922f92c171ba01bef7f29410668db09f6c02111e22b2bce446f3285"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = 
["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + [[package]] name = "frozenlist" -version = "1.4.1" +version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" files = [ - {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, - {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, - {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"}, - {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"}, - {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"}, - {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"}, - {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"}, - {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"}, - {file = 
"frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"}, - {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"}, - {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"}, - {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"}, - {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"}, - {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"}, - {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"}, - {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"}, - {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"}, - {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"}, - {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"}, - {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"}, - {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"}, - {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"}, - {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"}, - {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"}, - {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"}, - {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"}, - {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"}, - {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"}, - {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"}, - {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"}, + {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"}, + {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"}, + {file = 
"frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"}, + {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"}, + {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"}, + {file = 
"frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"}, + {file = 
"frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"}, + {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"}, + {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"}, + {file = 
"frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"}, + {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"}, + {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"}, + {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"}, + {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"}, + {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"}, + {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"}, + {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"}, + {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"}, ] [[package]] name = "fsspec" -version = "2024.6.0" +version = "2024.10.0" 
description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2024.6.0-py3-none-any.whl", hash = "sha256:58d7122eb8a1a46f7f13453187bfea4972d66bf01618d37366521b1998034cee"}, - {file = "fsspec-2024.6.0.tar.gz", hash = "sha256:f579960a56e6d8038a9efc8f9c77279ec12e6299aa86b0769a7e9c46b94527c2"}, + {file = "fsspec-2024.10.0-py3-none-any.whl", hash = "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871"}, + {file = "fsspec-2024.10.0.tar.gz", hash = "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493"}, ] [package.extras] @@ -1214,19 +1422,19 @@ tqdm = ["tqdm"] [[package]] name = "gcsfs" -version = "2024.6.0" +version = "2024.10.0" description = "Convenient Filesystem interface over GCS" optional = false python-versions = ">=3.8" files = [ - {file = "gcsfs-2024.6.0-py2.py3-none-any.whl", hash = "sha256:92c9239167bd1e209b662b6f4ab71974f276118779c55360215cce5e0098ca7f"}, - {file = "gcsfs-2024.6.0.tar.gz", hash = "sha256:27bd490d7a9dd641d5f6f4ea0b18fabdcfa6129b84ebdb22b23e3460ded1aa8c"}, + {file = "gcsfs-2024.10.0-py2.py3-none-any.whl", hash = "sha256:bb2d23547e61203ea2dda5fa6c4b91a0c34b74ebe8bb6ab1926f6c33381bceb2"}, + {file = "gcsfs-2024.10.0.tar.gz", hash = "sha256:5df54cfe568e8fdeea5aafa7fed695cdc69a9a674e991ca8c1ce634f5df1d314"}, ] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" -fsspec = "2024.6.0" +fsspec = "2024.10.0" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -1301,13 +1509,13 @@ beautifulsoup4 = "*" [[package]] name = "google-api-core" -version = "2.19.0" +version = "2.22.0" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-core-2.19.0.tar.gz", hash = "sha256:cf1b7c2694047886d2af1128a03ae99e391108a08804f87cfd35970e49c9cd10"}, - {file = "google_api_core-2.19.0-py3-none-any.whl", hash = 
"sha256:8661eec4078c35428fd3f69a2c7ee29e342896b70f01d1a1cbcb334372dd6251"}, + {file = "google_api_core-2.22.0-py3-none-any.whl", hash = "sha256:a6652b6bd51303902494998626653671703c420f6f4c88cfd3f50ed723e9d021"}, + {file = "google_api_core-2.22.0.tar.gz", hash = "sha256:26f8d76b96477db42b55fd02a33aae4a42ec8b86b98b94969b7333a2c828bf35"}, ] [package.dependencies] @@ -1316,23 +1524,24 @@ googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} grpcio-status = {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""} proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" [package.extras] +async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.30.0" +version = "2.35.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-auth-2.30.0.tar.gz", hash = "sha256:ab630a1320f6720909ad76a7dbdb6841cdf5c66b328d690027e4867bdfb16688"}, - {file = "google_auth-2.30.0-py2.py3-none-any.whl", hash = "sha256:8df7da660f62757388b8a7f249df13549b3373f24388cb5d2f1dd91cc18180b5"}, + {file = "google_auth-2.35.0-py2.py3-none-any.whl", hash = "sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f"}, + {file = "google_auth-2.35.0.tar.gz", hash = 
"sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a"}, ] [package.dependencies] @@ -1342,7 +1551,7 @@ rsa = ">=3.1.4,<5" [package.extras] aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] -enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +enterprise-cert = ["cryptography", "pyopenssl"] pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0.dev0)"] @@ -1367,30 +1576,30 @@ tool = ["click (>=6.0.0)"] [[package]] name = "google-cloud-bigquery" -version = "3.25.0" +version = "3.26.0" description = "Google BigQuery API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-bigquery-3.25.0.tar.gz", hash = "sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509"}, - {file = "google_cloud_bigquery-3.25.0-py2.py3-none-any.whl", hash = "sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9"}, + {file = "google_cloud_bigquery-3.26.0-py2.py3-none-any.whl", hash = "sha256:e0e9ad28afa67a18696e624cbccab284bf2c0a3f6eeb9eeb0426c69b943793a8"}, + {file = "google_cloud_bigquery-3.26.0.tar.gz", hash = "sha256:edbdc788beea659e04c0af7fe4dcd6d9155344b98951a0d5055bd2f15da4ba23"}, ] [package.dependencies] -google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=2.11.1,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<3.0.0dev" -google-cloud-core = ">=1.6.0,<3.0.0dev" -google-resumable-media = ">=0.6.0,<3.0dev" +google-cloud-core = ">=2.4.1,<3.0.0dev" +google-resumable-media = ">=2.0.0,<3.0dev" packaging = ">=20.0.0" -python-dateutil = ">=2.7.2,<3.0dev" +python-dateutil = ">=2.7.3,<3.0dev" requests = ">=2.21.0,<3.0.0dev" [package.extras] -all = ["Shapely (>=1.8.4,<3.0.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio 
(>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "importlib-metadata (>=1.0.0)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] -bigquery-v2 = ["proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)"] +all = ["Shapely (>=1.8.4,<3.0.0dev)", "bigquery-magics (>=0.1.0)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "importlib-metadata (>=1.0.0)", "ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.22.3,<2.0.0dev)", "protobuf (>=3.20.2,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] +bigquery-v2 = ["proto-plus (>=1.22.3,<2.0.0dev)", "protobuf (>=3.20.2,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev)"] bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] geopandas = ["Shapely (>=1.8.4,<3.0.0dev)", "geopandas (>=0.9.0,<1.0dev)"] -ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] +ipython = ["bigquery-magics (>=0.1.0)"] ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "importlib-metadata (>=1.0.0)", "pandas (>=1.1.0)", 
"pyarrow (>=3.0.0)"] @@ -1416,13 +1625,13 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] name = "google-cloud-dataproc" -version = "5.10.1" +version = "5.15.0" description = "Google Cloud Dataproc API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-dataproc-5.10.1.tar.gz", hash = "sha256:f3f0f0f3933328e80273774540368432550e296c255928657069a31a2de01c39"}, - {file = "google_cloud_dataproc-5.10.1-py2.py3-none-any.whl", hash = "sha256:28b763c9b019ca7d7c3e917ade04647c00494e77d4e682ca221d53e8d36f70af"}, + {file = "google_cloud_dataproc-5.15.0-py2.py3-none-any.whl", hash = "sha256:14dfcf327fa1c2ede3601fbbc1d559ace43682481aef42a182fb158af876c083"}, + {file = "google_cloud_dataproc-5.15.0.tar.gz", hash = "sha256:010e335368d0f47963643e323be03916d3e8556b772acbe50215fd54f156f91f"}, ] [package.dependencies] @@ -1434,13 +1643,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "google-cloud-secret-manager" -version = "2.20.0" +version = "2.21.0" description = "Google Cloud Secret Manager API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-secret-manager-2.20.0.tar.gz", hash = "sha256:a086a7413aaf4fffbd1c4fe9229ef0ce9bcf48f5a8df5b449c4a32deb5a2cfde"}, - {file = "google_cloud_secret_manager-2.20.0-py2.py3-none-any.whl", hash = "sha256:c20bf22e59d220c51aa84a1db3411b14b83aa71f788fae8d273c03a4bf3e77ed"}, + {file = "google_cloud_secret_manager-2.21.0-py2.py3-none-any.whl", hash = "sha256:b7fed5c2f3be5e10d94053ea3a7c6a7c5813d38da39c678ef6c1137d6e25a310"}, + {file = "google_cloud_secret_manager-2.21.0.tar.gz", hash = "sha256:d1ae84ecf98cfc319c9a3f1012355cebd19317b662cc9dff1a2c36234580807b"}, ] [package.dependencies] @@ -1448,17 +1657,17 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extr google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" 
grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" [[package]] name = "google-cloud-storage" -version = "2.17.0" +version = "2.18.2" description = "Google Cloud Storage API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, - {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = "sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, + {file = "google_cloud_storage-2.18.2-py2.py3-none-any.whl", hash = "sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166"}, + {file = "google_cloud_storage-2.18.2.tar.gz", hash = "sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99"}, ] [package.dependencies] @@ -1466,87 +1675,47 @@ google-api-core = ">=2.15.0,<3.0.0dev" google-auth = ">=2.26.1,<3.0dev" google-cloud-core = ">=2.3.0,<3.0dev" google-crc32c = ">=1.0,<2.0dev" -google-resumable-media = ">=2.6.0" +google-resumable-media = ">=2.7.2" requests = ">=2.18.0,<3.0.0dev" [package.extras] -protobuf = ["protobuf (<5.0.0dev)"] +protobuf = ["protobuf (<6.0.0dev)"] +tracing = ["opentelemetry-api (>=1.1.0)"] [[package]] name = "google-crc32c" -version = "1.5.0" +version = "1.6.0" description = "A python wrapper of the C library 'Google CRC32C'" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, - {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash 
= "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, - {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, - {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, - {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, - {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, - {file = "google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, - {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, - {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, - {file = 
"google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, - {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, - {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, - {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, - {file = "google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, - {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, - {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, - {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, - {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, - {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, - {file = 
"google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, - {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, - {file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, - {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, - {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, - {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, - {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, - {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, - 
{file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, - {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, - {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, - {file = 
"google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, - {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, + {file = "google_crc32c-1.6.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa"}, + {file = "google_crc32c-1.6.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9"}, + {file = "google_crc32c-1.6.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7"}, + {file = "google_crc32c-1.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e"}, + {file = "google_crc32c-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc"}, + {file = "google_crc32c-1.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42"}, + {file = "google_crc32c-1.6.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4"}, + {file = "google_crc32c-1.6.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8"}, + {file = 
"google_crc32c-1.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d"}, + {file = "google_crc32c-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f"}, + {file = "google_crc32c-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3"}, + {file = "google_crc32c-1.6.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d"}, + {file = "google_crc32c-1.6.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b"}, + {file = "google_crc32c-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00"}, + {file = "google_crc32c-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3"}, + {file = "google_crc32c-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760"}, + {file = "google_crc32c-1.6.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205"}, + {file = "google_crc32c-1.6.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0"}, + {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2"}, + {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871"}, + {file = 
"google_crc32c-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57"}, + {file = "google_crc32c-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c"}, + {file = "google_crc32c-1.6.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc"}, + {file = "google_crc32c-1.6.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d"}, + {file = "google_crc32c-1.6.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24"}, + {file = "google_crc32c-1.6.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d"}, + {file = "google_crc32c-1.6.0.tar.gz", hash = "sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc"}, ] [package.extras] @@ -1554,13 +1723,13 @@ testing = ["pytest"] [[package]] name = "google-resumable-media" -version = "2.7.1" +version = "2.7.2" description = "Utilities for Google Media Downloads and Resumable Uploads" optional = false python-versions = ">=3.7" files = [ - {file = "google-resumable-media-2.7.1.tar.gz", hash = "sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33"}, - {file = "google_resumable_media-2.7.1-py2.py3-none-any.whl", hash = "sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c"}, + {file = "google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa"}, + {file = "google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0"}, ] 
[package.dependencies] @@ -1572,31 +1741,31 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] [[package]] name = "googleapis-common-protos" -version = "1.63.1" +version = "1.65.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" files = [ - {file = "googleapis-common-protos-1.63.1.tar.gz", hash = "sha256:c6442f7a0a6b2a80369457d79e6672bb7dcbaab88e0848302497e3ec80780a6a"}, - {file = "googleapis_common_protos-1.63.1-py2.py3-none-any.whl", hash = "sha256:0e1c2cdfcbc354b76e4a211a35ea35d6926a835cba1377073c4861db904a1877"}, + {file = "googleapis_common_protos-1.65.0-py2.py3-none-any.whl", hash = "sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63"}, + {file = "googleapis_common_protos-1.65.0.tar.gz", hash = "sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0"}, ] [package.dependencies] grpcio = {version = ">=1.44.0,<2.0.0.dev0", optional = true, markers = "extra == \"grpc\""} -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" +protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] [[package]] name = "griffe" -version = "1.2.0" +version = "1.5.1" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "griffe-1.2.0-py3-none-any.whl", hash = "sha256:a8b2fcb1ecdc5a412e646b0b4375eb20a5d2eac3a11dd8c10c56967a4097663c"}, - {file = "griffe-1.2.0.tar.gz", hash = "sha256:1c9f6ef7455930f3f9b0c4145a961c90385d1e2cbc496f7796fbff560ec60d31"}, + {file = "griffe-1.5.1-py3-none-any.whl", hash = "sha256:ad6a7980f8c424c9102160aafa3bcdf799df0e75f7829d75af9ee5aef656f860"}, + {file = "griffe-1.5.1.tar.gz", hash = "sha256:72964f93e08c553257706d6cd2c42d1c172213feb48b2be386f243380b405d4b"}, ] [package.dependencies] @@ -1604,77 +1773,86 @@ colorama = ">=0.4" [[package]] name = "grpc-google-iam-v1" -version = "0.13.0" +version = "0.13.1" description = "IAM API client library" optional = false python-versions = ">=3.7" files = [ - {file = "grpc-google-iam-v1-0.13.0.tar.gz", hash = "sha256:fad318608b9e093258fbf12529180f400d1c44453698a33509cc6ecf005b294e"}, - {file = "grpc_google_iam_v1-0.13.0-py2.py3-none-any.whl", hash = "sha256:53902e2af7de8df8c1bd91373d9be55b0743ec267a7428ea638db3775becae89"}, + {file = "grpc-google-iam-v1-0.13.1.tar.gz", hash = "sha256:3ff4b2fd9d990965e410965253c0da6f66205d5a8291c4c31c6ebecca18a9001"}, + {file = "grpc_google_iam_v1-0.13.1-py2.py3-none-any.whl", hash = "sha256:c3e86151a981811f30d5e7330f271cee53e73bb87755e88cc3b6f0c7b5fe374e"}, ] [package.dependencies] googleapis-common-protos = {version = ">=1.56.0,<2.0.0dev", extras = ["grpc"]} grpcio = ">=1.44.0,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" +protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" [[package]] name = "grpcio" -version = "1.64.1" +version = "1.67.1" description = "HTTP/2-based RPC framework" optional = false python-versions = ">=3.8" files = [ - {file = "grpcio-1.64.1-cp310-cp310-linux_armv7l.whl", 
hash = "sha256:55697ecec192bc3f2f3cc13a295ab670f51de29884ca9ae6cd6247df55df2502"}, - {file = "grpcio-1.64.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:3b64ae304c175671efdaa7ec9ae2cc36996b681eb63ca39c464958396697daff"}, - {file = "grpcio-1.64.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:bac71b4b28bc9af61efcdc7630b166440bbfbaa80940c9a697271b5e1dabbc61"}, - {file = "grpcio-1.64.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c024ffc22d6dc59000faf8ad781696d81e8e38f4078cb0f2630b4a3cf231a90"}, - {file = "grpcio-1.64.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7cd5c1325f6808b8ae31657d281aadb2a51ac11ab081ae335f4f7fc44c1721d"}, - {file = "grpcio-1.64.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0a2813093ddb27418a4c99f9b1c223fab0b053157176a64cc9db0f4557b69bd9"}, - {file = "grpcio-1.64.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2981c7365a9353f9b5c864595c510c983251b1ab403e05b1ccc70a3d9541a73b"}, - {file = "grpcio-1.64.1-cp310-cp310-win32.whl", hash = "sha256:1262402af5a511c245c3ae918167eca57342c72320dffae5d9b51840c4b2f86d"}, - {file = "grpcio-1.64.1-cp310-cp310-win_amd64.whl", hash = "sha256:19264fc964576ddb065368cae953f8d0514ecc6cb3da8903766d9fb9d4554c33"}, - {file = "grpcio-1.64.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:58b1041e7c870bb30ee41d3090cbd6f0851f30ae4eb68228955d973d3efa2e61"}, - {file = "grpcio-1.64.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bbc5b1d78a7822b0a84c6f8917faa986c1a744e65d762ef6d8be9d75677af2ca"}, - {file = "grpcio-1.64.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5841dd1f284bd1b3d8a6eca3a7f062b06f1eec09b184397e1d1d43447e89a7ae"}, - {file = "grpcio-1.64.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8caee47e970b92b3dd948371230fcceb80d3f2277b3bf7fbd7c0564e7d39068e"}, - {file = "grpcio-1.64.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:73819689c169417a4f978e562d24f2def2be75739c4bed1992435d007819da1b"}, - {file = "grpcio-1.64.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6503b64c8b2dfad299749cad1b595c650c91e5b2c8a1b775380fcf8d2cbba1e9"}, - {file = "grpcio-1.64.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1de403fc1305fd96cfa75e83be3dee8538f2413a6b1685b8452301c7ba33c294"}, - {file = "grpcio-1.64.1-cp311-cp311-win32.whl", hash = "sha256:d4d29cc612e1332237877dfa7fe687157973aab1d63bd0f84cf06692f04c0367"}, - {file = "grpcio-1.64.1-cp311-cp311-win_amd64.whl", hash = "sha256:5e56462b05a6f860b72f0fa50dca06d5b26543a4e88d0396259a07dc30f4e5aa"}, - {file = "grpcio-1.64.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:4657d24c8063e6095f850b68f2d1ba3b39f2b287a38242dcabc166453e950c59"}, - {file = "grpcio-1.64.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:62b4e6eb7bf901719fce0ca83e3ed474ae5022bb3827b0a501e056458c51c0a1"}, - {file = "grpcio-1.64.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:ee73a2f5ca4ba44fa33b4d7d2c71e2c8a9e9f78d53f6507ad68e7d2ad5f64a22"}, - {file = "grpcio-1.64.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:198908f9b22e2672a998870355e226a725aeab327ac4e6ff3a1399792ece4762"}, - {file = "grpcio-1.64.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b9d0acaa8d835a6566c640f48b50054f422d03e77e49716d4c4e8e279665a1"}, - {file = "grpcio-1.64.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:5e42634a989c3aa6049f132266faf6b949ec2a6f7d302dbb5c15395b77d757eb"}, - {file = "grpcio-1.64.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b1a82e0b9b3022799c336e1fc0f6210adc019ae84efb7321d668129d28ee1efb"}, - {file = "grpcio-1.64.1-cp312-cp312-win32.whl", hash = "sha256:55260032b95c49bee69a423c2f5365baa9369d2f7d233e933564d8a47b893027"}, - {file = "grpcio-1.64.1-cp312-cp312-win_amd64.whl", hash = "sha256:c1a786ac592b47573a5bb7e35665c08064a5d77ab88a076eec11f8ae86b3e3f6"}, - {file = 
"grpcio-1.64.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:a011ac6c03cfe162ff2b727bcb530567826cec85eb8d4ad2bfb4bd023287a52d"}, - {file = "grpcio-1.64.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:4d6dab6124225496010bd22690f2d9bd35c7cbb267b3f14e7a3eb05c911325d4"}, - {file = "grpcio-1.64.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:a5e771d0252e871ce194d0fdcafd13971f1aae0ddacc5f25615030d5df55c3a2"}, - {file = "grpcio-1.64.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c3c1b90ab93fed424e454e93c0ed0b9d552bdf1b0929712b094f5ecfe7a23ad"}, - {file = "grpcio-1.64.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20405cb8b13fd779135df23fabadc53b86522d0f1cba8cca0e87968587f50650"}, - {file = "grpcio-1.64.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0cc79c982ccb2feec8aad0e8fb0d168bcbca85bc77b080d0d3c5f2f15c24ea8f"}, - {file = "grpcio-1.64.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a3a035c37ce7565b8f4f35ff683a4db34d24e53dc487e47438e434eb3f701b2a"}, - {file = "grpcio-1.64.1-cp38-cp38-win32.whl", hash = "sha256:1257b76748612aca0f89beec7fa0615727fd6f2a1ad580a9638816a4b2eb18fd"}, - {file = "grpcio-1.64.1-cp38-cp38-win_amd64.whl", hash = "sha256:0a12ddb1678ebc6a84ec6b0487feac020ee2b1659cbe69b80f06dbffdb249122"}, - {file = "grpcio-1.64.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:75dbbf415026d2862192fe1b28d71f209e2fd87079d98470db90bebe57b33179"}, - {file = "grpcio-1.64.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e3d9f8d1221baa0ced7ec7322a981e28deb23749c76eeeb3d33e18b72935ab62"}, - {file = "grpcio-1.64.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:5f8b75f64d5d324c565b263c67dbe4f0af595635bbdd93bb1a88189fc62ed2e5"}, - {file = "grpcio-1.64.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c84ad903d0d94311a2b7eea608da163dace97c5fe9412ea311e72c3684925602"}, - {file = "grpcio-1.64.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:940e3ec884520155f68a3b712d045e077d61c520a195d1a5932c531f11883489"}, - {file = "grpcio-1.64.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f10193c69fc9d3d726e83bbf0f3d316f1847c3071c8c93d8090cf5f326b14309"}, - {file = "grpcio-1.64.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac15b6c2c80a4d1338b04d42a02d376a53395ddf0ec9ab157cbaf44191f3ffdd"}, - {file = "grpcio-1.64.1-cp39-cp39-win32.whl", hash = "sha256:03b43d0ccf99c557ec671c7dede64f023c7da9bb632ac65dbc57f166e4970040"}, - {file = "grpcio-1.64.1-cp39-cp39-win_amd64.whl", hash = "sha256:ed6091fa0adcc7e4ff944090cf203a52da35c37a130efa564ded02b7aff63bcd"}, - {file = "grpcio-1.64.1.tar.gz", hash = "sha256:8d51dd1c59d5fa0f34266b80a3805ec29a1f26425c2a54736133f6d87fc4968a"}, + {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, + {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"}, + {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:43112046864317498a33bdc4797ae6a268c36345a910de9b9c17159d8346602f"}, + {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9b929f13677b10f63124c1a410994a401cdd85214ad83ab67cc077fc7e480f0"}, + {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d1797a8a3845437d327145959a2c0c47c05947c9eef5ff1a4c80e499dcc6fa"}, + {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0489063974d1452436139501bf6b180f63d4977223ee87488fe36858c5725292"}, + {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9fd042de4a82e3e7aca44008ee2fb5da01b3e5adb316348c21980f7f58adc311"}, + {file = "grpcio-1.67.1-cp310-cp310-win32.whl", hash = "sha256:638354e698fd0c6c76b04540a850bf1db27b4d2515a19fcd5cf645c48d3eb1ed"}, + {file = "grpcio-1.67.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:608d87d1bdabf9e2868b12338cd38a79969eaf920c89d698ead08f48de9c0f9e"}, + {file = "grpcio-1.67.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:7818c0454027ae3384235a65210bbf5464bd715450e30a3d40385453a85a70cb"}, + {file = "grpcio-1.67.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ea33986b70f83844cd00814cee4451055cd8cab36f00ac64a31f5bb09b31919e"}, + {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:c7a01337407dd89005527623a4a72c5c8e2894d22bead0895306b23c6695698f"}, + {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80b866f73224b0634f4312a4674c1be21b2b4afa73cb20953cbbb73a6b36c3cc"}, + {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9fff78ba10d4250bfc07a01bd6254a6d87dc67f9627adece85c0b2ed754fa96"}, + {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a23cbcc5bb11ea7dc6163078be36c065db68d915c24f5faa4f872c573bb400f"}, + {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1a65b503d008f066e994f34f456e0647e5ceb34cfcec5ad180b1b44020ad4970"}, + {file = "grpcio-1.67.1-cp311-cp311-win32.whl", hash = "sha256:e29ca27bec8e163dca0c98084040edec3bc49afd10f18b412f483cc68c712744"}, + {file = "grpcio-1.67.1-cp311-cp311-win_amd64.whl", hash = "sha256:786a5b18544622bfb1e25cc08402bd44ea83edfb04b93798d85dca4d1a0b5be5"}, + {file = "grpcio-1.67.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:267d1745894200e4c604958da5f856da6293f063327cb049a51fe67348e4f953"}, + {file = "grpcio-1.67.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:85f69fdc1d28ce7cff8de3f9c67db2b0ca9ba4449644488c1e0303c146135ddb"}, + {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f26b0b547eb8d00e195274cdfc63ce64c8fc2d3e2d00b12bf468ece41a0423a0"}, + {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4422581cdc628f77302270ff839a44f4c24fdc57887dc2a45b7e53d8fc2376af"}, + {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d7616d2ded471231c701489190379e0c311ee0a6c756f3c03e6a62b95a7146e"}, + {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8a00efecde9d6fcc3ab00c13f816313c040a28450e5e25739c24f432fc6d3c75"}, + {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:699e964923b70f3101393710793289e42845791ea07565654ada0969522d0a38"}, + {file = "grpcio-1.67.1-cp312-cp312-win32.whl", hash = "sha256:4e7b904484a634a0fff132958dabdb10d63e0927398273917da3ee103e8d1f78"}, + {file = "grpcio-1.67.1-cp312-cp312-win_amd64.whl", hash = "sha256:5721e66a594a6c4204458004852719b38f3d5522082be9061d6510b455c90afc"}, + {file = "grpcio-1.67.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa0162e56fd10a5547fac8774c4899fc3e18c1aa4a4759d0ce2cd00d3696ea6b"}, + {file = "grpcio-1.67.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:beee96c8c0b1a75d556fe57b92b58b4347c77a65781ee2ac749d550f2a365dc1"}, + {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:a93deda571a1bf94ec1f6fcda2872dad3ae538700d94dc283c672a3b508ba3af"}, + {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e6f255980afef598a9e64a24efce87b625e3e3c80a45162d111a461a9f92955"}, + {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e838cad2176ebd5d4a8bb03955138d6589ce9e2ce5d51c3ada34396dbd2dba8"}, + {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a6703916c43b1d468d0756c8077b12017a9fcb6a1ef13faf49e67d20d7ebda62"}, + {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:917e8d8994eed1d86b907ba2a61b9f0aef27a2155bca6cbb322430fc7135b7bb"}, + {file = "grpcio-1.67.1-cp313-cp313-win32.whl", hash = 
"sha256:e279330bef1744040db8fc432becc8a727b84f456ab62b744d3fdb83f327e121"}, + {file = "grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba"}, + {file = "grpcio-1.67.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:178f5db771c4f9a9facb2ab37a434c46cb9be1a75e820f187ee3d1e7805c4f65"}, + {file = "grpcio-1.67.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0f3e49c738396e93b7ba9016e153eb09e0778e776df6090c1b8c91877cc1c426"}, + {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:24e8a26dbfc5274d7474c27759b54486b8de23c709d76695237515bc8b5baeab"}, + {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b6c16489326d79ead41689c4b84bc40d522c9a7617219f4ad94bc7f448c5085"}, + {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e6a4dcf5af7bbc36fd9f81c9f372e8ae580870a9e4b6eafe948cd334b81cf3"}, + {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:95b5f2b857856ed78d72da93cd7d09b6db8ef30102e5e7fe0961fe4d9f7d48e8"}, + {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b49359977c6ec9f5d0573ea4e0071ad278ef905aa74e420acc73fd28ce39e9ce"}, + {file = "grpcio-1.67.1-cp38-cp38-win32.whl", hash = "sha256:f5b76ff64aaac53fede0cc93abf57894ab2a7362986ba22243d06218b93efe46"}, + {file = "grpcio-1.67.1-cp38-cp38-win_amd64.whl", hash = "sha256:804c6457c3cd3ec04fe6006c739579b8d35c86ae3298ffca8de57b493524b771"}, + {file = "grpcio-1.67.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:a25bdea92b13ff4d7790962190bf6bf5c4639876e01c0f3dda70fc2769616335"}, + {file = "grpcio-1.67.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc491ae35a13535fd9196acb5afe1af37c8237df2e54427be3eecda3653127e"}, + {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:85f862069b86a305497e74d0dc43c02de3d1d184fc2c180993aa8aa86fbd19b8"}, + {file = 
"grpcio-1.67.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec74ef02010186185de82cc594058a3ccd8d86821842bbac9873fd4a2cf8be8d"}, + {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01f616a964e540638af5130469451cf580ba8c7329f45ca998ab66e0c7dcdb04"}, + {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:299b3d8c4f790c6bcca485f9963b4846dd92cf6f1b65d3697145d005c80f9fe8"}, + {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:60336bff760fbb47d7e86165408126f1dded184448e9a4c892189eb7c9d3f90f"}, + {file = "grpcio-1.67.1-cp39-cp39-win32.whl", hash = "sha256:5ed601c4c6008429e3d247ddb367fe8c7259c355757448d7c1ef7bd4a6739e8e"}, + {file = "grpcio-1.67.1-cp39-cp39-win_amd64.whl", hash = "sha256:5db70d32d6703b89912af16d6d45d78406374a8b8ef0d28140351dd0ec610e98"}, + {file = "grpcio-1.67.1.tar.gz", hash = "sha256:3dc2ed4cabea4dc14d5e708c2b426205956077cc5de419b4d4079315017e9732"}, ] [package.extras] -protobuf = ["grpcio-tools (>=1.64.1)"] +protobuf = ["grpcio-tools (>=1.67.1)"] [[package]] name = "grpcio-status" @@ -1741,13 +1919,13 @@ uvloop = {version = ">=0.19.0,<1", markers = "sys_platform != \"win32\""} [[package]] name = "huggingface-hub" -version = "0.23.4" +version = "0.26.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, - {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, + {file = "huggingface_hub-0.26.2-py3-none-any.whl", hash = "sha256:98c2a5a8e786c7b2cb6fdeb2740893cba4d53e312572ed3d8afafda65b128c46"}, + {file = "huggingface_hub-0.26.2.tar.gz", hash = "sha256:b100d853465d965733964d123939ba287da60a547087783ddff8a323f340332b"}, ] 
[package.dependencies] @@ -1760,17 +1938,17 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", 
"toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] -inference = ["aiohttp", "minijinja (>=1.0)"] -quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"] +inference = ["aiohttp"] +quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.5.0)"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] -torch = ["safetensors", "torch"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] [[package]] @@ -1805,13 +1983,13 @@ packaging = "*" [[package]] name = "identify" -version = "2.5.36" +version = "2.6.1" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, - {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, + {file = "identify-2.6.1-py2.py3-none-any.whl", hash = "sha256:53863bcac7caf8d2ed85bd20312ea5dcfc22226800f6d6881f232d861db5a8f0"}, + {file = "identify-2.6.1.tar.gz", hash = "sha256:91478c5fb7c3aac5ff7bf9b4344f803843dc586832d5f110d672b19aa1984c98"}, ] [package.extras] @@ -1819,15 +1997,18 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.7" +version = "3.10" description = "Internationalized 
Domain Names in Applications (IDNA)" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" files = [ - {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, - {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1866,13 +2047,13 @@ tests = ["coverage[toml]", "pytest", "pytest-cov", "pytest-mock"] [[package]] name = "ipykernel" -version = "6.29.4" +version = "6.29.5" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" files = [ - {file = "ipykernel-6.29.4-py3-none-any.whl", hash = "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da"}, - {file = "ipykernel-6.29.4.tar.gz", hash = "sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c"}, + {file = "ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5"}, + {file = "ipykernel-6.29.5.tar.gz", hash = "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215"}, ] [package.dependencies] @@ -1937,18 +2118,15 @@ test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "num [[package]] name = "isodate" -version = "0.6.1" +version = "0.7.2" description = "An ISO 8601 date/time/duration parser and formatter" optional = false -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = 
"isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, + {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, + {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, ] -[package.dependencies] -six = "*" - [[package]] name = "isort" version = "5.13.2" @@ -2037,13 +2215,13 @@ files = [ [[package]] name = "jproperties" -version = "2.1.1" +version = "2.1.2" description = "Java Property file parser and writer for Python" optional = false -python-versions = "*" +python-versions = ">=2.7" files = [ - {file = "jproperties-2.1.1-py2.py3-none-any.whl", hash = "sha256:4dfcd7cab56d9c79bce4453f7ca9ffbe0ff0574ddcf1c2a99a8646df60634664"}, - {file = "jproperties-2.1.1.tar.gz", hash = "sha256:40b71124e8d257e8954899a91cd2d5c0f72e0f67f1b72048a5ba264567604f29"}, + {file = "jproperties-2.1.2-py2.py3-none-any.whl", hash = "sha256:4108e868353a9f4a12bb86a92df5462d0e18d00119169533972ce473029be79a"}, + {file = "jproperties-2.1.2.tar.gz", hash = "sha256:036fcd52c10a8a1c21e6fa2a1c292c93892e759b76490acc4809213a36ddc329"}, ] [package.dependencies] @@ -2051,13 +2229,13 @@ six = ">=1.13,<2.0" [[package]] name = "jupyter-client" -version = "8.6.2" +version = "8.6.3" description = "Jupyter protocol implementation and client libraries" optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_client-8.6.2-py3-none-any.whl", hash = "sha256:50cbc5c66fd1b8f65ecb66bc490ab73217993632809b6e505687de18e9dea39f"}, - {file = "jupyter_client-8.6.2.tar.gz", hash = "sha256:2bda14d55ee5ba58552a8c53ae43d215ad9868853489213f37da060ced54d8df"}, + {file = "jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f"}, + {file = "jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419"}, ] [package.dependencies] 
@@ -2091,6 +2269,159 @@ traitlets = ">=5.3" docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"] +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = 
"kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = 
"kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = 
"sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = "sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = 
"kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file 
= "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + +[[package]] +name = "llvmlite" +version = "0.43.0" +description = "lightweight wrapper around basic LLVM functionality" +optional = false +python-versions = ">=3.9" +files = [ + {file = "llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761"}, + {file = "llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a"}, + {file = "llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749"}, + {file = "llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844"}, + {file = "llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cd2a7376f7b3367019b664c21f0c61766219faa3b03731113ead75107f3b66c"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18e9953c748b105668487b7c81a3e97b046d8abf95c4ddc0cd3c94f4e4651ae8"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74937acd22dc11b33946b67dca7680e6d103d6e90eeaaaf932603bec6fe7b03a"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9efc739cc6ed760f795806f67889923f7274276f0eb45092a1473e40d9b867"}, + {file = "llvmlite-0.43.0-cp39-cp39-win_amd64.whl", hash = "sha256:47e147cdda9037f94b399bf03bfd8a6b6b1f2f90be94a454e3386f006455a9b4"}, + {file = "llvmlite-0.43.0.tar.gz", hash = 
"sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, +] + [[package]] name = "lxml" version = "5.3.0" @@ -2247,13 +2578,13 @@ source = ["Cython (>=3.0.11)"] [[package]] name = "markdown" -version = "3.6" +version = "3.7" description = "Python implementation of John Gruber's Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, - {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, + {file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"}, + {file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"}, ] [package.extras] @@ -2262,73 +2593,142 @@ testing = ["coverage", "pyyaml"] [[package]] name = "markupsafe" -version = "2.1.5" +version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = 
"MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, + {file = 
"MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"}, + {file = 
"MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, + {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, +] + +[[package]] +name = "matplotlib" +version = "3.7.3" +description = "Python plotting package" +optional = false +python-versions = ">=3.8" +files = [ + {file = "matplotlib-3.7.3-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:085c33b27561d9c04386789d5aa5eb4a932ddef43cfcdd0e01735f9a6e85ce0c"}, + {file = "matplotlib-3.7.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:c568e80e1c17f68a727f30f591926751b97b98314d8e59804f54f86ae6fa6a22"}, + {file = "matplotlib-3.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7baf98c5ad59c5c4743ea884bb025cbffa52dacdfdac0da3e6021a285a90377e"}, + {file = "matplotlib-3.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:236024f582e40dac39bca592258888b38ae47a9fed7b8de652d68d3d02d47d2b"}, + {file = "matplotlib-3.7.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12b4f6795efea037ce2d41e7c417ad8bd02d5719c6ad4a8450a0708f4a1cfb89"}, + {file = "matplotlib-3.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78b2136cc6c5415b78977e0e8c608647d597204b05b1d9089ccf513c7d913733"}, + {file = "matplotlib-3.7.3-cp310-cp310-win32.whl", hash = "sha256:122dcbf9be0086e2a95d9e5e0632dbf3bd5b65eaa68c369363310a6c87753059"}, + {file = "matplotlib-3.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:4aab27d9e33293389e3c1d7c881d414a72bdfda0fedc3a6bf46c6fa88d9b8015"}, + {file = "matplotlib-3.7.3-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:d5adc743de91e8e0b13df60deb1b1c285b8effea3d66223afceb14b63c9b05de"}, + {file = "matplotlib-3.7.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:55de4cf7cd0071b8ebf203981b53ab64f988a0a1f897a2dff300a1124e8bcd8b"}, + {file = "matplotlib-3.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ac03377fd908aaee2312d0b11735753e907adb6f4d1d102de5e2425249693f6c"}, + {file = "matplotlib-3.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:755bafc10a46918ce9a39980009b54b02dd249594e5adf52f9c56acfddb5d0b7"}, + {file = "matplotlib-3.7.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a6094c6f8e8d18db631754df4fe9a34dec3caf074f6869a7db09f18f9b1d6b2"}, + {file = "matplotlib-3.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:272dba2f1b107790ed78ebf5385b8d14b27ad9e90419de340364b49fe549a993"}, + {file = "matplotlib-3.7.3-cp311-cp311-win32.whl", hash = "sha256:591c123bed1cb4b9996fb60b41a6d89c2ec4943244540776c5f1283fb6960a53"}, + {file = "matplotlib-3.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:3bf3a178c6504694cee8b88b353df0051583f2f6f8faa146f67115c27c856881"}, + {file = "matplotlib-3.7.3-cp312-cp312-macosx_10_12_universal2.whl", 
hash = "sha256:edf54cac8ee3603f3093616b40a931e8c063969756a4d78a86e82c2fea9659f7"}, + {file = "matplotlib-3.7.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:91e36a85ea639a1ba9f91427041eac064b04829945fe331a92617b6cb21d27e5"}, + {file = "matplotlib-3.7.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:caf5eaaf7c68f8d7df269dfbcaf46f48a70ff482bfcebdcc97519671023f2a7d"}, + {file = "matplotlib-3.7.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74bf57f505efea376097e948b7cdd87191a7ce8180616390aef496639edf601f"}, + {file = "matplotlib-3.7.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee152a88a0da527840a426535514b6ed8ac4240eb856b1da92cf48124320e346"}, + {file = "matplotlib-3.7.3-cp312-cp312-win_amd64.whl", hash = "sha256:67a410a9c9e07cbc83581eeea144bbe298870bf0ac0ee2f2e10a015ab7efee19"}, + {file = "matplotlib-3.7.3-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:259999c05285cb993d7f2a419cea547863fa215379eda81f7254c9e932963729"}, + {file = "matplotlib-3.7.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:3f4e7fd5a6157e1d018ce2166ec8e531a481dd4a36f035b5c23edfe05a25419a"}, + {file = "matplotlib-3.7.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:faa3d12d8811d08d14080a8b7b9caea9a457dc495350166b56df0db4b9909ef5"}, + {file = "matplotlib-3.7.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:336e88900c11441e458da01c8414fc57e04e17f9d3bb94958a76faa2652bcf6b"}, + {file = "matplotlib-3.7.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:12f4c0dd8aa280d796c8772ea8265a14f11a04319baa3a16daa5556065e8baea"}, + {file = "matplotlib-3.7.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1990955b11e7918d256cf3b956b10997f405b7917a3f1c7d8e69c1d15c7b1930"}, + {file = "matplotlib-3.7.3-cp38-cp38-win32.whl", hash = "sha256:e78707b751260b42b721507ad7aa60fe4026d7f51c74cca6b9cd8b123ebb633a"}, + {file = 
"matplotlib-3.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:e594ee43c59ea39ca5c6244667cac9d017a3527febc31f5532ad9135cf7469ec"}, + {file = "matplotlib-3.7.3-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:6eaa1cf0e94c936a26b78f6d756c5fbc12e0a58c8a68b7248a2a31456ce4e234"}, + {file = "matplotlib-3.7.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0a97af9d22e8ebedc9f00b043d9bbd29a375e9e10b656982012dded44c10fd77"}, + {file = "matplotlib-3.7.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1f9c6c16597af660433ab330b59ee2934b832ee1fabcaf5cbde7b2add840f31e"}, + {file = "matplotlib-3.7.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7240259b4b9cbc62381f6378cff4d57af539162a18e832c1e48042fabc40b6b"}, + {file = "matplotlib-3.7.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:747c6191d2e88ae854809e69aa358dbf852ff1a5738401b85c1cc9012309897a"}, + {file = "matplotlib-3.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec726b08a5275d827aa91bb951e68234a4423adb91cf65bc0fcdc0f2777663f7"}, + {file = "matplotlib-3.7.3-cp39-cp39-win32.whl", hash = "sha256:40e3b9b450c6534f07278310c4e34caff41c2a42377e4b9d47b0f8d3ac1083a2"}, + {file = "matplotlib-3.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfc118642903a23e309b1da32886bb39a4314147d013e820c86b5fb4cb2e36d0"}, + {file = "matplotlib-3.7.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:165c8082bf8fc0360c24aa4724a22eaadbfd8c28bf1ccf7e94d685cad48261e4"}, + {file = "matplotlib-3.7.3-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebd8470cc2a3594746ff0513aecbfa2c55ff6f58e6cef2efb1a54eb87c88ffa2"}, + {file = "matplotlib-3.7.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7153453669c9672b52095119fd21dd032d19225d48413a2871519b17db4b0fde"}, + {file = "matplotlib-3.7.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:498a08267dc69dd8f24c4b5d7423fa584d7ce0027ba71f7881df05fc09b89bb7"}, + 
{file = "matplotlib-3.7.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d48999c4b19b5a0c058c9cd828ff6fc7748390679f6cf9a2ad653a3e802c87d3"}, + {file = "matplotlib-3.7.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22d65d18b4ee8070a5fea5761d59293f1f9e2fac37ec9ce090463b0e629432fd"}, + {file = "matplotlib-3.7.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c40cde976c36693cc0767e27cf5f443f91c23520060bd9496678364adfafe9c"}, + {file = "matplotlib-3.7.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:39018a2b17592448fbfdf4b8352955e6c3905359939791d4ff429296494d1a0c"}, + {file = "matplotlib-3.7.3.tar.gz", hash = "sha256:f09b3dd6bdeb588de91f853bbb2d6f0ff8ab693485b0c49035eaa510cb4f142e"}, ] +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.0.1" +numpy = ">=1.20,<2" +packaging = ">=20.0" +pillow = ">=6.2.0" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" +setuptools_scm = ">=7" + [[package]] name = "matplotlib-inline" version = "0.1.7" @@ -2367,13 +2767,13 @@ files = [ [[package]] name = "mkdocs" -version = "1.6.0" +version = "1.6.1" description = "Project documentation with Markdown." 
optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs-1.6.0-py3-none-any.whl", hash = "sha256:1eb5cb7676b7d89323e62b56235010216319217d4af5ddc543a91beb8d125ea7"}, - {file = "mkdocs-1.6.0.tar.gz", hash = "sha256:a73f735824ef83a4f3bcb7a231dcab23f5a838f88b7efc54a0eef5fbdbc3c512"}, + {file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"}, + {file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"}, ] [package.dependencies] @@ -2427,13 +2827,13 @@ mkdocs = ">=1.1" [[package]] name = "mkdocs-awesome-pages-plugin" -version = "2.9.2" +version = "2.9.3" description = "An MkDocs plugin that simplifies configuring page titles and their order" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8.1" files = [ - {file = "mkdocs_awesome_pages_plugin-2.9.2-py3-none-any.whl", hash = "sha256:9c795587695bd1ee85a8b7e43293005418df5a8b9ef296a3e628be427b693b4d"}, - {file = "mkdocs_awesome_pages_plugin-2.9.2.tar.gz", hash = "sha256:c3f7d366ecfe99b64524c49a84d8e13c576c19a918ea2e6f59bb486a259313af"}, + {file = "mkdocs_awesome_pages_plugin-2.9.3-py3-none-any.whl", hash = "sha256:1ba433d4e7edaf8661b15b93267f78f78e2e06ca590fc0e651ea36b191d64ae4"}, + {file = "mkdocs_awesome_pages_plugin-2.9.3.tar.gz", hash = "sha256:bdf6369871f41bb17f09c3cfb573367732dfcceb5673d7a2c5c76ac2567b242f"}, ] [package.dependencies] @@ -2488,13 +2888,13 @@ requests = "*" [[package]] name = "mkdocs-git-revision-date-localized-plugin" -version = "1.2.6" +version = "1.3.0" description = "Mkdocs plugin that enables displaying the localized date of the last git modification of a markdown file." 
optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_git_revision_date_localized_plugin-1.2.6-py3-none-any.whl", hash = "sha256:f015cb0f3894a39b33447b18e270ae391c4e25275cac5a626e80b243784e2692"}, - {file = "mkdocs_git_revision_date_localized_plugin-1.2.6.tar.gz", hash = "sha256:e432942ce4ee8aa9b9f4493e993dee9d2cc08b3ea2b40a3d6b03ca0f2a4bcaa2"}, + {file = "mkdocs_git_revision_date_localized_plugin-1.3.0-py3-none-any.whl", hash = "sha256:c99377ee119372d57a9e47cff4e68f04cce634a74831c06bc89b33e456e840a1"}, + {file = "mkdocs_git_revision_date_localized_plugin-1.3.0.tar.gz", hash = "sha256:439e2f14582204050a664c258861c325064d97cdc848c541e48bb034a6c4d0cb"}, ] [package.dependencies] @@ -2503,15 +2903,20 @@ GitPython = "*" mkdocs = ">=1.0" pytz = "*" +[package.extras] +all = ["GitPython", "babel (>=2.7.0)", "click", "codecov", "mkdocs (>=1.0)", "mkdocs-gen-files", "mkdocs-git-authors-plugin", "mkdocs-material", "mkdocs-static-i18n", "pytest", "pytest-cov", "pytz"] +base = ["GitPython", "babel (>=2.7.0)", "mkdocs (>=1.0)", "pytz"] +dev = ["click", "codecov", "mkdocs-gen-files", "mkdocs-git-authors-plugin", "mkdocs-material", "mkdocs-static-i18n", "pytest", "pytest-cov"] + [[package]] name = "mkdocs-material" -version = "9.5.27" +version = "9.5.42" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.27-py3-none-any.whl", hash = "sha256:af8cc263fafa98bb79e9e15a8c966204abf15164987569bd1175fd66a7705182"}, - {file = "mkdocs_material-9.5.27.tar.gz", hash = "sha256:a7d4a35f6d4a62b0c43a0cfe7e987da0980c13587b5bc3c26e690ad494427ec0"}, + {file = "mkdocs_material-9.5.42-py3-none-any.whl", hash = "sha256:452a7c5d21284b373f36b981a2cbebfff59263feebeede1bc28652e9c5bbe316"}, + {file = "mkdocs_material-9.5.42.tar.gz", hash = "sha256:92779b5e9b5934540c574c11647131d217dc540dce72b05feeda088c8eb1b8f2"}, ] [package.dependencies] @@ -2559,13 +2964,13 @@ mkdocs = ">=1.2" [[package]] name = 
"mkdocstrings" -version = "0.26.1" +version = "0.26.2" description = "Automatic documentation from sources, for MkDocs." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "mkdocstrings-0.26.1-py3-none-any.whl", hash = "sha256:29738bfb72b4608e8e55cc50fb8a54f325dc7ebd2014e4e3881a49892d5983cf"}, - {file = "mkdocstrings-0.26.1.tar.gz", hash = "sha256:bb8b8854d6713d5348ad05b069a09f3b79edbc6a0f33a34c6821141adb03fe33"}, + {file = "mkdocstrings-0.26.2-py3-none-any.whl", hash = "sha256:1248f3228464f3b8d1a15bd91249ce1701fe3104ac517a5f167a0e01ca850ba5"}, + {file = "mkdocstrings-0.26.2.tar.gz", hash = "sha256:34a8b50f1e6cfd29546c6c09fbe02154adfb0b361bb758834bf56aa284ba876e"}, ] [package.dependencies] @@ -2585,13 +2990,13 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] [[package]] name = "mkdocstrings-python" -version = "1.12.1" +version = "1.12.2" description = "A Python handler for mkdocstrings." optional = false python-versions = ">=3.9" files = [ - {file = "mkdocstrings_python-1.12.1-py3-none-any.whl", hash = "sha256:205244488199c9aa2a39787ad6a0c862d39b74078ea9aa2be817bc972399563f"}, - {file = "mkdocstrings_python-1.12.1.tar.gz", hash = "sha256:60d6a5ca912c9af4ad431db6d0111ce9f79c6c48d33377dde6a05a8f5f48d792"}, + {file = "mkdocstrings_python-1.12.2-py3-none-any.whl", hash = "sha256:7f7d40d6db3cb1f5d19dbcd80e3efe4d0ba32b073272c0c0de9de2e604eda62a"}, + {file = "mkdocstrings_python-1.12.2.tar.gz", hash = "sha256:7a1760941c0b52a2cd87b960a9e21112ffe52e7df9d0b9583d04d47ed2e186f3"}, ] [package.dependencies] @@ -2601,22 +3006,22 @@ mkdocstrings = ">=0.26" [[package]] name = "msal" -version = "1.29.0" +version = "1.31.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." 
optional = false python-versions = ">=3.7" files = [ - {file = "msal-1.29.0-py3-none-any.whl", hash = "sha256:6b301e63f967481f0cc1a3a3bac0cf322b276855bc1b0955468d9deb3f33d511"}, - {file = "msal-1.29.0.tar.gz", hash = "sha256:8f6725f099752553f9b2fe84125e2a5ebe47b49f92eacca33ebedd3a9ebaae25"}, + {file = "msal-1.31.0-py3-none-any.whl", hash = "sha256:96bc37cff82ebe4b160d5fc0f1196f6ca8b50e274ecd0ec5bf69c438514086e7"}, + {file = "msal-1.31.0.tar.gz", hash = "sha256:2c4f189cf9cc8f00c80045f66d39b7c0f3ed45873fd3d1f2af9f22db2e12ff4b"}, ] [package.dependencies] -cryptography = ">=2.5,<45" +cryptography = ">=2.5,<46" PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.13.2,<0.17)"] +broker = ["pymsalruntime (>=0.14,<0.18)", "pymsalruntime (>=0.17,<0.18)"] [[package]] name = "msal-extensions" @@ -2656,103 +3061,108 @@ async = ["aiodns", "aiohttp (>=3.0)"] [[package]] name = "multidict" -version = "6.0.5" +version = "6.1.0" description = "multidict implementation" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, - {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, - {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, - {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, - {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, - {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, - {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, - {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, - {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, - {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, - {file = 
"multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, - {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, - {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, - {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, - {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, - {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, - {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, - {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, - {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, - {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, - {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, - {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, - {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, - {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, - {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, - {file = 
"multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, - {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, - {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, - {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, - {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, - {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, - {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, - {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, - {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, - {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, - {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, - {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, - {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, - {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, - {file = 
"multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, - {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, - {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, - {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, - {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, - {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a114d03b938376557927ab23f1e950827c3b893ccb94b62fd95d430fd0e5cf53"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b1c416351ee6271b2f49b56ad7f308072f6f44b37118d69c2cad94f3fa8a40d5"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b5d83030255983181005e6cfbac1617ce9746b219bc2aad52201ad121226581"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3e97b5e938051226dc025ec80980c285b053ffb1e25a3db2a3aa3bc046bf7f56"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d618649d4e70ac6efcbba75be98b26ef5078faad23592f9b51ca492953012429"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10524ebd769727ac77ef2278390fb0068d83f3acb7773792a5080f2b0abf7748"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ff3827aef427c89a25cc96ded1759271a93603aba9fb977a6d264648ebf989db"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06809f4f0f7ab7ea2cabf9caca7d79c22c0758b58a71f9d32943ae13c7ace056"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f179dee3b863ab1c59580ff60f9d99f632f34ccb38bf67a33ec6b3ecadd0fd76"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:aaed8b0562be4a0876ee3b6946f6869b7bcdb571a5d1496683505944e268b160"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c8b88a2ccf5493b6c8da9076fb151ba106960a2df90c2633f342f120751a9e7"}, + {file = "multidict-6.1.0-cp310-cp310-win32.whl", hash = "sha256:4a9cb68166a34117d6646c0023c7b759bf197bee5ad4272f420a0141d7eb03a0"}, + {file = "multidict-6.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:20b9b5fbe0b88d0bdef2012ef7dee867f874b72528cf1d08f1d59b0e3850129d"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6"}, + {file = 
"multidict-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753"}, + {file = "multidict-6.1.0-cp311-cp311-win32.whl", hash = 
"sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80"}, + {file = "multidict-6.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50b3a2710631848991d0bf7de077502e8994c804bb805aeb2925a981de58ec2e"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b58c621844d55e71c1b7f7c498ce5aa6985d743a1a59034c57a905b3f153c1ef"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55b6d90641869892caa9ca42ff913f7ff1c5ece06474fbd32fb2cf6834726c95"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b820514bfc0b98a30e3d85462084779900347e4d49267f747ff54060cc33925"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a9b09aba0c5b48c53761b7c720aaaf7cf236d5fe394cd399c7ba662d5f9966"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e16bf3e5fc9f44632affb159d30a437bfe286ce9e02754759be5536b169b305"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76f364861c3bfc98cbbcbd402d83454ed9e01a5224bb3a28bf70002a230f73e2"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:820c661588bd01a0aa62a1283f20d2be4281b086f80dad9e955e690c75fb54a2"}, + 
{file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:0e5f362e895bc5b9e67fe6e4ded2492d8124bdf817827f33c5b46c2fe3ffaca6"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ec660d19bbc671e3a6443325f07263be452c453ac9e512f5eb935e7d4ac28b3"}, + {file = "multidict-6.1.0-cp312-cp312-win32.whl", hash = "sha256:58130ecf8f7b8112cdb841486404f1282b9c86ccb30d3519faf301b2e5659133"}, + {file = "multidict-6.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:188215fc0aafb8e03341995e7c4797860181562380f81ed0a87ff455b70bf1f1"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d569388c381b24671589335a3be6e1d45546c2988c2ebe30fdcada8457a31008"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:052e10d2d37810b99cc170b785945421141bf7bb7d2f8799d431e7db229c385f"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f90c822a402cb865e396a504f9fc8173ef34212a342d92e362ca498cad308e28"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b225d95519a5bf73860323e633a664b0d85ad3d5bede6d30d95b35d4dfe8805b"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:23bfd518810af7de1116313ebd9092cb9aa629beb12f6ed631ad53356ed6b86c"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c09fcfdccdd0b57867577b719c69e347a436b86cd83747f179dbf0cc0d4c1f3"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf6bea52ec97e95560af5ae576bdac3aa3aae0b6758c6efa115236d9e07dae44"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57feec87371dbb3520da6192213c7d6fc892d5589a93db548331954de8248fd2"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:0c3f390dc53279cbc8ba976e5f8035eab997829066756d811616b652b00a23a3"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:59bfeae4b25ec05b34f1956eaa1cb38032282cd4dfabc5056d0a1ec4d696d3aa"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b2f59caeaf7632cc633b5cf6fc449372b83bbdf0da4ae04d5be36118e46cc0aa"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:37bb93b2178e02b7b618893990941900fd25b6b9ac0fa49931a40aecdf083fe4"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4e9f48f58c2c523d5a06faea47866cd35b32655c46b443f163d08c6d0ddb17d6"}, + {file = "multidict-6.1.0-cp313-cp313-win32.whl", hash = "sha256:3a37ffb35399029b45c6cc33640a92bef403c9fd388acce75cdc88f58bd19a81"}, + {file = "multidict-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:e9aa71e15d9d9beaad2c6b9319edcdc0a49a43ef5c0a4c8265ca9ee7d6c67774"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:db7457bac39421addd0c8449933ac32d8042aae84a14911a757ae6ca3eef1392"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d094ddec350a2fb899fec68d8353c78233debde9b7d8b4beeafa70825f1c281a"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5845c1fd4866bb5dd3125d89b90e57ed3138241540897de748cdf19de8a2fca2"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9079dfc6a70abe341f521f78405b8949f96db48da98aeb43f9907f342f627cdc"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3914f5aaa0f36d5d60e8ece6a308ee1c9784cd75ec8151062614657a114c4478"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08be4f460903e5a9d0f76818db3250f12e9c344e79314d1d570fc69d7f4eae4"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d093be959277cb7dee84b801eb1af388b6ad3ca6a6b6bf1ed7585895789d027d"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3702ea6872c5a2a4eeefa6ffd36b042e9773f05b1f37ae3ef7264b1163c2dcf6"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2090f6a85cafc5b2db085124d752757c9d251548cedabe9bd31afe6363e0aff2"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f67f217af4b1ff66c68a87318012de788dd95fcfeb24cc889011f4e1c7454dfd"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:189f652a87e876098bbc67b4da1049afb5f5dfbaa310dd67c594b01c10388db6"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:6bb5992037f7a9eff7991ebe4273ea7f51f1c1c511e6a2ce511d0e7bdb754492"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f4c2b9e770c4e393876e35a7046879d195cd123b4f116d299d442b335bcd"}, + {file = "multidict-6.1.0-cp38-cp38-win32.whl", hash = "sha256:e27bbb6d14416713a8bd7aaa1313c0fc8d44ee48d74497a0ff4c3a1b6ccb5167"}, + {file = "multidict-6.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:22f3105d4fb15c8f57ff3959a58fcab6ce36814486500cd7485651230ad4d4ef"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4e18b656c5e844539d506a0a06432274d7bd52a7487e6828c63a63d69185626c"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a185f876e69897a6f3325c3f19f26a297fa058c5e456bfcff8015e9a27e83ae1"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab7c4ceb38d91570a650dba194e1ca87c2b543488fe9309b4212694174fd539c"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e617fb6b0b6953fffd762669610c1c4ffd05632c138d61ac7e14ad187870669c"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:16e5f4bf4e603eb1fdd5d8180f1a25f30056f22e55ce51fb3d6ad4ab29f7d96f"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c035da3f544b1882bac24115f3e2e8760f10a0107614fc9839fd232200b875"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:957cf8e4b6e123a9eea554fa7ebc85674674b713551de587eb318a2df3e00255"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:483a6aea59cb89904e1ceabd2b47368b5600fb7de78a6e4a2c2987b2d256cf30"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:87701f25a2352e5bf7454caa64757642734da9f6b11384c1f9d1a8e699758057"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:682b987361e5fd7a139ed565e30d81fd81e9629acc7d925a205366877d8c8657"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce2186a7df133a9c895dea3331ddc5ddad42cdd0d1ea2f0a51e5d161e4762f28"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9f636b730f7e8cb19feb87094949ba54ee5357440b9658b2a32a5ce4bce53972"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:73eae06aa53af2ea5270cc066dcaf02cc60d2994bbb2c4ef5764949257d10f43"}, + {file = "multidict-6.1.0-cp39-cp39-win32.whl", hash = "sha256:1ca0083e80e791cffc6efce7660ad24af66c8d4079d2a750b29001b53ff59ada"}, + {file = "multidict-6.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:aa466da5b15ccea564bdab9c89175c762bc12825f4659c11227f515cee76fa4a"}, + {file = "multidict-6.1.0-py3-none-any.whl", hash = "sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506"}, + {file = "multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} + [[package]] name = "mypy" 
version = "1.13.0" @@ -2854,6 +3264,40 @@ files = [ {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, ] +[[package]] +name = "numba" +version = "0.60.0" +description = "compiling Python code using LLVM" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651"}, + {file = "numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e"}, + {file = "numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198"}, + {file = "numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8"}, + {file = "numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8"}, + {file = "numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2"}, + {file = "numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404"}, + {file = 
"numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d"}, + {file = "numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347"}, + {file = "numba-0.60.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:01ef4cd7d83abe087d644eaa3d95831b777aa21d441a23703d649e06b8e06b74"}, + {file = "numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:819a3dfd4630d95fd574036f99e47212a1af41cbcb019bf8afac63ff56834449"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b983bd6ad82fe868493012487f34eae8bf7dd94654951404114f23c3466d34b"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c151748cd269ddeab66334bd754817ffc0cabd9433acb0f551697e5151917d25"}, + {file = "numba-0.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:3031547a015710140e8c87226b4cfe927cac199835e5bf7d4fe5cb64e814e3ab"}, + {file = "numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16"}, +] + +[package.dependencies] +llvmlite = "==0.43.*" +numpy = ">=1.22,<2.1" + [[package]] name = "numpy" version = "1.26.4" @@ -2932,57 +3376,69 @@ PyYAML = ">=5.1.0" [[package]] name = "orjson" -version = "3.10.5" +version = "3.10.10" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.8" files = [ - {file = "orjson-3.10.5-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:545d493c1f560d5ccfc134803ceb8955a14c3fcb47bbb4b2fee0232646d0b932"}, - {file = "orjson-3.10.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4324929c2dd917598212bfd554757feca3e5e0fa60da08be11b4aa8b90013c1"}, - {file = "orjson-3.10.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c13ca5e2ddded0ce6a927ea5a9f27cae77eee4c75547b4297252cb20c4d30e6"}, - {file = "orjson-3.10.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6c8e30adfa52c025f042a87f450a6b9ea29649d828e0fec4858ed5e6caecf63"}, - {file = "orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:338fd4f071b242f26e9ca802f443edc588fa4ab60bfa81f38beaedf42eda226c"}, - {file = "orjson-3.10.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6970ed7a3126cfed873c5d21ece1cd5d6f83ca6c9afb71bbae21a0b034588d96"}, - {file = "orjson-3.10.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:235dadefb793ad12f7fa11e98a480db1f7c6469ff9e3da5e73c7809c700d746b"}, - {file = "orjson-3.10.5-cp310-none-win32.whl", hash = "sha256:be79e2393679eda6a590638abda16d167754393f5d0850dcbca2d0c3735cebe2"}, - {file = "orjson-3.10.5-cp310-none-win_amd64.whl", hash = "sha256:c4a65310ccb5c9910c47b078ba78e2787cb3878cdded1702ac3d0da71ddc5228"}, - {file = "orjson-3.10.5-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:cdf7365063e80899ae3a697def1277c17a7df7ccfc979990a403dfe77bb54d40"}, - {file = "orjson-3.10.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b68742c469745d0e6ca5724506858f75e2f1e5b59a4315861f9e2b1df77775a"}, - {file = "orjson-3.10.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7d10cc1b594951522e35a3463da19e899abe6ca95f3c84c69e9e901e0bd93d38"}, - {file = "orjson-3.10.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:dcbe82b35d1ac43b0d84072408330fd3295c2896973112d495e7234f7e3da2e1"}, - {file = "orjson-3.10.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c0eb7e0c75e1e486c7563fe231b40fdd658a035ae125c6ba651ca3b07936f5"}, - {file = "orjson-3.10.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:53ed1c879b10de56f35daf06dbc4a0d9a5db98f6ee853c2dbd3ee9d13e6f302f"}, - {file = "orjson-3.10.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:099e81a5975237fda3100f918839af95f42f981447ba8f47adb7b6a3cdb078fa"}, - {file = "orjson-3.10.5-cp311-none-win32.whl", hash = "sha256:1146bf85ea37ac421594107195db8bc77104f74bc83e8ee21a2e58596bfb2f04"}, - {file = "orjson-3.10.5-cp311-none-win_amd64.whl", hash = "sha256:36a10f43c5f3a55c2f680efe07aa93ef4a342d2960dd2b1b7ea2dd764fe4a37c"}, - {file = "orjson-3.10.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:68f85ecae7af14a585a563ac741b0547a3f291de81cd1e20903e79f25170458f"}, - {file = "orjson-3.10.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28afa96f496474ce60d3340fe8d9a263aa93ea01201cd2bad844c45cd21f5268"}, - {file = "orjson-3.10.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cd684927af3e11b6e754df80b9ffafd9fb6adcaa9d3e8fdd5891be5a5cad51e"}, - {file = "orjson-3.10.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d21b9983da032505f7050795e98b5d9eee0df903258951566ecc358f6696969"}, - {file = "orjson-3.10.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ad1de7fef79736dde8c3554e75361ec351158a906d747bd901a52a5c9c8d24b"}, - {file = "orjson-3.10.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d97531cdfe9bdd76d492e69800afd97e5930cb0da6a825646667b2c6c6c0211"}, - {file = "orjson-3.10.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d69858c32f09c3e1ce44b617b3ebba1aba030e777000ebdf72b0d8e365d0b2b3"}, - {file = 
"orjson-3.10.5-cp312-none-win32.whl", hash = "sha256:64c9cc089f127e5875901ac05e5c25aa13cfa5dbbbd9602bda51e5c611d6e3e2"}, - {file = "orjson-3.10.5-cp312-none-win_amd64.whl", hash = "sha256:b2efbd67feff8c1f7728937c0d7f6ca8c25ec81373dc8db4ef394c1d93d13dc5"}, - {file = "orjson-3.10.5-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:03b565c3b93f5d6e001db48b747d31ea3819b89abf041ee10ac6988886d18e01"}, - {file = "orjson-3.10.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:584c902ec19ab7928fd5add1783c909094cc53f31ac7acfada817b0847975f26"}, - {file = "orjson-3.10.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a35455cc0b0b3a1eaf67224035f5388591ec72b9b6136d66b49a553ce9eb1e6"}, - {file = "orjson-3.10.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1670fe88b116c2745a3a30b0f099b699a02bb3482c2591514baf5433819e4f4d"}, - {file = "orjson-3.10.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:185c394ef45b18b9a7d8e8f333606e2e8194a50c6e3c664215aae8cf42c5385e"}, - {file = "orjson-3.10.5-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ca0b3a94ac8d3886c9581b9f9de3ce858263865fdaa383fbc31c310b9eac07c9"}, - {file = "orjson-3.10.5-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dfc91d4720d48e2a709e9c368d5125b4b5899dced34b5400c3837dadc7d6271b"}, - {file = "orjson-3.10.5-cp38-none-win32.whl", hash = "sha256:c05f16701ab2a4ca146d0bca950af254cb7c02f3c01fca8efbbad82d23b3d9d4"}, - {file = "orjson-3.10.5-cp38-none-win_amd64.whl", hash = "sha256:8a11d459338f96a9aa7f232ba95679fc0c7cedbd1b990d736467894210205c09"}, - {file = "orjson-3.10.5-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:85c89131d7b3218db1b24c4abecea92fd6c7f9fab87441cfc342d3acc725d807"}, - {file = "orjson-3.10.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fb66215277a230c456f9038d5e2d84778141643207f85336ef8d2a9da26bd7ca"}, - {file = "orjson-3.10.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:51bbcdea96cdefa4a9b4461e690c75ad4e33796530d182bdd5c38980202c134a"}, - {file = "orjson-3.10.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbead71dbe65f959b7bd8cf91e0e11d5338033eba34c114f69078d59827ee139"}, - {file = "orjson-3.10.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df58d206e78c40da118a8c14fc189207fffdcb1f21b3b4c9c0c18e839b5a214"}, - {file = "orjson-3.10.5-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c4057c3b511bb8aef605616bd3f1f002a697c7e4da6adf095ca5b84c0fd43595"}, - {file = "orjson-3.10.5-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b39e006b00c57125ab974362e740c14a0c6a66ff695bff44615dcf4a70ce2b86"}, - {file = "orjson-3.10.5-cp39-none-win32.whl", hash = "sha256:eded5138cc565a9d618e111c6d5c2547bbdd951114eb822f7f6309e04db0fb47"}, - {file = "orjson-3.10.5-cp39-none-win_amd64.whl", hash = "sha256:cc28e90a7cae7fcba2493953cff61da5a52950e78dc2dacfe931a317ee3d8de7"}, - {file = "orjson-3.10.5.tar.gz", hash = "sha256:7a5baef8a4284405d96c90c7c62b755e9ef1ada84c2406c24a9ebec86b89f46d"}, + {file = "orjson-3.10.10-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b788a579b113acf1c57e0a68e558be71d5d09aa67f62ca1f68e01117e550a998"}, + {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:804b18e2b88022c8905bb79bd2cbe59c0cd014b9328f43da8d3b28441995cda4"}, + {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9972572a1d042ec9ee421b6da69f7cc823da5962237563fa548ab17f152f0b9b"}, + {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc6993ab1c2ae7dd0711161e303f1db69062955ac2668181bfdf2dd410e65258"}, + {file = 
"orjson-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d78e4cacced5781b01d9bc0f0cd8b70b906a0e109825cb41c1b03f9c41e4ce86"}, + {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6eb2598df518281ba0cbc30d24c5b06124ccf7e19169e883c14e0831217a0bc"}, + {file = "orjson-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23776265c5215ec532de6238a52707048401a568f0fa0d938008e92a147fe2c7"}, + {file = "orjson-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8cc2a654c08755cef90b468ff17c102e2def0edd62898b2486767204a7f5cc9c"}, + {file = "orjson-3.10.10-cp310-none-win32.whl", hash = "sha256:081b3fc6a86d72efeb67c13d0ea7c030017bd95f9868b1e329a376edc456153b"}, + {file = "orjson-3.10.10-cp310-none-win_amd64.whl", hash = "sha256:ff38c5fb749347768a603be1fb8a31856458af839f31f064c5aa74aca5be9efe"}, + {file = "orjson-3.10.10-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:879e99486c0fbb256266c7c6a67ff84f46035e4f8749ac6317cc83dacd7f993a"}, + {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:019481fa9ea5ff13b5d5d95e6fd5ab25ded0810c80b150c2c7b1cc8660b662a7"}, + {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0dd57eff09894938b4c86d4b871a479260f9e156fa7f12f8cad4b39ea8028bb5"}, + {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dbde6d70cd95ab4d11ea8ac5e738e30764e510fc54d777336eec09bb93b8576c"}, + {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2625cb37b8fb42e2147404e5ff7ef08712099197a9cd38895006d7053e69d6"}, + {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbf3c20c6a7db69df58672a0d5815647ecf78c8e62a4d9bd284e8621c1fe5ccb"}, + {file = "orjson-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", 
hash = "sha256:75c38f5647e02d423807d252ce4528bf6a95bd776af999cb1fb48867ed01d1f6"}, + {file = "orjson-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:23458d31fa50ec18e0ec4b0b4343730928296b11111df5f547c75913714116b2"}, + {file = "orjson-3.10.10-cp311-none-win32.whl", hash = "sha256:2787cd9dedc591c989f3facd7e3e86508eafdc9536a26ec277699c0aa63c685b"}, + {file = "orjson-3.10.10-cp311-none-win_amd64.whl", hash = "sha256:6514449d2c202a75183f807bc755167713297c69f1db57a89a1ef4a0170ee269"}, + {file = "orjson-3.10.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8564f48f3620861f5ef1e080ce7cd122ee89d7d6dacf25fcae675ff63b4d6e05"}, + {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bf161a32b479034098c5b81f2608f09167ad2fa1c06abd4e527ea6bf4837a9"}, + {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:68b65c93617bcafa7f04b74ae8bc2cc214bd5cb45168a953256ff83015c6747d"}, + {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8e28406f97fc2ea0c6150f4c1b6e8261453318930b334abc419214c82314f85"}, + {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4d0d9fe174cc7a5bdce2e6c378bcdb4c49b2bf522a8f996aa586020e1b96cee"}, + {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3be81c42f1242cbed03cbb3973501fcaa2675a0af638f8be494eaf37143d999"}, + {file = "orjson-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65f9886d3bae65be026219c0a5f32dbbe91a9e6272f56d092ab22561ad0ea33b"}, + {file = "orjson-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:730ed5350147db7beb23ddaf072f490329e90a1d059711d364b49fe352ec987b"}, + {file = "orjson-3.10.10-cp312-none-win32.whl", hash = "sha256:a8f4bf5f1c85bea2170800020d53a8877812892697f9c2de73d576c9307a8a5f"}, + {file = 
"orjson-3.10.10-cp312-none-win_amd64.whl", hash = "sha256:384cd13579a1b4cd689d218e329f459eb9ddc504fa48c5a83ef4889db7fd7a4f"}, + {file = "orjson-3.10.10-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:44bffae68c291f94ff5a9b4149fe9d1bdd4cd0ff0fb575bcea8351d48db629a1"}, + {file = "orjson-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e27b4c6437315df3024f0835887127dac2a0a3ff643500ec27088d2588fa5ae1"}, + {file = "orjson-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca84df16d6b49325a4084fd8b2fe2229cb415e15c46c529f868c3387bb1339d"}, + {file = "orjson-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c14ce70e8f39bd71f9f80423801b5d10bf93d1dceffdecd04df0f64d2c69bc01"}, + {file = "orjson-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:24ac62336da9bda1bd93c0491eff0613003b48d3cb5d01470842e7b52a40d5b4"}, + {file = "orjson-3.10.10-cp313-none-win32.whl", hash = "sha256:eb0a42831372ec2b05acc9ee45af77bcaccbd91257345f93780a8e654efc75db"}, + {file = "orjson-3.10.10-cp313-none-win_amd64.whl", hash = "sha256:f0c4f37f8bf3f1075c6cc8dd8a9f843689a4b618628f8812d0a71e6968b95ffd"}, + {file = "orjson-3.10.10-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:829700cc18503efc0cf502d630f612884258020d98a317679cd2054af0259568"}, + {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0ceb5e0e8c4f010ac787d29ae6299846935044686509e2f0f06ed441c1ca949"}, + {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c25908eb86968613216f3db4d3003f1c45d78eb9046b71056ca327ff92bdbd4"}, + {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:218cb0bc03340144b6328a9ff78f0932e642199ac184dd74b01ad691f42f93ff"}, + {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:e2277ec2cea3775640dc81ab5195bb5b2ada2fe0ea6eee4677474edc75ea6785"}, + {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:848ea3b55ab5ccc9d7bbd420d69432628b691fba3ca8ae3148c35156cbd282aa"}, + {file = "orjson-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e3e67b537ac0c835b25b5f7d40d83816abd2d3f4c0b0866ee981a045287a54f3"}, + {file = "orjson-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:7948cfb909353fce2135dcdbe4521a5e7e1159484e0bb024c1722f272488f2b8"}, + {file = "orjson-3.10.10-cp38-none-win32.whl", hash = "sha256:78bee66a988f1a333dc0b6257503d63553b1957889c17b2c4ed72385cd1b96ae"}, + {file = "orjson-3.10.10-cp38-none-win_amd64.whl", hash = "sha256:f1d647ca8d62afeb774340a343c7fc023efacfd3a39f70c798991063f0c681dd"}, + {file = "orjson-3.10.10-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:5a059afddbaa6dd733b5a2d76a90dbc8af790b993b1b5cb97a1176ca713b5df8"}, + {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f9b5c59f7e2a1a410f971c5ebc68f1995822837cd10905ee255f96074537ee6"}, + {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d5ef198bafdef4aa9d49a4165ba53ffdc0a9e1c7b6f76178572ab33118afea25"}, + {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf29ce0bb5d3320824ec3d1508652421000ba466abd63bdd52c64bcce9eb1fa"}, + {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dddd5516bcc93e723d029c1633ae79c4417477b4f57dad9bfeeb6bc0315e654a"}, + {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12f2003695b10817f0fa8b8fca982ed7f5761dcb0d93cff4f2f9f6709903fd7"}, + {file = "orjson-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:672f9874a8a8fb9bb1b771331d31ba27f57702c8106cdbadad8bda5d10bc1019"}, + {file = 
"orjson-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1dcbb0ca5fafb2b378b2c74419480ab2486326974826bbf6588f4dc62137570a"}, + {file = "orjson-3.10.10-cp39-none-win32.whl", hash = "sha256:d9bbd3a4b92256875cb058c3381b782649b9a3c68a4aa9a2fff020c2f9cfc1be"}, + {file = "orjson-3.10.10-cp39-none-win_amd64.whl", hash = "sha256:766f21487a53aee8524b97ca9582d5c6541b03ab6210fbaf10142ae2f3ced2aa"}, + {file = "orjson-3.10.10.tar.gz", hash = "sha256:37949383c4df7b4337ce82ee35b6d7471e55195efa7dcb45ab8226ceadb0fe3b"}, ] [[package]] @@ -2998,50 +3454,68 @@ files = [ [[package]] name = "paginate" -version = "0.5.6" +version = "0.5.7" description = "Divides large result sets into pages for easier browsing" optional = false python-versions = "*" files = [ - {file = "paginate-0.5.6.tar.gz", hash = "sha256:5e6007b6a9398177a7e1648d04fdd9f8c9766a1a945bceac82f1929e8c78af2d"}, + {file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"}, + {file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"}, ] +[package.extras] +dev = ["pytest", "tox"] +lint = ["black"] + [[package]] name = "pandas" -version = "2.2.2" +version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, - {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, - {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, - {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, - {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, - {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, - {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, - {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, - {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, - {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, - {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, - {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, - {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, - {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, - {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, - {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, - {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, - {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, - {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, - {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, - {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, - {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, - {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, - {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, - {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, - {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, - {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, - {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, - {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = 
"pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = 
"pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = 
"pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = 
"pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, ] [package.dependencies] @@ -3080,13 +3554,13 @@ xml = ["lxml (>=4.9.2)"] [[package]] name = "pandas-gbq" -version = "0.23.1" +version = "0.24.0" description = "Google BigQuery connector for pandas" optional = false python-versions = ">=3.8" files = [ - {file = "pandas-gbq-0.23.1.tar.gz", hash = "sha256:6a4942b574643af1372360c807616a98b3f9ed9ca328794daa66d4f28af879be"}, - {file = "pandas_gbq-0.23.1-py2.py3-none-any.whl", hash = "sha256:74165384b6718c75a08cf434fb78f076200aa25d32705c4634eaeedbc4286b6a"}, + {file = "pandas_gbq-0.24.0-py2.py3-none-any.whl", hash = "sha256:e3e39f4fd8651ced033515e5e63a6c19bc757b26155fac01c4367e1aa74ee122"}, + {file = "pandas_gbq-0.24.0.tar.gz", hash = "sha256:264415277059821497e74ed0b2240b538cba9646ee627fb4668cac5cf274e4c3"}, ] [package.dependencies] @@ -3094,16 +3568,17 @@ db-dtypes = ">=1.0.4,<2.0.0" google-api-core = ">=2.10.2,<3.0.0dev" google-auth = ">=2.13.0" google-auth-oauthlib = ">=0.7.0" -google-cloud-bigquery = ">=3.3.5,<4.0.0dev" -numpy = ">=1.16.6" -packaging = ">=20.0.0" +google-cloud-bigquery = ">=3.4.2,<4.0.0dev" +numpy = ">=1.18.1" +packaging = ">=22.0.0" pandas = ">=1.1.4" -pyarrow = ">=3.0.0" +pyarrow = ">=4.0.0" pydata-google-auth = ">=1.5.0" setuptools = "*" [package.extras] bqstorage = ["google-cloud-bigquery-storage (>=2.16.2,<3.0.0dev)"] +geopandas = ["Shapely (>=1.8.4)", "geopandas (>=0.9.0)"] tqdm = ["tqdm (>=4.23.0)"] [[package]] @@ -3176,84 +3651,90 @@ ptyprocess = ">=0.5" [[package]] name = "pillow" -version = "10.3.0" +version = "11.0.0" description = "Python Imaging Library (Fork)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = 
"sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"}, - {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"}, - {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"}, - {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"}, - {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"}, - {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"}, - {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"}, - {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"}, - {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"}, - {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"}, - {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"}, - {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"}, - {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"}, - {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"}, - {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"}, - {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"}, - {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"}, - {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = 
"sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"}, - {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"}, - {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"}, - {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"}, - {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"}, - {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"}, - {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"}, - {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"}, - {file = "pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"}, - {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"}, - {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"}, - {file = 
"pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"}, - {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"}, - {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"}, - {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"}, - {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"}, - {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"}, - {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"}, - {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"}, - {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"}, - {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"}, - {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = 
"sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"}, - {file = "pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"}, - {file 
= "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"}, - {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"}, - {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, + {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, + {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, + {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = 
"sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, + {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, + {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, + {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file 
= "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = "sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, + {file = 
"pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, + {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, + {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, + {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, + {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, + {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, + {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = 
"sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, + {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, + {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, + {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, + {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, + {file = 
"pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, ] [package.extras] -docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] fpx = ["olefile"] mic = ["olefile"] tests = 
["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] @@ -3262,29 +3743,29 @@ xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.2.2" +version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, - {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, + {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, + {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, ] [package.extras] -docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] -type = ["mypy (>=1.8)"] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.11.2)"] [[package]] name = "plotly" -version = "5.22.0" +version = "5.24.1" description = "An open-source, interactive data visualization library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "plotly-5.22.0-py3-none-any.whl", hash = "sha256:68fc1901f098daeb233cc3dd44ec9dc31fb3ca4f4e53189344199c43496ed006"}, - {file = "plotly-5.22.0.tar.gz", hash = "sha256:859fdadbd86b5770ae2466e542b761b247d1c6b49daed765b95bb8c7063e7469"}, + {file = "plotly-5.24.1-py3-none-any.whl", hash = 
"sha256:f67073a1e637eb0dc3e46324d9d51e2fe76e9727c892dde64ddf1e1b51f29089"}, + {file = "plotly-5.24.1.tar.gz", hash = "sha256:dbc8ac8339d248a4bcc36e08a5659bacfe1b079390b8953533f4eb22169b4bae"}, ] [package.dependencies] @@ -3308,13 +3789,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "portalocker" -version = "2.10.0" +version = "2.10.1" description = "Wraps the portalocker recipe for easy usage" optional = false python-versions = ">=3.8" files = [ - {file = "portalocker-2.10.0-py3-none-any.whl", hash = "sha256:48944147b2cd42520549bc1bb8fe44e220296e56f7c3d551bc6ecce69d9b0de1"}, - {file = "portalocker-2.10.0.tar.gz", hash = "sha256:49de8bc0a2f68ca98bf9e219c81a3e6b27097c7bf505a87c5a112ce1aaeb9b81"}, + {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"}, + {file = "portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f"}, ] [package.dependencies] @@ -3327,13 +3808,13 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p [[package]] name = "pre-commit" -version = "4.0.0" +version = "4.0.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
optional = false python-versions = ">=3.9" files = [ - {file = "pre_commit-4.0.0-py2.py3-none-any.whl", hash = "sha256:0ca2341cf94ac1865350970951e54b1a50521e57b7b500403307aed4315a1234"}, - {file = "pre_commit-4.0.0.tar.gz", hash = "sha256:5d9807162cc5537940f94f266cbe2d716a75cfad0d78a317a92cac16287cfed6"}, + {file = "pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878"}, + {file = "pre_commit-4.0.1.tar.gz", hash = "sha256:80905ac375958c0444c65e9cebebd948b3cdb518f335a091a670a89d652139d2"}, ] [package.dependencies] @@ -3356,27 +3837,134 @@ files = [ [[package]] name = "prompt-toolkit" -version = "3.0.47" +version = "3.0.48" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.47-py3-none-any.whl", hash = "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10"}, - {file = "prompt_toolkit-3.0.47.tar.gz", hash = "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360"}, + {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, + {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, ] [package.dependencies] wcwidth = "*" +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = 
"propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = 
"sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = 
"propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = 
"propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file 
= "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = 
"sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = 
"propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, + {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + [[package]] name = "proto-plus" -version = "1.24.0" +version = "1.25.0" description = "Beautiful, Pythonic protocol buffers." optional = false python-versions = ">=3.7" files = [ - {file = "proto-plus-1.24.0.tar.gz", hash = "sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445"}, - {file = "proto_plus-1.24.0-py3-none-any.whl", hash = "sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12"}, + {file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"}, + {file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"}, ] [package.dependencies] @@ -3418,32 +4006,33 @@ files = [ [[package]] name = "psutil" -version = "6.0.0" +version = "6.1.0" description = "Cross-platform lib for process and system monitoring in Python." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "psutil-6.0.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a021da3e881cd935e64a3d0a20983bda0bb4cf80e4f74fa9bfcb1bc5785360c6"}, - {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:1287c2b95f1c0a364d23bc6f2ea2365a8d4d9b726a3be7294296ff7ba97c17f0"}, - {file = "psutil-6.0.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a9a3dbfb4de4f18174528d87cc352d1f788b7496991cca33c6996f40c9e3c92c"}, - {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6ec7588fb3ddaec7344a825afe298db83fe01bfaaab39155fa84cf1c0d6b13c3"}, - {file = "psutil-6.0.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:1e7c870afcb7d91fdea2b37c24aeb08f98b6d67257a5cb0a8bc3ac68d0f1a68c"}, - {file = "psutil-6.0.0-cp27-none-win32.whl", hash = "sha256:02b69001f44cc73c1c5279d02b30a817e339ceb258ad75997325e0e6169d8b35"}, - {file = "psutil-6.0.0-cp27-none-win_amd64.whl", hash = "sha256:21f1fb635deccd510f69f485b87433460a603919b45e2a324ad65b0cc74f8fb1"}, - {file = "psutil-6.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c588a7e9b1173b6e866756dde596fd4cad94f9399daf99ad8c3258b3cb2b47a0"}, - {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ed2440ada7ef7d0d608f20ad89a04ec47d2d3ab7190896cd62ca5fc4fe08bf0"}, - {file = "psutil-6.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd9a97c8e94059b0ef54a7d4baf13b405011176c3b6ff257c247cae0d560ecd"}, - {file = "psutil-6.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2e8d0054fc88153ca0544f5c4d554d42e33df2e009c4ff42284ac9ebdef4132"}, - {file = "psutil-6.0.0-cp36-cp36m-win32.whl", hash = "sha256:fc8c9510cde0146432bbdb433322861ee8c3efbf8589865c8bf8d21cb30c4d14"}, - {file = "psutil-6.0.0-cp36-cp36m-win_amd64.whl", hash = 
"sha256:34859b8d8f423b86e4385ff3665d3f4d94be3cdf48221fbe476e883514fdb71c"}, - {file = "psutil-6.0.0-cp37-abi3-win32.whl", hash = "sha256:a495580d6bae27291324fe60cea0b5a7c23fa36a7cd35035a16d93bdcf076b9d"}, - {file = "psutil-6.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:33ea5e1c975250a720b3a6609c490db40dae5d83a4eb315170c4fe0d8b1f34b3"}, - {file = "psutil-6.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:ffe7fc9b6b36beadc8c322f84e1caff51e8703b88eee1da46d1e3a6ae11b4fd0"}, - {file = "psutil-6.0.0.tar.gz", hash = "sha256:8faae4f310b6d969fa26ca0545338b21f73c6b15db7c4a8d934a5482faa818f2"}, + {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"}, + {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"}, + {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:000d1d1ebd634b4efb383f4034437384e44a6d455260aaee2eca1e9c1b55f047"}, + {file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5cd2bcdc75b452ba2e10f0e8ecc0b57b827dd5d7aaffbc6821b2a9a242823a76"}, + {file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:045f00a43c737f960d273a83973b2511430d61f283a44c96bf13a6e829ba8fdc"}, + {file = "psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e"}, + {file = "psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85"}, + {file = "psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688"}, + {file = "psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e"}, + {file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38"}, + {file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b"}, + {file = "psutil-6.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a"}, + {file = "psutil-6.1.0-cp36-cp36m-win32.whl", hash = "sha256:6d3fbbc8d23fcdcb500d2c9f94e07b1342df8ed71b948a2649b5cb060a7c94ca"}, + {file = "psutil-6.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1209036fbd0421afde505a4879dee3b2fd7b1e14fee81c0069807adcbbcca747"}, + {file = "psutil-6.1.0-cp37-abi3-win32.whl", hash = "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e"}, + {file = "psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be"}, + {file = "psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a"}, ] [package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "ptyprocess" @@ -3458,13 +4047,13 @@ files = [ [[package]] name = "pure-eval" -version = "0.2.2" +version = "0.2.3" description = "Safely evaluate AST nodes without side effects" optional = false python-versions = "*" files = [ - {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, - {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, + {file = "pure_eval-0.2.3-py3-none-any.whl", hash = 
"sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"}, + {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, ] [package.extras] @@ -3494,76 +4083,82 @@ files = [ [[package]] name = "pyarrow" -version = "16.1.0" +version = "18.0.0" description = "Python library for Apache Arrow" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"}, - {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"}, - {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"}, - {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"}, - {file = "pyarrow-16.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c"}, - {file = "pyarrow-16.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6"}, - {file = 
"pyarrow-16.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e"}, - {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b"}, - {file = "pyarrow-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b"}, - {file = "pyarrow-16.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f"}, - {file = "pyarrow-16.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628"}, - {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7"}, - {file = "pyarrow-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444"}, - {file = "pyarrow-16.1.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f"}, - {file = "pyarrow-16.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f"}, - {file = 
"pyarrow-16.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15"}, - {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed"}, - {file = "pyarrow-16.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710"}, - {file = "pyarrow-16.1.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55"}, - {file = "pyarrow-16.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa"}, - {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3"}, - {file = "pyarrow-16.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a"}, - {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"}, + {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash 
= "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"}, + {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310"}, + {file = "pyarrow-18.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2"}, + {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280"}, + {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954"}, + {file = "pyarrow-18.0.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad"}, + {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802"}, + {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7"}, + {file = "pyarrow-18.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8"}, + {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f"}, + {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash 
= "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b"}, + {file = "pyarrow-18.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a"}, + {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea"}, + {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420"}, + {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8"}, + {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03"}, + {file = "pyarrow-18.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2"}, + {file = "pyarrow-18.0.0.tar.gz", hash = "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5"}, ] -[package.dependencies] -numpy = ">=1.16.6" +[package.extras] +test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] [[package]] name = "pyasn1" -version = "0.5.1" +version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +python-versions = ">=3.8" files = [ - {file = "pyasn1-0.5.1-py2.py3-none-any.whl", hash = "sha256:4439847c58d40b1d0a573d07e3856e95333f1976294494c325775aeca506eb58"}, - {file = "pyasn1-0.5.1.tar.gz", hash = "sha256:6d391a96e59b23130a5cfa74d6fd7f388dbbe26cc8f1edf39fdddf08d9d6676c"}, + {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, + {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, ] [[package]] name = "pyasn1-modules" -version = "0.3.0" +version = "0.4.1" description = "A collection of ASN.1-based protocols modules" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +python-versions = ">=3.8" files = [ - {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, - {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, + {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, + {file = "pyasn1_modules-0.4.1.tar.gz", hash = 
"sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, ] [package.dependencies] -pyasn1 = ">=0.4.6,<0.6.0" +pyasn1 = ">=0.4.6,<0.7.0" [[package]] name = "pycares" @@ -3633,13 +4228,13 @@ idna = ["idna (>=2.1)"] [[package]] name = "pycodestyle" -version = "2.12.0" +version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" files = [ - {file = "pycodestyle-2.12.0-py2.py3-none-any.whl", hash = "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4"}, - {file = "pycodestyle-2.12.0.tar.gz", hash = "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c"}, + {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, + {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, ] [[package]] @@ -3671,18 +4266,18 @@ setuptools = "*" [[package]] name = "pydoclint" -version = "0.5.1" +version = "0.5.9" description = "A Python docstring linter that checks arguments, returns, yields, and raises sections" optional = false python-versions = ">=3.8" files = [ - {file = "pydoclint-0.5.1-py2.py3-none-any.whl", hash = "sha256:198cc7d80e7701e340466783b5c461d062622832f8ea7b4af66a1901c7df8a95"}, - {file = "pydoclint-0.5.1.tar.gz", hash = "sha256:380fcd5f14ef9781605f81db76a55f5781972cbf2a902282eab0c6566743860b"}, + {file = "pydoclint-0.5.9-py2.py3-none-any.whl", hash = "sha256:089327003cef6fe5605cbaa9887859ea5229ce0c9abb52775ffd57513094c1ae"}, + {file = "pydoclint-0.5.9.tar.gz", hash = "sha256:e200f964a5d9fbbb2ff1078bd7cb5433a0564d2482b6a1ba1be848f66bc4924f"}, ] [package.dependencies] click = ">=8.1.0" -docstring-parser-fork = ">=0.0.8" +docstring-parser-fork = ">=0.0.9" tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} [package.extras] @@ -3715,13 +4310,13 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyjwt" 
-version = "2.8.0" +version = "2.9.0" description = "JSON Web Token implementation in Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, - {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, + {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, + {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, ] [package.dependencies] @@ -3729,8 +4324,8 @@ cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"cryp [package.extras] crypto = ["cryptography (>=3.4.0)"] -dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] -docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] @@ -3746,13 +4341,13 @@ files = [ [[package]] name = "pymdown-extensions" -version = "10.11.2" +version = "10.12" description = "Extension pack for Python Markdown." 
optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"}, - {file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"}, + {file = "pymdown_extensions-10.12-py3-none-any.whl", hash = "sha256:49f81412242d3527b8b4967b990df395c89563043bc51a3d2d7d500e52123b77"}, + {file = "pymdown_extensions-10.12.tar.gz", hash = "sha256:b0ee1e0b2bef1071a47891ab17003bfe5bf824a398e13f49f8ed653b699369a7"}, ] [package.dependencies] @@ -3797,13 +4392,13 @@ sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] [[package]] name = "pytest" -version = "8.3.1" +version = "8.3.3" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.3.1-py3-none-any.whl", hash = "sha256:e9600ccf4f563976e2c99fa02c7624ab938296551f280835ee6516df8bc4ae8c"}, - {file = "pytest-8.3.1.tar.gz", hash = "sha256:7e8e5c5abd6e93cb1cc151f23e57adc31fcf8cfd2a3ff2da63e23f732de35db6"}, + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, ] [package.dependencies] @@ -3901,96 +4496,102 @@ files = [ [[package]] name = "pytz" -version = "2024.1" +version = "2024.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, - {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, + {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, + {file = "pytz-2024.2.tar.gz", hash 
= "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, ] [[package]] name = "pywin32" -version = "306" +version = "308" description = "Python for Window Extensions" optional = false python-versions = "*" files = [ - {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, - {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, - {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, - {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, - {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, - {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, - {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, - {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, - {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, - {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, - {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, - {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, - {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, - {file = "pywin32-306-cp39-cp39-win_amd64.whl", 
hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, + {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, + {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"}, + {file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"}, + {file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"}, + {file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"}, + {file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"}, + {file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"}, + {file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"}, + {file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"}, + {file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"}, + {file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"}, + {file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"}, + {file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"}, + {file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"}, + {file = "pywin32-308-cp38-cp38-win32.whl", hash = 
"sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"}, + {file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"}, + {file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"}, + {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"}, ] [[package]] name = "pyyaml" -version = "6.0.1" +version = "6.0.2" description = "YAML parser and emitter for Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, - {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, - {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, - {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, - {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, - {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = 
"sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, - {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, - {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = 
"sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, - {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, - {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, - {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, - {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, - {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = 
"PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = 
"PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] [[package]] @@ -4009,99 +4610,120 @@ pyyaml = "*" [[package]] name = "pyzmq" -version = "26.0.3" +version = "26.2.0" description = "Python bindings for 0MQ" optional = false python-versions = ">=3.7" files = [ - {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:44dd6fc3034f1eaa72ece33588867df9e006a7303725a12d64c3dff92330f625"}, - {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acb704195a71ac5ea5ecf2811c9ee19ecdc62b91878528302dd0be1b9451cc90"}, - {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dbb9c997932473a27afa93954bb77a9f9b786b4ccf718d903f35da3232317de"}, - {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6bcb34f869d431799c3ee7d516554797f7760cb2198ecaa89c3f176f72d062be"}, - {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ece17ec5f20d7d9b442e5174ae9f020365d01ba7c112205a4d59cf19dc38ee"}, - {file = "pyzmq-26.0.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ba6e5e6588e49139a0979d03a7deb9c734bde647b9a8808f26acf9c547cab1bf"}, - {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3bf8b000a4e2967e6dfdd8656cd0757d18c7e5ce3d16339e550bd462f4857e59"}, - {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2136f64fbb86451dbbf70223635a468272dd20075f988a102bf8a3f194a411dc"}, - {file = "pyzmq-26.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:e8918973fbd34e7814f59143c5f600ecd38b8038161239fd1a3d33d5817a38b8"}, - {file = "pyzmq-26.0.3-cp310-cp310-win32.whl", hash = "sha256:0aaf982e68a7ac284377d051c742610220fd06d330dcd4c4dbb4cdd77c22a537"}, - {file = "pyzmq-26.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:f1a9b7d00fdf60b4039f4455afd031fe85ee8305b019334b72dcf73c567edc47"}, - {file = "pyzmq-26.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:80b12f25d805a919d53efc0a5ad7c0c0326f13b4eae981a5d7b7cc343318ebb7"}, - {file = "pyzmq-26.0.3-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:a72a84570f84c374b4c287183debc776dc319d3e8ce6b6a0041ce2e400de3f32"}, - {file = "pyzmq-26.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ca684ee649b55fd8f378127ac8462fb6c85f251c2fb027eb3c887e8ee347bcd"}, - {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e222562dc0f38571c8b1ffdae9d7adb866363134299264a1958d077800b193b7"}, - {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f17cde1db0754c35a91ac00b22b25c11da6eec5746431d6e5092f0cd31a3fea9"}, - {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7c0c0b3244bb2275abe255d4a30c050d541c6cb18b870975553f1fb6f37527"}, - {file = "pyzmq-26.0.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac97a21de3712afe6a6c071abfad40a6224fd14fa6ff0ff8d0c6e6cd4e2f807a"}, - {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:88b88282e55fa39dd556d7fc04160bcf39dea015f78e0cecec8ff4f06c1fc2b5"}, - {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:72b67f966b57dbd18dcc7efbc1c7fc9f5f983e572db1877081f075004614fcdd"}, - {file = "pyzmq-26.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f4b6cecbbf3b7380f3b61de3a7b93cb721125dc125c854c14ddc91225ba52f83"}, - {file = "pyzmq-26.0.3-cp311-cp311-win32.whl", hash = "sha256:eed56b6a39216d31ff8cd2f1d048b5bf1700e4b32a01b14379c3b6dde9ce3aa3"}, - {file = 
"pyzmq-26.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:3191d312c73e3cfd0f0afdf51df8405aafeb0bad71e7ed8f68b24b63c4f36500"}, - {file = "pyzmq-26.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:b6907da3017ef55139cf0e417c5123a84c7332520e73a6902ff1f79046cd3b94"}, - {file = "pyzmq-26.0.3-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:068ca17214038ae986d68f4a7021f97e187ed278ab6dccb79f837d765a54d753"}, - {file = "pyzmq-26.0.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7821d44fe07335bea256b9f1f41474a642ca55fa671dfd9f00af8d68a920c2d4"}, - {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eeb438a26d87c123bb318e5f2b3d86a36060b01f22fbdffd8cf247d52f7c9a2b"}, - {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:69ea9d6d9baa25a4dc9cef5e2b77b8537827b122214f210dd925132e34ae9b12"}, - {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7daa3e1369355766dea11f1d8ef829905c3b9da886ea3152788dc25ee6079e02"}, - {file = "pyzmq-26.0.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6ca7a9a06b52d0e38ccf6bca1aeff7be178917893f3883f37b75589d42c4ac20"}, - {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1b7d0e124948daa4d9686d421ef5087c0516bc6179fdcf8828b8444f8e461a77"}, - {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e746524418b70f38550f2190eeee834db8850088c834d4c8406fbb9bc1ae10b2"}, - {file = "pyzmq-26.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:6b3146f9ae6af82c47a5282ac8803523d381b3b21caeae0327ed2f7ecb718798"}, - {file = "pyzmq-26.0.3-cp312-cp312-win32.whl", hash = "sha256:2b291d1230845871c00c8462c50565a9cd6026fe1228e77ca934470bb7d70ea0"}, - {file = "pyzmq-26.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:926838a535c2c1ea21c903f909a9a54e675c2126728c21381a94ddf37c3cbddf"}, - {file = "pyzmq-26.0.3-cp312-cp312-win_arm64.whl", hash = 
"sha256:5bf6c237f8c681dfb91b17f8435b2735951f0d1fad10cc5dfd96db110243370b"}, - {file = "pyzmq-26.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c0991f5a96a8e620f7691e61178cd8f457b49e17b7d9cfa2067e2a0a89fc1d5"}, - {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dbf012d8fcb9f2cf0643b65df3b355fdd74fc0035d70bb5c845e9e30a3a4654b"}, - {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:01fbfbeb8249a68d257f601deb50c70c929dc2dfe683b754659569e502fbd3aa"}, - {file = "pyzmq-26.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c8eb19abe87029c18f226d42b8a2c9efdd139d08f8bf6e085dd9075446db450"}, - {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5344b896e79800af86ad643408ca9aa303a017f6ebff8cee5a3163c1e9aec987"}, - {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:204e0f176fd1d067671157d049466869b3ae1fc51e354708b0dc41cf94e23a3a"}, - {file = "pyzmq-26.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a42db008d58530efa3b881eeee4991146de0b790e095f7ae43ba5cc612decbc5"}, - {file = "pyzmq-26.0.3-cp37-cp37m-win32.whl", hash = "sha256:8d7a498671ca87e32b54cb47c82a92b40130a26c5197d392720a1bce1b3c77cf"}, - {file = "pyzmq-26.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:3b4032a96410bdc760061b14ed6a33613ffb7f702181ba999df5d16fb96ba16a"}, - {file = "pyzmq-26.0.3-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:2cc4e280098c1b192c42a849de8de2c8e0f3a84086a76ec5b07bfee29bda7d18"}, - {file = "pyzmq-26.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5bde86a2ed3ce587fa2b207424ce15b9a83a9fa14422dcc1c5356a13aed3df9d"}, - {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:34106f68e20e6ff253c9f596ea50397dbd8699828d55e8fa18bd4323d8d966e6"}, - {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:ebbbd0e728af5db9b04e56389e2299a57ea8b9dd15c9759153ee2455b32be6ad"}, - {file = "pyzmq-26.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6b1d1c631e5940cac5a0b22c5379c86e8df6a4ec277c7a856b714021ab6cfad"}, - {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e891ce81edd463b3b4c3b885c5603c00141151dd9c6936d98a680c8c72fe5c67"}, - {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9b273ecfbc590a1b98f014ae41e5cf723932f3b53ba9367cfb676f838038b32c"}, - {file = "pyzmq-26.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b32bff85fb02a75ea0b68f21e2412255b5731f3f389ed9aecc13a6752f58ac97"}, - {file = "pyzmq-26.0.3-cp38-cp38-win32.whl", hash = "sha256:f6c21c00478a7bea93caaaef9e7629145d4153b15a8653e8bb4609d4bc70dbfc"}, - {file = "pyzmq-26.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:3401613148d93ef0fd9aabdbddb212de3db7a4475367f49f590c837355343972"}, - {file = "pyzmq-26.0.3-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:2ed8357f4c6e0daa4f3baf31832df8a33334e0fe5b020a61bc8b345a3db7a606"}, - {file = "pyzmq-26.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c1c8f2a2ca45292084c75bb6d3a25545cff0ed931ed228d3a1810ae3758f975f"}, - {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:b63731993cdddcc8e087c64e9cf003f909262b359110070183d7f3025d1c56b5"}, - {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b3cd31f859b662ac5d7f4226ec7d8bd60384fa037fc02aee6ff0b53ba29a3ba8"}, - {file = "pyzmq-26.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:115f8359402fa527cf47708d6f8a0f8234f0e9ca0cab7c18c9c189c194dbf620"}, - {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:715bdf952b9533ba13dfcf1f431a8f49e63cecc31d91d007bc1deb914f47d0e4"}, - {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:e1258c639e00bf5e8a522fec6c3eaa3e30cf1c23a2f21a586be7e04d50c9acab"}, - {file = "pyzmq-26.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:15c59e780be8f30a60816a9adab900c12a58d79c1ac742b4a8df044ab2a6d920"}, - {file = "pyzmq-26.0.3-cp39-cp39-win32.whl", hash = "sha256:d0cdde3c78d8ab5b46595054e5def32a755fc028685add5ddc7403e9f6de9879"}, - {file = "pyzmq-26.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:ce828058d482ef860746bf532822842e0ff484e27f540ef5c813d516dd8896d2"}, - {file = "pyzmq-26.0.3-cp39-cp39-win_arm64.whl", hash = "sha256:788f15721c64109cf720791714dc14afd0f449d63f3a5487724f024345067381"}, - {file = "pyzmq-26.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c18645ef6294d99b256806e34653e86236eb266278c8ec8112622b61db255de"}, - {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e6bc96ebe49604df3ec2c6389cc3876cabe475e6bfc84ced1bf4e630662cb35"}, - {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:971e8990c5cc4ddcff26e149398fc7b0f6a042306e82500f5e8db3b10ce69f84"}, - {file = "pyzmq-26.0.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8416c23161abd94cc7da80c734ad7c9f5dbebdadfdaa77dad78244457448223"}, - {file = "pyzmq-26.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:082a2988364b60bb5de809373098361cf1dbb239623e39e46cb18bc035ed9c0c"}, - {file = "pyzmq-26.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d57dfbf9737763b3a60d26e6800e02e04284926329aee8fb01049635e957fe81"}, - {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:77a85dca4c2430ac04dc2a2185c2deb3858a34fe7f403d0a946fa56970cf60a1"}, - {file = "pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4c82a6d952a1d555bf4be42b6532927d2a5686dd3c3e280e5f63225ab47ac1f5"}, - {file = 
"pyzmq-26.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4496b1282c70c442809fc1b151977c3d967bfb33e4e17cedbf226d97de18f709"}, - {file = "pyzmq-26.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:e4946d6bdb7ba972dfda282f9127e5756d4f299028b1566d1245fa0d438847e6"}, - {file = "pyzmq-26.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:03c0ae165e700364b266876d712acb1ac02693acd920afa67da2ebb91a0b3c09"}, - {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3e3070e680f79887d60feeda051a58d0ac36622e1759f305a41059eff62c6da7"}, - {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6ca08b840fe95d1c2bd9ab92dac5685f949fc6f9ae820ec16193e5ddf603c3b2"}, - {file = "pyzmq-26.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e76654e9dbfb835b3518f9938e565c7806976c07b37c33526b574cc1a1050480"}, - {file = "pyzmq-26.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:871587bdadd1075b112e697173e946a07d722459d20716ceb3d1bd6c64bd08ce"}, - {file = "pyzmq-26.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d0a2d1bd63a4ad79483049b26514e70fa618ce6115220da9efdff63688808b17"}, - {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0270b49b6847f0d106d64b5086e9ad5dc8a902413b5dbbb15d12b60f9c1747a4"}, - {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:703c60b9910488d3d0954ca585c34f541e506a091a41930e663a098d3b794c67"}, - {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74423631b6be371edfbf7eabb02ab995c2563fee60a80a30829176842e71722a"}, - {file = "pyzmq-26.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4adfbb5451196842a88fda3612e2c0414134874bffb1c2ce83ab4242ec9e027d"}, - {file = "pyzmq-26.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:3516119f4f9b8671083a70b6afaa0a070f5683e431ab3dc26e9215620d7ca1ad"}, - {file = "pyzmq-26.0.3.tar.gz", hash = "sha256:dba7d9f2e047dfa2bca3b01f4f84aa5246725203d6284e3790f2ca15fba6b40a"}, + {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629"}, + {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dacd995031a01d16eec825bf30802fceb2c3791ef24bcce48fa98ce40918c27b"}, + {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89289a5ee32ef6c439086184529ae060c741334b8970a6855ec0b6ad3ff28764"}, + {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5506f06d7dc6ecf1efacb4a013b1f05071bb24b76350832c96449f4a2d95091c"}, + {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea039387c10202ce304af74def5021e9adc6297067f3441d348d2b633e8166a"}, + {file = "pyzmq-26.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a2224fa4a4c2ee872886ed00a571f5e967c85e078e8e8c2530a2fb01b3309b88"}, + {file = "pyzmq-26.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:28ad5233e9c3b52d76196c696e362508959741e1a005fb8fa03b51aea156088f"}, + {file = "pyzmq-26.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:1c17211bc037c7d88e85ed8b7d8f7e52db6dc8eca5590d162717c654550f7282"}, + {file = "pyzmq-26.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b8f86dd868d41bea9a5f873ee13bf5551c94cf6bc51baebc6f85075971fe6eea"}, + {file = "pyzmq-26.2.0-cp310-cp310-win32.whl", hash = "sha256:46a446c212e58456b23af260f3d9fb785054f3e3653dbf7279d8f2b5546b21c2"}, + {file = "pyzmq-26.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:49d34ab71db5a9c292a7644ce74190b1dd5a3475612eefb1f8be1d6961441971"}, + {file = "pyzmq-26.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:bfa832bfa540e5b5c27dcf5de5d82ebc431b82c453a43d141afb1e5d2de025fa"}, + {file = 
"pyzmq-26.2.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:8f7e66c7113c684c2b3f1c83cdd3376103ee0ce4c49ff80a648643e57fb22218"}, + {file = "pyzmq-26.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3a495b30fc91db2db25120df5847d9833af237546fd59170701acd816ccc01c4"}, + {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77eb0968da535cba0470a5165468b2cac7772cfb569977cff92e240f57e31bef"}, + {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ace4f71f1900a548f48407fc9be59c6ba9d9aaf658c2eea6cf2779e72f9f317"}, + {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92a78853d7280bffb93df0a4a6a2498cba10ee793cc8076ef797ef2f74d107cf"}, + {file = "pyzmq-26.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:689c5d781014956a4a6de61d74ba97b23547e431e9e7d64f27d4922ba96e9d6e"}, + {file = "pyzmq-26.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aca98bc423eb7d153214b2df397c6421ba6373d3397b26c057af3c904452e37"}, + {file = "pyzmq-26.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f3496d76b89d9429a656293744ceca4d2ac2a10ae59b84c1da9b5165f429ad3"}, + {file = "pyzmq-26.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5c2b3bfd4b9689919db068ac6c9911f3fcb231c39f7dd30e3138be94896d18e6"}, + {file = "pyzmq-26.2.0-cp311-cp311-win32.whl", hash = "sha256:eac5174677da084abf378739dbf4ad245661635f1600edd1221f150b165343f4"}, + {file = "pyzmq-26.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:5a509df7d0a83a4b178d0f937ef14286659225ef4e8812e05580776c70e155d5"}, + {file = "pyzmq-26.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:c0e6091b157d48cbe37bd67233318dbb53e1e6327d6fc3bb284afd585d141003"}, + {file = "pyzmq-26.2.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:ded0fc7d90fe93ae0b18059930086c51e640cdd3baebdc783a695c77f123dcd9"}, + {file = "pyzmq-26.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:17bf5a931c7f6618023cdacc7081f3f266aecb68ca692adac015c383a134ca52"}, + {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55cf66647e49d4621a7e20c8d13511ef1fe1efbbccf670811864452487007e08"}, + {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4661c88db4a9e0f958c8abc2b97472e23061f0bc737f6f6179d7a27024e1faa5"}, + {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea7f69de383cb47522c9c208aec6dd17697db7875a4674c4af3f8cfdac0bdeae"}, + {file = "pyzmq-26.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7f98f6dfa8b8ccaf39163ce872bddacca38f6a67289116c8937a02e30bbe9711"}, + {file = "pyzmq-26.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e3e0210287329272539eea617830a6a28161fbbd8a3271bf4150ae3e58c5d0e6"}, + {file = "pyzmq-26.2.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6b274e0762c33c7471f1a7471d1a2085b1a35eba5cdc48d2ae319f28b6fc4de3"}, + {file = "pyzmq-26.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:29c6a4635eef69d68a00321e12a7d2559fe2dfccfa8efae3ffb8e91cd0b36a8b"}, + {file = "pyzmq-26.2.0-cp312-cp312-win32.whl", hash = "sha256:989d842dc06dc59feea09e58c74ca3e1678c812a4a8a2a419046d711031f69c7"}, + {file = "pyzmq-26.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:2a50625acdc7801bc6f74698c5c583a491c61d73c6b7ea4dee3901bb99adb27a"}, + {file = "pyzmq-26.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:4d29ab8592b6ad12ebbf92ac2ed2bedcfd1cec192d8e559e2e099f648570e19b"}, + {file = "pyzmq-26.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9dd8cd1aeb00775f527ec60022004d030ddc51d783d056e3e23e74e623e33726"}, + {file = "pyzmq-26.2.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:28c812d9757fe8acecc910c9ac9dafd2ce968c00f9e619db09e9f8f54c3a68a3"}, + {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4d80b1dd99c1942f74ed608ddb38b181b87476c6a966a88a950c7dee118fdf50"}, + {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c997098cc65e3208eca09303630e84d42718620e83b733d0fd69543a9cab9cb"}, + {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ad1bc8d1b7a18497dda9600b12dc193c577beb391beae5cd2349184db40f187"}, + {file = "pyzmq-26.2.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:bea2acdd8ea4275e1278350ced63da0b166421928276c7c8e3f9729d7402a57b"}, + {file = "pyzmq-26.2.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:23f4aad749d13698f3f7b64aad34f5fc02d6f20f05999eebc96b89b01262fb18"}, + {file = "pyzmq-26.2.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a4f96f0d88accc3dbe4a9025f785ba830f968e21e3e2c6321ccdfc9aef755115"}, + {file = "pyzmq-26.2.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ced65e5a985398827cc9276b93ef6dfabe0273c23de8c7931339d7e141c2818e"}, + {file = "pyzmq-26.2.0-cp313-cp313-win32.whl", hash = "sha256:31507f7b47cc1ead1f6e86927f8ebb196a0bab043f6345ce070f412a59bf87b5"}, + {file = "pyzmq-26.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:70fc7fcf0410d16ebdda9b26cbd8bf8d803d220a7f3522e060a69a9c87bf7bad"}, + {file = "pyzmq-26.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:c3789bd5768ab5618ebf09cef6ec2b35fed88709b104351748a63045f0ff9797"}, + {file = "pyzmq-26.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:034da5fc55d9f8da09015d368f519478a52675e558c989bfcb5cf6d4e16a7d2a"}, + {file = "pyzmq-26.2.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:c92d73464b886931308ccc45b2744e5968cbaade0b1d6aeb40d8ab537765f5bc"}, + {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:794a4562dcb374f7dbbfb3f51d28fb40123b5a2abadee7b4091f93054909add5"}, + {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:aee22939bb6075e7afededabad1a56a905da0b3c4e3e0c45e75810ebe3a52672"}, + {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae90ff9dad33a1cfe947d2c40cb9cb5e600d759ac4f0fd22616ce6540f72797"}, + {file = "pyzmq-26.2.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:43a47408ac52647dfabbc66a25b05b6a61700b5165807e3fbd40063fcaf46386"}, + {file = "pyzmq-26.2.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:25bf2374a2a8433633c65ccb9553350d5e17e60c8eb4de4d92cc6bd60f01d306"}, + {file = "pyzmq-26.2.0-cp313-cp313t-musllinux_1_1_i686.whl", hash = "sha256:007137c9ac9ad5ea21e6ad97d3489af654381324d5d3ba614c323f60dab8fae6"}, + {file = "pyzmq-26.2.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:470d4a4f6d48fb34e92d768b4e8a5cc3780db0d69107abf1cd7ff734b9766eb0"}, + {file = "pyzmq-26.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3b55a4229ce5da9497dd0452b914556ae58e96a4381bb6f59f1305dfd7e53fc8"}, + {file = "pyzmq-26.2.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9cb3a6460cdea8fe8194a76de8895707e61ded10ad0be97188cc8463ffa7e3a8"}, + {file = "pyzmq-26.2.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8ab5cad923cc95c87bffee098a27856c859bd5d0af31bd346035aa816b081fe1"}, + {file = "pyzmq-26.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ed69074a610fad1c2fda66180e7b2edd4d31c53f2d1872bc2d1211563904cd9"}, + {file = "pyzmq-26.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cccba051221b916a4f5e538997c45d7d136a5646442b1231b916d0164067ea27"}, + {file = "pyzmq-26.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:0eaa83fc4c1e271c24eaf8fb083cbccef8fde77ec8cd45f3c35a9a123e6da097"}, + {file = "pyzmq-26.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9edda2df81daa129b25a39b86cb57dfdfe16f7ec15b42b19bfac503360d27a93"}, + {file = "pyzmq-26.2.0-cp37-cp37m-win32.whl", hash = 
"sha256:ea0eb6af8a17fa272f7b98d7bebfab7836a0d62738e16ba380f440fceca2d951"}, + {file = "pyzmq-26.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4ff9dc6bc1664bb9eec25cd17506ef6672d506115095411e237d571e92a58231"}, + {file = "pyzmq-26.2.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:2eb7735ee73ca1b0d71e0e67c3739c689067f055c764f73aac4cc8ecf958ee3f"}, + {file = "pyzmq-26.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a534f43bc738181aa7cbbaf48e3eca62c76453a40a746ab95d4b27b1111a7d2"}, + {file = "pyzmq-26.2.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:aedd5dd8692635813368e558a05266b995d3d020b23e49581ddd5bbe197a8ab6"}, + {file = "pyzmq-26.2.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8be4700cd8bb02cc454f630dcdf7cfa99de96788b80c51b60fe2fe1dac480289"}, + {file = "pyzmq-26.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fcc03fa4997c447dce58264e93b5aa2d57714fbe0f06c07b7785ae131512732"}, + {file = "pyzmq-26.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:402b190912935d3db15b03e8f7485812db350d271b284ded2b80d2e5704be780"}, + {file = "pyzmq-26.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8685fa9c25ff00f550c1fec650430c4b71e4e48e8d852f7ddcf2e48308038640"}, + {file = "pyzmq-26.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:76589c020680778f06b7e0b193f4b6dd66d470234a16e1df90329f5e14a171cd"}, + {file = "pyzmq-26.2.0-cp38-cp38-win32.whl", hash = "sha256:8423c1877d72c041f2c263b1ec6e34360448decfb323fa8b94e85883043ef988"}, + {file = "pyzmq-26.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:76589f2cd6b77b5bdea4fca5992dc1c23389d68b18ccc26a53680ba2dc80ff2f"}, + {file = "pyzmq-26.2.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:b1d464cb8d72bfc1a3adc53305a63a8e0cac6bc8c5a07e8ca190ab8d3faa43c2"}, + {file = "pyzmq-26.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4da04c48873a6abdd71811c5e163bd656ee1b957971db7f35140a2d573f6949c"}, + {file = 
"pyzmq-26.2.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d049df610ac811dcffdc147153b414147428567fbbc8be43bb8885f04db39d98"}, + {file = "pyzmq-26.2.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05590cdbc6b902101d0e65d6a4780af14dc22914cc6ab995d99b85af45362cc9"}, + {file = "pyzmq-26.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c811cfcd6a9bf680236c40c6f617187515269ab2912f3d7e8c0174898e2519db"}, + {file = "pyzmq-26.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6835dd60355593de10350394242b5757fbbd88b25287314316f266e24c61d073"}, + {file = "pyzmq-26.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc6bee759a6bddea5db78d7dcd609397449cb2d2d6587f48f3ca613b19410cfc"}, + {file = "pyzmq-26.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c530e1eecd036ecc83c3407f77bb86feb79916d4a33d11394b8234f3bd35b940"}, + {file = "pyzmq-26.2.0-cp39-cp39-win32.whl", hash = "sha256:367b4f689786fca726ef7a6c5ba606958b145b9340a5e4808132cc65759abd44"}, + {file = "pyzmq-26.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:e6fa2e3e683f34aea77de8112f6483803c96a44fd726d7358b9888ae5bb394ec"}, + {file = "pyzmq-26.2.0-cp39-cp39-win_arm64.whl", hash = "sha256:7445be39143a8aa4faec43b076e06944b8f9d0701b669df4af200531b21e40bb"}, + {file = "pyzmq-26.2.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:706e794564bec25819d21a41c31d4df2d48e1cc4b061e8d345d7fb4dd3e94072"}, + {file = "pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b435f2753621cd36e7c1762156815e21c985c72b19135dac43a7f4f31d28dd1"}, + {file = "pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:160c7e0a5eb178011e72892f99f918c04a131f36056d10d9c1afb223fc952c2d"}, + {file = "pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c4a71d5d6e7b28a47a394c0471b7e77a0661e2d651e7ae91e0cab0a587859ca"}, + {file = 
"pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c"}, + {file = "pyzmq-26.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2ea4ad4e6a12e454de05f2949d4beddb52460f3de7c8b9d5c46fbb7d7222e02c"}, + {file = "pyzmq-26.2.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fc4f7a173a5609631bb0c42c23d12c49df3966f89f496a51d3eb0ec81f4519d6"}, + {file = "pyzmq-26.2.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:878206a45202247781472a2d99df12a176fef806ca175799e1c6ad263510d57c"}, + {file = "pyzmq-26.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17c412bad2eb9468e876f556eb4ee910e62d721d2c7a53c7fa31e643d35352e6"}, + {file = "pyzmq-26.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:0d987a3ae5a71c6226b203cfd298720e0086c7fe7c74f35fa8edddfbd6597eed"}, + {file = "pyzmq-26.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:39887ac397ff35b7b775db7201095fc6310a35fdbae85bac4523f7eb3b840e20"}, + {file = "pyzmq-26.2.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fdb5b3e311d4d4b0eb8b3e8b4d1b0a512713ad7e6a68791d0923d1aec433d919"}, + {file = "pyzmq-26.2.0-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:226af7dcb51fdb0109f0016449b357e182ea0ceb6b47dfb5999d569e5db161d5"}, + {file = "pyzmq-26.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bed0e799e6120b9c32756203fb9dfe8ca2fb8467fed830c34c877e25638c3fc"}, + {file = "pyzmq-26.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:29c7947c594e105cb9e6c466bace8532dc1ca02d498684128b339799f5248277"}, + {file = "pyzmq-26.2.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cdeabcff45d1c219636ee2e54d852262e5c2e085d6cb476d938aee8d921356b3"}, + {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:35cffef589bcdc587d06f9149f8d5e9e8859920a071df5a2671de2213bef592a"}, + {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18c8dc3b7468d8b4bdf60ce9d7141897da103c7a4690157b32b60acb45e333e6"}, + {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7133d0a1677aec369d67dd78520d3fa96dd7f3dcec99d66c1762870e5ea1a50a"}, + {file = "pyzmq-26.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6a96179a24b14fa6428cbfc08641c779a53f8fcec43644030328f44034c7f1f4"}, + {file = "pyzmq-26.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4f78c88905461a9203eac9faac157a2a0dbba84a0fd09fd29315db27be40af9f"}, + {file = "pyzmq-26.2.0.tar.gz", hash = "sha256:070672c258581c8e4f640b5159297580a9974b026043bd4ab0470be9ed324f1f"}, ] [package.dependencies] @@ -4109,90 +4731,105 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} [[package]] name = "regex" -version = "2024.5.15" +version = "2024.9.11" description = "Alternative regular expression module, to replace re." 
optional = false python-versions = ">=3.8" files = [ - {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, - {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, - {file = "regex-2024.5.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0bd000c6e266927cb7a1bc39d55be95c4b4f65c5be53e659537537e019232b1"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eaa7ddaf517aa095fa8da0b5015c44d03da83f5bd49c87961e3c997daed0de7"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba68168daedb2c0bab7fd7e00ced5ba90aebf91024dea3c88ad5063c2a562cca"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e8d717bca3a6e2064fc3a08df5cbe366369f4b052dcd21b7416e6d71620dca1"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1337b7dbef9b2f71121cdbf1e97e40de33ff114801263b275aafd75303bd62b5"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9ebd0a36102fcad2f03696e8af4ae682793a5d30b46c647eaf280d6cfb32796"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9efa1a32ad3a3ea112224897cdaeb6aa00381627f567179c0314f7b65d354c62"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1595f2d10dff3d805e054ebdc41c124753631b6a471b976963c7b28543cf13b0"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b802512f3e1f480f41ab5f2cfc0e2f761f08a1f41092d6718868082fc0d27143"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:a0981022dccabca811e8171f913de05720590c915b033b7e601f35ce4ea7019f"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:19068a6a79cf99a19ccefa44610491e9ca02c2be3305c7760d3831d38a467a6f"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b5269484f6126eee5e687785e83c6b60aad7663dafe842b34691157e5083e53"}, - {file = "regex-2024.5.15-cp310-cp310-win32.whl", hash = "sha256:ada150c5adfa8fbcbf321c30c751dc67d2f12f15bd183ffe4ec7cde351d945b3"}, - {file = "regex-2024.5.15-cp310-cp310-win_amd64.whl", hash = "sha256:ac394ff680fc46b97487941f5e6ae49a9f30ea41c6c6804832063f14b2a5a145"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f5b1dff3ad008dccf18e652283f5e5339d70bf8ba7c98bf848ac33db10f7bc7a"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6a2b494a76983df8e3d3feea9b9ffdd558b247e60b92f877f93a1ff43d26656"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a32b96f15c8ab2e7d27655969a23895eb799de3665fa94349f3b2fbfd547236f"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10002e86e6068d9e1c91eae8295ef690f02f913c57db120b58fdd35a6bb1af35"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec54d5afa89c19c6dd8541a133be51ee1017a38b412b1321ccb8d6ddbeb4cf7d"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10e4ce0dca9ae7a66e6089bb29355d4432caed736acae36fef0fdd7879f0b0cb"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e507ff1e74373c4d3038195fdd2af30d297b4f0950eeda6f515ae3d84a1770f"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f059a4d795e646e1c37665b9d06062c62d0e8cc3c511fe01315973a6542e40"}, - {file = 
"regex-2024.5.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0721931ad5fe0dda45d07f9820b90b2148ccdd8e45bb9e9b42a146cb4f695649"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:833616ddc75ad595dee848ad984d067f2f31be645d603e4d158bba656bbf516c"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:287eb7f54fc81546346207c533ad3c2c51a8d61075127d7f6d79aaf96cdee890"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:19dfb1c504781a136a80ecd1fff9f16dddf5bb43cec6871778c8a907a085bb3d"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:119af6e56dce35e8dfb5222573b50c89e5508d94d55713c75126b753f834de68"}, - {file = "regex-2024.5.15-cp311-cp311-win32.whl", hash = "sha256:1c1c174d6ec38d6c8a7504087358ce9213d4332f6293a94fbf5249992ba54efa"}, - {file = "regex-2024.5.15-cp311-cp311-win_amd64.whl", hash = "sha256:9e717956dcfd656f5055cc70996ee2cc82ac5149517fc8e1b60261b907740201"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:632b01153e5248c134007209b5c6348a544ce96c46005d8456de1d552455b014"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e64198f6b856d48192bf921421fdd8ad8eb35e179086e99e99f711957ffedd6e"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68811ab14087b2f6e0fc0c2bae9ad689ea3584cad6917fc57be6a48bbd012c49"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ec0c2fea1e886a19c3bee0cd19d862b3aa75dcdfb42ebe8ed30708df64687a"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0c0c0003c10f54a591d220997dd27d953cd9ccc1a7294b40a4be5312be8797b"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2431b9e263af1953c55abbd3e2efca67ca80a3de8a0437cb58e2421f8184717a"}, - {file = 
"regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a605586358893b483976cffc1723fb0f83e526e8f14c6e6614e75919d9862cf"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391d7f7f1e409d192dba8bcd42d3e4cf9e598f3979cdaed6ab11288da88cb9f2"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ff11639a8d98969c863d4617595eb5425fd12f7c5ef6621a4b74b71ed8726d5"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4eee78a04e6c67e8391edd4dad3279828dd66ac4b79570ec998e2155d2e59fd5"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8fe45aa3f4aa57faabbc9cb46a93363edd6197cbc43523daea044e9ff2fea83e"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d0a3d8d6acf0c78a1fff0e210d224b821081330b8524e3e2bc5a68ef6ab5803d"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c486b4106066d502495b3025a0a7251bf37ea9540433940a23419461ab9f2a80"}, - {file = "regex-2024.5.15-cp312-cp312-win32.whl", hash = "sha256:c49e15eac7c149f3670b3e27f1f28a2c1ddeccd3a2812cba953e01be2ab9b5fe"}, - {file = "regex-2024.5.15-cp312-cp312-win_amd64.whl", hash = "sha256:673b5a6da4557b975c6c90198588181029c60793835ce02f497ea817ff647cb2"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:87e2a9c29e672fc65523fb47a90d429b70ef72b901b4e4b1bd42387caf0d6835"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c3bea0ba8b73b71b37ac833a7f3fd53825924165da6a924aec78c13032f20850"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfc4f82cabe54f1e7f206fd3d30fda143f84a63fe7d64a81558d6e5f2e5aaba9"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5bb9425fe881d578aeca0b2b4b3d314ec88738706f66f219c194d67179337cb"}, - {file = 
"regex-2024.5.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64c65783e96e563103d641760664125e91bd85d8e49566ee560ded4da0d3e704"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf2430df4148b08fb4324b848672514b1385ae3807651f3567871f130a728cc3"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5397de3219a8b08ae9540c48f602996aa6b0b65d5a61683e233af8605c42b0f2"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:455705d34b4154a80ead722f4f185b04c4237e8e8e33f265cd0798d0e44825fa"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2b6f1b3bb6f640c1a92be3bbfbcb18657b125b99ecf141fb3310b5282c7d4ed"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3ad070b823ca5890cab606c940522d05d3d22395d432f4aaaf9d5b1653e47ced"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5b5467acbfc153847d5adb21e21e29847bcb5870e65c94c9206d20eb4e99a384"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e6662686aeb633ad65be2a42b4cb00178b3fbf7b91878f9446075c404ada552f"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2b4c884767504c0e2401babe8b5b7aea9148680d2e157fa28f01529d1f7fcf67"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3cd7874d57f13bf70078f1ff02b8b0aa48d5b9ed25fc48547516c6aba36f5741"}, - {file = "regex-2024.5.15-cp38-cp38-win32.whl", hash = "sha256:e4682f5ba31f475d58884045c1a97a860a007d44938c4c0895f41d64481edbc9"}, - {file = "regex-2024.5.15-cp38-cp38-win_amd64.whl", hash = "sha256:d99ceffa25ac45d150e30bd9ed14ec6039f2aad0ffa6bb87a5936f5782fc1569"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:13cdaf31bed30a1e1c2453ef6015aa0983e1366fad2667657dbcac7b02f67133"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cac27dcaa821ca271855a32188aa61d12decb6fe45ffe3e722401fe61e323cd1"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7dbe2467273b875ea2de38ded4eba86cbcbc9a1a6d0aa11dcf7bd2e67859c435"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f18a9a3513a99c4bef0e3efd4c4a5b11228b48aa80743be822b71e132ae4f5"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d347a741ea871c2e278fde6c48f85136c96b8659b632fb57a7d1ce1872547600"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1878b8301ed011704aea4c806a3cadbd76f84dece1ec09cc9e4dc934cfa5d4da"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4babf07ad476aaf7830d77000874d7611704a7fcf68c9c2ad151f5d94ae4bfc4"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35cb514e137cb3488bce23352af3e12fb0dbedd1ee6e60da053c69fb1b29cc6c"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdd09d47c0b2efee9378679f8510ee6955d329424c659ab3c5e3a6edea696294"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:72d7a99cd6b8f958e85fc6ca5b37c4303294954eac1376535b03c2a43eb72629"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a094801d379ab20c2135529948cb84d417a2169b9bdceda2a36f5f10977ebc16"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c0c18345010870e58238790a6779a1219b4d97bd2e77e1140e8ee5d14df071aa"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = 
"sha256:16093f563098448ff6b1fa68170e4acbef94e6b6a4e25e10eae8598bb1694b5d"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e38a7d4e8f633a33b4c7350fbd8bad3b70bf81439ac67ac38916c4a86b465456"}, - {file = "regex-2024.5.15-cp39-cp39-win32.whl", hash = "sha256:71a455a3c584a88f654b64feccc1e25876066c4f5ef26cd6dd711308aa538694"}, - {file = "regex-2024.5.15-cp39-cp39-win_amd64.whl", hash = "sha256:cab12877a9bdafde5500206d1020a584355a97884dfd388af3699e9137bf7388"}, - {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, + {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, + {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"}, + {file = "regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71"}, + {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8"}, + {file = 
"regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca"}, + {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a"}, + {file = "regex-2024.9.11-cp310-cp310-win32.whl", hash = "sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0"}, + {file = "regex-2024.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623"}, + {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df"}, + {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268"}, + {file = "regex-2024.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50"}, + {file = "regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96"}, + {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1"}, + {file = "regex-2024.9.11-cp311-cp311-win32.whl", hash = "sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9"}, + {file = "regex-2024.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf"}, + {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7"}, + {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231"}, + {file = "regex-2024.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash 
= "sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a"}, + {file = "regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a"}, + {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a"}, + {file = "regex-2024.9.11-cp312-cp312-win32.whl", hash = "sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776"}, + {file = "regex-2024.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009"}, + {file = 
"regex-2024.9.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784"}, + {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36"}, + {file = "regex-2024.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6"}, + {file = "regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554"}, + {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8"}, + {file = "regex-2024.9.11-cp313-cp313-win32.whl", hash = "sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8"}, + {file = "regex-2024.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f"}, + {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4"}, + {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e"}, + {file = "regex-2024.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4"}, + {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = 
"sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c"}, + {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd"}, + {file = "regex-2024.9.11-cp38-cp38-win32.whl", hash = "sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771"}, + {file = "regex-2024.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508"}, + {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066"}, + {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62"}, + {file = "regex-2024.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9"}, + {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89"}, + {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35"}, + {file = "regex-2024.9.11-cp39-cp39-win32.whl", hash = "sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142"}, + {file = "regex-2024.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919"}, + {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"}, ] [[package]] @@ -4268,40 +4905,40 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.7.0" +version = "0.7.1" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.7.0-py3-none-linux_armv6l.whl", hash = "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628"}, - {file = "ruff-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737"}, - {file = "ruff-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:214b88498684e20b6b2b8852c01d50f0651f3cc6118dfa113b4def9f14faaf06"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630fce3fefe9844e91ea5bbf7ceadab4f9981f42b704fae011bb8efcaf5d84be"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:211d877674e9373d4bb0f1c80f97a0201c61bcd1e9d045b6e9726adc42c156aa"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:194d6c46c98c73949a106425ed40a576f52291c12bc21399eb8f13a0f7073495"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:82c2579b82b9973a110fab281860403b397c08c403de92de19568f32f7178598"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9af971fe85dcd5eaed8f585ddbc6bdbe8c217fb8fcf510ea6bca5bdfff56040e"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b641c7f16939b7d24b7bfc0be4102c56562a18281f84f635604e8a6989948914"}, - {file = "ruff-0.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d71672336e46b34e0c90a790afeac8a31954fd42872c1f6adaea1dff76fd44f9"}, - {file = "ruff-0.7.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ab7d98c7eed355166f367597e513a6c82408df4181a937628dbec79abb2a1fe4"}, - {file = "ruff-0.7.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1eb54986f770f49edb14f71d33312d79e00e629a57387382200b1ef12d6a4ef9"}, - {file = "ruff-0.7.0-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:dc452ba6f2bb9cf8726a84aa877061a2462afe9ae0ea1d411c53d226661c601d"}, - {file = "ruff-0.7.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4b406c2dce5be9bad59f2de26139a86017a517e6bcd2688da515481c05a2cb11"}, - {file = "ruff-0.7.0-py3-none-win32.whl", hash = "sha256:f6c968509f767776f524a8430426539587d5ec5c662f6addb6aa25bc2e8195ec"}, - {file = "ruff-0.7.0-py3-none-win_amd64.whl", hash = "sha256:ff4aabfbaaba880e85d394603b9e75d32b0693152e16fa659a3064a85df7fce2"}, - {file = "ruff-0.7.0-py3-none-win_arm64.whl", hash = "sha256:10842f69c245e78d6adec7e1db0a7d9ddc2fff0621d730e61657b64fa36f207e"}, - {file = "ruff-0.7.0.tar.gz", hash = "sha256:47a86360cf62d9cd53ebfb0b5eb0e882193fc191c6d717e8bef4462bc3b9ea2b"}, + {file = "ruff-0.7.1-py3-none-linux_armv6l.whl", hash = "sha256:cb1bc5ed9403daa7da05475d615739cc0212e861b7306f314379d958592aaa89"}, + {file = "ruff-0.7.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:27c1c52a8d199a257ff1e5582d078eab7145129aa02721815ca8fa4f9612dc35"}, + {file = "ruff-0.7.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:588a34e1ef2ea55b4ddfec26bbe76bc866e92523d8c6cdec5e8aceefeff02d99"}, + {file = "ruff-0.7.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fc32f9cdf72dc75c451e5f072758b118ab8100727168a3df58502b43a599ca"}, + {file = "ruff-0.7.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:985818742b833bffa543a84d1cc11b5e6871de1b4e0ac3060a59a2bae3969250"}, + {file = "ruff-0.7.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32f1e8a192e261366c702c5fb2ece9f68d26625f198a25c408861c16dc2dea9c"}, + {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:699085bf05819588551b11751eff33e9ca58b1b86a6843e1b082a7de40da1565"}, + {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344cc2b0814047dc8c3a8ff2cd1f3d808bb23c6658db830d25147339d9bf9ea7"}, + {file = 
"ruff-0.7.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4316bbf69d5a859cc937890c7ac7a6551252b6a01b1d2c97e8fc96e45a7c8b4a"}, + {file = "ruff-0.7.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d3af9dca4c56043e738a4d6dd1e9444b6d6c10598ac52d146e331eb155a8ad"}, + {file = "ruff-0.7.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5c121b46abde94a505175524e51891f829414e093cd8326d6e741ecfc0a9112"}, + {file = "ruff-0.7.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8422104078324ea250886954e48f1373a8fe7de59283d747c3a7eca050b4e378"}, + {file = "ruff-0.7.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:56aad830af8a9db644e80098fe4984a948e2b6fc2e73891538f43bbe478461b8"}, + {file = "ruff-0.7.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:658304f02f68d3a83c998ad8bf91f9b4f53e93e5412b8f2388359d55869727fd"}, + {file = "ruff-0.7.1-py3-none-win32.whl", hash = "sha256:b517a2011333eb7ce2d402652ecaa0ac1a30c114fbbd55c6b8ee466a7f600ee9"}, + {file = "ruff-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f38c41fcde1728736b4eb2b18850f6d1e3eedd9678c914dede554a70d5241307"}, + {file = "ruff-0.7.1-py3-none-win_arm64.whl", hash = "sha256:19aa200ec824c0f36d0c9114c8ec0087082021732979a359d6f3c390a6ff2a37"}, + {file = "ruff-0.7.1.tar.gz", hash = "sha256:9d8a41d4aa2dad1575adb98a82870cf5db5f76b2938cf2206c22c940034a36f4"}, ] [[package]] name = "s3transfer" -version = "0.10.1" +version = "0.10.3" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">= 3.8" +python-versions = ">=3.8" files = [ - {file = "s3transfer-0.10.1-py3-none-any.whl", hash = "sha256:ceb252b11bcf87080fb7850a224fb6e05c8a776bab8f2b64b7f25b969464839d"}, - {file = "s3transfer-0.10.1.tar.gz", hash = "sha256:5683916b4c724f799e600f41dd9e10a9ff19871bf87623cc8f491cb4f5fa0a19"}, + {file = "s3transfer-0.10.3-py3-none-any.whl", hash = "sha256:263ed587a5803c6c708d3ce44dc4dfedaab4c1a32e8329bab818933d79ddcf5d"}, + {file = "s3transfer-0.10.3.tar.gz", 
hash = "sha256:4f50ed74ab84d474ce614475e0b8d5047ff080810aac5d01ea25231cfc944b0c"}, ] [package.dependencies] @@ -4312,32 +4949,37 @@ crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] [[package]] name = "scikit-learn" -version = "1.5.0" +version = "1.5.2" description = "A set of python modules for machine learning and data mining" optional = false python-versions = ">=3.9" files = [ - {file = "scikit_learn-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12e40ac48555e6b551f0a0a5743cc94cc5a765c9513fe708e01f0aa001da2801"}, - {file = "scikit_learn-1.5.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f405c4dae288f5f6553b10c4ac9ea7754d5180ec11e296464adb5d6ac68b6ef5"}, - {file = "scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df8ccabbf583315f13160a4bb06037bde99ea7d8211a69787a6b7c5d4ebb6fc3"}, - {file = "scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c75ea812cd83b1385bbfa94ae971f0d80adb338a9523f6bbcb5e0b0381151d4"}, - {file = "scikit_learn-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:a90c5da84829a0b9b4bf00daf62754b2be741e66b5946911f5bdfaa869fcedd6"}, - {file = "scikit_learn-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a65af2d8a6cce4e163a7951a4cfbfa7fceb2d5c013a4b593686c7f16445cf9d"}, - {file = "scikit_learn-1.5.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:4c0c56c3005f2ec1db3787aeaabefa96256580678cec783986836fc64f8ff622"}, - {file = "scikit_learn-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f77547165c00625551e5c250cefa3f03f2fc92c5e18668abd90bfc4be2e0bff"}, - {file = "scikit_learn-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:118a8d229a41158c9f90093e46b3737120a165181a1b58c03461447aa4657415"}, - {file = "scikit_learn-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:a03b09f9f7f09ffe8c5efffe2e9de1196c696d811be6798ad5eddf323c6f4d40"}, - {file = 
"scikit_learn-1.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:460806030c666addee1f074788b3978329a5bfdc9b7d63e7aad3f6d45c67a210"}, - {file = "scikit_learn-1.5.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:1b94d6440603752b27842eda97f6395f570941857456c606eb1d638efdb38184"}, - {file = "scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d82c2e573f0f2f2f0be897e7a31fcf4e73869247738ab8c3ce7245549af58ab8"}, - {file = "scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3a10e1d9e834e84d05e468ec501a356226338778769317ee0b84043c0d8fb06"}, - {file = "scikit_learn-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:855fc5fa8ed9e4f08291203af3d3e5fbdc4737bd617a371559aaa2088166046e"}, - {file = "scikit_learn-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:40fb7d4a9a2db07e6e0cae4dc7bdbb8fada17043bac24104d8165e10e4cff1a2"}, - {file = "scikit_learn-1.5.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:47132440050b1c5beb95f8ba0b2402bbd9057ce96ec0ba86f2f445dd4f34df67"}, - {file = "scikit_learn-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:174beb56e3e881c90424e21f576fa69c4ffcf5174632a79ab4461c4c960315ac"}, - {file = "scikit_learn-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261fe334ca48f09ed64b8fae13f9b46cc43ac5f580c4a605cbb0a517456c8f71"}, - {file = "scikit_learn-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:057b991ac64b3e75c9c04b5f9395eaf19a6179244c089afdebaad98264bff37c"}, - {file = "scikit_learn-1.5.0.tar.gz", hash = "sha256:789e3db01c750ed6d496fa2db7d50637857b451e57bcae863bff707c1247bef7"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"}, + {file = 
"scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8"}, + {file = "scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1"}, + {file = "scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7"}, + {file = "scikit_learn-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe"}, + {file = "scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d"}, ] [package.dependencies] @@ -4348,12 +4990,12 @@ threadpoolctl = ">=3.1.0" [package.extras] benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] -build = ["cython (>=3.0.10)", "meson-python 
(>=0.15.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] -docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] maintenance = ["conda-lock (==2.5.6)"] -tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] [[package]] name = "scipy" @@ -4399,13 +5041,13 @@ test = ["asv", "gmpy2", "mpmath", 
"pooch", "pytest", "pytest-cov", "pytest-timeo [[package]] name = "sentry-sdk" -version = "2.6.0" +version = "2.17.0" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = ">=3.6" files = [ - {file = "sentry_sdk-2.6.0-py2.py3-none-any.whl", hash = "sha256:422b91cb49378b97e7e8d0e8d5a1069df23689d45262b86f54988a7db264e874"}, - {file = "sentry_sdk-2.6.0.tar.gz", hash = "sha256:65cc07e9c6995c5e316109f138570b32da3bd7ff8d0d0ee4aaf2628c3dd8127d"}, + {file = "sentry_sdk-2.17.0-py2.py3-none-any.whl", hash = "sha256:625955884b862cc58748920f9e21efdfb8e0d4f98cca4ab0d3918576d5b606ad"}, + {file = "sentry_sdk-2.17.0.tar.gz", hash = "sha256:dd0a05352b78ffeacced73a94e86f38b32e2eae15fff5f30ca5abb568a72eacf"}, ] [package.dependencies] @@ -4428,14 +5070,16 @@ falcon = ["falcon (>=1.4)"] fastapi = ["fastapi (>=0.79.0)"] flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] grpcio = ["grpcio (>=1.21.1)", "protobuf (>=3.8.0)"] +http2 = ["httpcore[http2] (==1.*)"] httpx = ["httpx (>=0.16.0)"] huey = ["huey (>=2)"] huggingface-hub = ["huggingface-hub (>=0.22)"] langchain = ["langchain (>=0.0.210)"] +litestar = ["litestar (>=2.0.0)"] loguru = ["loguru (>=0.5)"] openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"] opentelemetry = ["opentelemetry-distro (>=0.35b0)"] -opentelemetry-experimental = ["opentelemetry-distro (>=0.40b0,<1.0)", "opentelemetry-instrumentation-aiohttp-client (>=0.40b0,<1.0)", "opentelemetry-instrumentation-django (>=0.40b0,<1.0)", "opentelemetry-instrumentation-fastapi (>=0.40b0,<1.0)", "opentelemetry-instrumentation-flask (>=0.40b0,<1.0)", "opentelemetry-instrumentation-requests (>=0.40b0,<1.0)", "opentelemetry-instrumentation-sqlite3 (>=0.40b0,<1.0)", "opentelemetry-instrumentation-urllib (>=0.40b0,<1.0)"] +opentelemetry-experimental = ["opentelemetry-distro"] pure-eval = ["asttokens", "executing", "pure-eval"] pymongo = ["pymongo (>=3.1)"] pyspark = ["pyspark (>=2.4.4)"] @@ -4445,7 +5089,7 @@ sanic = ["sanic (>=0.8)"] 
sqlalchemy = ["sqlalchemy (>=1.2)"] starlette = ["starlette (>=0.19.1)"] starlite = ["starlite (>=1.48)"] -tornado = ["tornado (>=5)"] +tornado = ["tornado (>=6)"] [[package]] name = "setproctitle" @@ -4549,19 +5193,97 @@ test = ["pytest"] [[package]] name = "setuptools" -version = "66.1.1" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" +files = [ + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", 
"jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] + +[[package]] +name = "setuptools-scm" +version = "8.1.0" +description = "the blessed package to manage your versions by scm tags" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools_scm-8.1.0-py3-none-any.whl", hash = "sha256:897a3226a6fd4a6eb2f068745e49733261a21f70b1bb28fce0339feb978d9af3"}, + {file = "setuptools_scm-8.1.0.tar.gz", hash = "sha256:42dea1b65771cba93b7a515d65a65d8246e560768a66b9106a592c8e7f26c8a7"}, +] + +[package.dependencies] +packaging = ">=20" +setuptools = "*" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["entangled-cli (>=2.0,<3.0)", "mkdocs", "mkdocs-entangled-plugin", "mkdocs-material", "mkdocstrings[python]", "pygments"] +rich = ["rich"] +test = ["build", "pytest", "rich", "typing-extensions", "wheel"] + +[[package]] +name = "shap" +version = "0.46.0" +description = "A unified approach to explain the output of any machine learning model." 
+optional = false +python-versions = ">=3.9" files = [ - {file = "setuptools-66.1.1-py3-none-any.whl", hash = "sha256:6f590d76b713d5de4e49fe4fbca24474469f53c83632d5d0fd056f7ff7e8112b"}, - {file = "setuptools-66.1.1.tar.gz", hash = "sha256:ac4008d396bc9cd983ea483cb7139c0240a07bbc74ffb6232fceffedc6cf03a8"}, + {file = "shap-0.46.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:905b2d7a0262ef820785a7c0e3c7f24c9d281e6f934edb65cbe811fe0e971187"}, + {file = "shap-0.46.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bccbb30ffbf8b9ed53e476d0c1319fdfcbeac455fe9df277fb0d570d92790e80"}, + {file = "shap-0.46.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9633d3d7174acc01455538169ca6e6344f570530384548631aeadcf7bfdaaaea"}, + {file = "shap-0.46.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6097eb2ab7e8c194254bac3e462266490fbdd43bfe35a1014e9ee21c4ef10ee"}, + {file = "shap-0.46.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0cf7c6e3f056cf3bfd16bcfd5744d0cc25b851555b1e750a3ab889b3077d2d05"}, + {file = "shap-0.46.0-cp310-cp310-win_amd64.whl", hash = "sha256:949bd7fa40371c3f1885a30ae0611dd481bf4ac90066ff726c73cb5bb393032b"}, + {file = "shap-0.46.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f18217c98f39fd485d541f6aab0b860b3be74b69b21d4faf11959e3fcba765c5"}, + {file = "shap-0.46.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5bbdae4489577c6fce1cfe2d9d8f3d5b96d69284d29645fe651f78f6e965aeb4"}, + {file = "shap-0.46.0-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13d36dc58d1e8c010feb4e7da71c77d23626a52d12d16b02869e793b11be4695"}, + {file = "shap-0.46.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70e06fdfdf53d5fb932c82f4529397552b262e0ccce734f5226fb1e1eab2bc3e"}, + {file = "shap-0.46.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:943f0806fa00b4fafb174f172a73d88de2d8600e6d69c2e2bff833f00e6c4c21"}, + {file = "shap-0.46.0-cp311-cp311-win_amd64.whl", hash = "sha256:c972a2efdc9fc00d543efaa55805eca947b8c418d065962d967824c2d5d295d0"}, + {file = "shap-0.46.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a9cc9be191562bea1a782baff912854d267c6f4831bbf454d8d7bb7df7ddb214"}, + {file = "shap-0.46.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ab1fecfb43604605be17e26ae12bde4406c451c46b54b980d9570cec03fbc239"}, + {file = "shap-0.46.0-cp312-cp312-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b216adf2a17b0e0694f17965ac29354ca8c4f27ac3c66f68bf6fc4cb2aa28207"}, + {file = "shap-0.46.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6e5dc5257b747a784f7a9b3acb64216a9011f01734f3c96b27fe5e15ae5f99f"}, + {file = "shap-0.46.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1230bf973463041dfa15734f290fbf3ab9c6e4e8222339c76f68fc355b940d80"}, + {file = "shap-0.46.0-cp312-cp312-win_amd64.whl", hash = "sha256:0cbbf996537b2a42d3bc7f2a13492988822ee1bfd7220700989408dfb9e1c5ad"}, + {file = "shap-0.46.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c7d0c53a8cbefb2260ce28a98fa866c1a287770981f95c40a54f9d1082cbb31"}, + {file = "shap-0.46.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0726f8c63f09dde586c9859ad315641f5a080e9aecf123a0cabc336b61703d66"}, + {file = "shap-0.46.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99edc28daac4cbb98cd9f02febf4e9fbc6b9e3d24519c22ed59a98c68c47336c"}, + {file = "shap-0.46.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85a6ff9c9e15abd9a332360cff8d105165a600466167d6274dab468a050d005a"}, + {file = "shap-0.46.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9f9f9727839e2459dfa4b4fbc190224e87f7b4b2a29f0e2a438500215921192b"}, + {file = 
"shap-0.46.0-cp39-cp39-win_amd64.whl", hash = "sha256:b169b485a69f7d32e32fa64ad77be00129436c4455b9d0997b21b553f0becc8c"}, + {file = "shap-0.46.0.tar.gz", hash = "sha256:bdaa5b098be5a958348015e940f6fd264339b5db1e651f9898a3117be95b05a0"}, ] +[package.dependencies] +cloudpickle = "*" +numba = "*" +numpy = "*" +packaging = ">20.9" +pandas = "*" +scikit-learn = "*" +scipy = "*" +slicer = "0.0.8" +tqdm = ">=4.27.0" + [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +docs = ["ipython", "matplotlib", "myst-parser (==2.0.0)", "nbsphinx (==0.9.3)", "numpydoc", "requests", "sphinx (==7.2.6)", "sphinx-github-changelog (==1.2.1)", "sphinx-rtd-theme (==2.0.0)"] +others = ["lime"] +plots = ["ipython", "matplotlib"] +test = ["catboost", "gpboost", "lightgbm", "ngboost", "opencv-python", "protobuf (==3.20.3)", "pyod", "pyspark", "pytest", "pytest-cov", "pytest-mpl", "sentencepiece", "tensorflow", "tf-keras", "torch", "torch (==2.2.0)", "torchvision", "transformers", "xgboost"] +test-core = ["pytest", "pytest-cov", "pytest-mpl"] +test-notebooks = ["datasets", "jupyter", 
"keras", "nbconvert", "nbformat", "nlp", "transformers"] [[package]] name = "shellingham" @@ -4607,6 +5329,17 @@ docs = ["fairlearn (>=0.7.0)", "matplotlib (>=3.3)", "numpydoc (>=1.0.0)", "pand rich = ["rich (>=12)"] tests = ["catboost (>=1.0)", "fairlearn (>=0.7.0)", "flake8 (>=3.8.2)", "flaky (>=3.7.0)", "lightgbm (>=3)", "matplotlib (>=3.3)", "pandas (>=1)", "pytest (>=7)", "pytest-cov (>=2.9.0)", "quantile-forest (>=1.0.0)", "rich (>=12)", "types-requests (>=2.28.5)", "xgboost (>=1.6)"] +[[package]] +name = "slicer" +version = "0.0.8" +description = "A small package for big slicing." +optional = false +python-versions = ">=3.6" +files = [ + {file = "slicer-0.0.8-py3-none-any.whl", hash = "sha256:6c206258543aecd010d497dc2eca9d2805860a0b3758673903456b7df7934dc3"}, + {file = "slicer-0.0.8.tar.gz", hash = "sha256:2e7553af73f0c0c2d355f4afcc3ecf97c6f2156fcf4593955c3f56cf6c4d6eb7"}, +] + [[package]] name = "smmap" version = "5.0.1" @@ -4631,13 +5364,13 @@ files = [ [[package]] name = "soupsieve" -version = "2.5" +version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." 
optional = false python-versions = ">=3.8" files = [ - {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, - {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, ] [[package]] @@ -4675,13 +5408,13 @@ widechars = ["wcwidth"] [[package]] name = "tenacity" -version = "8.4.1" +version = "9.0.0" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" files = [ - {file = "tenacity-8.4.1-py3-none-any.whl", hash = "sha256:28522e692eda3e1b8f5e99c51464efcc0b9fc86933da92415168bc1c4e2308fa"}, - {file = "tenacity-8.4.1.tar.gz", hash = "sha256:54b1412b878ddf7e1f1577cd49527bad8cdef32421bd599beac0c6c3f10582fd"}, + {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, + {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, ] [package.extras] @@ -4690,13 +5423,13 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "termcolor" -version = "2.4.0" +version = "2.5.0" description = "ANSI color formatting for output in terminal" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, - {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, + {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"}, + {file = "termcolor-2.5.0.tar.gz", hash = 
"sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"}, ] [package.extras] @@ -4715,13 +5448,13 @@ files = [ [[package]] name = "tomli" -version = "2.0.1" +version = "2.0.2" description = "A lil' TOML parser" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, + {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, + {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, ] [[package]] @@ -4746,13 +5479,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.4" +version = "4.66.6" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, - {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, + {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, + {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, ] [package.dependencies] @@ -4781,13 +5514,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "typer" -version = "0.12.3" +version = "0.12.5" description = "Typer, build great CLIs. Easy to code. Based on Python type hints." 
optional = false python-versions = ">=3.7" files = [ - {file = "typer-0.12.3-py3-none-any.whl", hash = "sha256:070d7ca53f785acbccba8e7d28b08dcd88f79f1fbda035ade0aecec71ca5c914"}, - {file = "typer-0.12.3.tar.gz", hash = "sha256:49e73131481d804288ef62598d97a1ceef3058905aa536a1134f90891ba35482"}, + {file = "typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b"}, + {file = "typer-0.12.5.tar.gz", hash = "sha256:f592f089bedcc8ec1b974125d64851029c3b1af145f04aca64d69410f0c9b722"}, ] [package.dependencies] @@ -4809,24 +5542,24 @@ files = [ [[package]] name = "tzdata" -version = "2024.1" +version = "2024.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" files = [ - {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, - {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, + {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, + {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, ] [[package]] name = "urllib3" -version = "2.2.2" +version = "2.2.3" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, - {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, + {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, + {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, ] [package.extras] @@ -4837,57 +5570,64 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvloop" -version = "0.19.0" +version = "0.21.0" description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" files = [ - {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de4313d7f575474c8f5a12e163f6d89c0a878bc49219641d49e6f1444369a90e"}, - {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5588bd21cf1fcf06bded085f37e43ce0e00424197e7c10e77afd4bbefffef428"}, - {file = "uvloop-0.19.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b1fd71c3843327f3bbc3237bedcdb6504fd50368ab3e04d0410e52ec293f5b8"}, - {file = "uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a05128d315e2912791de6088c34136bfcdd0c7cbc1cf85fd6fd1bb321b7c849"}, - {file = "uvloop-0.19.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cd81bdc2b8219cb4b2556eea39d2e36bfa375a2dd021404f90a62e44efaaf957"}, - {file = "uvloop-0.19.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5f17766fb6da94135526273080f3455a112f82570b2ee5daa64d682387fe0dcd"}, - {file = "uvloop-0.19.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ce6b0af8f2729a02a5d1575feacb2a94fc7b2e983868b009d51c9a9d2149bef"}, - {file = "uvloop-0.19.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:31e672bb38b45abc4f26e273be83b72a0d28d074d5b370fc4dcf4c4eb15417d2"}, - {file = "uvloop-0.19.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:570fc0ed613883d8d30ee40397b79207eedd2624891692471808a95069a007c1"}, - {file = "uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5138821e40b0c3e6c9478643b4660bd44372ae1e16a322b8fc07478f92684e24"}, - {file = "uvloop-0.19.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:91ab01c6cd00e39cde50173ba4ec68a1e578fee9279ba64f5221810a9e786533"}, - {file = "uvloop-0.19.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:47bf3e9312f63684efe283f7342afb414eea4d3011542155c7e625cd799c3b12"}, - {file = "uvloop-0.19.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:da8435a3bd498419ee8c13c34b89b5005130a476bda1d6ca8cfdde3de35cd650"}, - {file = "uvloop-0.19.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:02506dc23a5d90e04d4f65c7791e65cf44bd91b37f24cfc3ef6cf2aff05dc7ec"}, - {file = "uvloop-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2693049be9d36fef81741fddb3f441673ba12a34a704e7b4361efb75cf30befc"}, - {file = "uvloop-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7010271303961c6f0fe37731004335401eb9075a12680738731e9c92ddd96ad6"}, - {file = "uvloop-0.19.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5daa304d2161d2918fa9a17d5635099a2f78ae5b5960e742b2fcfbb7aefaa593"}, - {file = "uvloop-0.19.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7207272c9520203fea9b93843bb775d03e1cf88a80a936ce760f60bb5add92f3"}, - {file = "uvloop-0.19.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:78ab247f0b5671cc887c31d33f9b3abfb88d2614b84e4303f1a63b46c046c8bd"}, - {file = "uvloop-0.19.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:472d61143059c84947aa8bb74eabbace30d577a03a1805b77933d6bd13ddebbd"}, - {file = 
"uvloop-0.19.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45bf4c24c19fb8a50902ae37c5de50da81de4922af65baf760f7c0c42e1088be"}, - {file = "uvloop-0.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271718e26b3e17906b28b67314c45d19106112067205119dddbd834c2b7ce797"}, - {file = "uvloop-0.19.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:34175c9fd2a4bc3adc1380e1261f60306344e3407c20a4d684fd5f3be010fa3d"}, - {file = "uvloop-0.19.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e27f100e1ff17f6feeb1f33968bc185bf8ce41ca557deee9d9bbbffeb72030b7"}, - {file = "uvloop-0.19.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:13dfdf492af0aa0a0edf66807d2b465607d11c4fa48f4a1fd41cbea5b18e8e8b"}, - {file = "uvloop-0.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6e3d4e85ac060e2342ff85e90d0c04157acb210b9ce508e784a944f852a40e67"}, - {file = "uvloop-0.19.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca4956c9ab567d87d59d49fa3704cf29e37109ad348f2d5223c9bf761a332e7"}, - {file = "uvloop-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f467a5fd23b4fc43ed86342641f3936a68ded707f4627622fa3f82a120e18256"}, - {file = "uvloop-0.19.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:492e2c32c2af3f971473bc22f086513cedfc66a130756145a931a90c3958cb17"}, - {file = "uvloop-0.19.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2df95fca285a9f5bfe730e51945ffe2fa71ccbfdde3b0da5772b4ee4f2e770d5"}, - {file = "uvloop-0.19.0.tar.gz", hash = "sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd"}, + {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, + {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, + {file = 
"uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26"}, + {file = "uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb"}, + {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f"}, + {file = "uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c"}, + {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8"}, + {file = "uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0"}, + {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e"}, + {file = "uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb"}, + {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6"}, + {file = "uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d"}, + {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c"}, + {file = "uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2"}, + {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d"}, + {file = "uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc"}, + {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb"}, + {file = "uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f"}, + {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bfd55dfcc2a512316e65f16e503e9e450cab148ef11df4e4e679b5e8253a5281"}, + {file = "uvloop-0.21.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787ae31ad8a2856fc4e7c095341cccc7209bd657d0e71ad0dc2ea83c4a6fa8af"}, + {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ee4d4ef48036ff6e5cfffb09dd192c7a5027153948d85b8da7ff705065bacc6"}, + {file = "uvloop-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3df876acd7ec037a3d005b3ab85a7e4110422e4d9c1571d4fc89b0fc41b6816"}, + {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd53ecc9a0f3d87ab847503c2e1552b690362e005ab54e8a48ba97da3924c0dc"}, + {file = "uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553"}, + {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:17df489689befc72c39a08359efac29bbee8eee5209650d4b9f34df73d22e414"}, + {file = "uvloop-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc09f0ff191e61c2d592a752423c767b4ebb2986daa9ed62908e2b1b9a9ae206"}, + {file = "uvloop-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0ce1b49560b1d2d8a2977e3ba4afb2414fb46b86a1b64056bc4ab929efdafbe"}, + {file = 
"uvloop-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e678ad6fe52af2c58d2ae3c73dc85524ba8abe637f134bf3564ed07f555c5e79"}, + {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:460def4412e473896ef179a1671b40c039c7012184b627898eea5072ef6f017a"}, + {file = "uvloop-0.21.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:10da8046cc4a8f12c91a1c39d1dd1585c41162a15caaef165c2174db9ef18bdc"}, + {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c097078b8031190c934ed0ebfee8cc5f9ba9642e6eb88322b9958b649750f72b"}, + {file = "uvloop-0.21.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:46923b0b5ee7fc0020bef24afe7836cb068f5050ca04caf6b487c513dc1a20b2"}, + {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53e420a3afe22cdcf2a0f4846e377d16e718bc70103d7088a4f7623567ba5fb0"}, + {file = "uvloop-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88cb67cdbc0e483da00af0b2c3cdad4b7c61ceb1ee0f33fe00e09c81e3a6cb75"}, + {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:221f4f2a1f46032b403bf3be628011caf75428ee3cc204a22addf96f586b19fd"}, + {file = "uvloop-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2d1f581393673ce119355d56da84fe1dd9d2bb8b3d13ce792524e1607139feff"}, + {file = "uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3"}, ] [package.extras] +dev = ["Cython (>=3.0,<4.0)", "setuptools (>=60)"] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle 
(>=2.9.0,<2.10.0)"] [[package]] name = "virtualenv" -version = "20.26.3" +version = "20.27.1" description = "Virtual Python Environment builder" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, - {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, + {file = "virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4"}, + {file = "virtualenv-20.27.1.tar.gz", hash = "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba"}, ] [package.dependencies] @@ -4901,19 +5641,21 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "wandb" -version = "0.18.0" +version = "0.18.5" description = "A CLI and library for interacting with the Weights & Biases API." 
optional = false python-versions = ">=3.7" files = [ - {file = "wandb-0.18.0-py3-none-any.whl", hash = "sha256:a176af0d51b55a363dac3c54a8b7aa1cfd5a89cad6fc6574237232f37c779965"}, - {file = "wandb-0.18.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:2bc7f18becda9a566a63723666390f941e8b115b9e7746e0e5d73dc9ea9714c6"}, - {file = "wandb-0.18.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e14a385c95e61e77b0b5c4cbc6c5a0b47ac0d9e66730ca8c17b84eba374e35d1"}, - {file = "wandb-0.18.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0764ad8911a70cdb7cb339567c4170b860e8f5f523447b2f748d7e0e6224e29"}, - {file = "wandb-0.18.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d2ffea43710e3482168a2d89b2770aa9a14007ba16e717b176428f2a50765f2"}, - {file = "wandb-0.18.0-py3-none-win32.whl", hash = "sha256:b209840a9499bf687e8b5b20117341e7722f86a85f986c422501eb1a709dc721"}, - {file = "wandb-0.18.0-py3-none-win_amd64.whl", hash = "sha256:25aa8ee1808eae0c0e4818b81bc43fd6461e4f3603d7918e5eab2f9afca00715"}, - {file = "wandb-0.18.0.tar.gz", hash = "sha256:872dfd7298c053ca861352196bc422452caff105d3bc66b90e7bc86f17ad8bdd"}, + {file = "wandb-0.18.5-py3-none-any.whl", hash = "sha256:49ba7bafff0cecff2159bc6fb68176d6e5561d744a9bd6a63753e7077a74e26d"}, + {file = "wandb-0.18.5-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:c9d903dbff9517843881d9a0d561d82bcf0e949d8b8c03aafa35aceef31ea7e0"}, + {file = "wandb-0.18.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:33d3e5765a9bb305558af4f291338cf8723856d2b3a3c377414cd8f8b711baa4"}, + {file = "wandb-0.18.5-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:9ff72e7a45e998e2a7ff42645ec76bedabf17ea51fd112ae2837dce5023ac0cc"}, + {file = "wandb-0.18.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:136a79c06c114c225add8f977acec23d53df5e2a4d2803700a5eb5501ae40160"}, + {file = "wandb-0.18.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:350fc0f6f5bc23f4baeef12ccfea6daa89d1a5b84948d72d48249cbb652ab22a"}, + {file = "wandb-0.18.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4e31741eefa2b2a83aa9f69ef27ce6112fee6f4c792a9e2996cf374a74465276"}, + {file = "wandb-0.18.5-py3-none-win32.whl", hash = "sha256:b2a25e9caf63c12e5de1cbdb30b13f76553c04754f9ff404080f70386adb8384"}, + {file = "wandb-0.18.5-py3-none-win_amd64.whl", hash = "sha256:83b619167eb2ffdd1188cba3805ccad158f6fd7fc06bef43daf6d2729a787fa0"}, + {file = "wandb-0.18.5.tar.gz", hash = "sha256:75ef47ba7fc709b787be05e558f1635d99246afeacc9369031c1be6e5b620ce6"}, ] [package.dependencies] @@ -4925,9 +5667,10 @@ protobuf = {version = ">=3.19.0,<4.21.0 || >4.21.0,<5.28.0 || >5.28.0,<6", marke psutil = ">=5.0.0" pyyaml = "*" requests = ">=2.0.0,<3" -sentry-sdk = ">=1.0.0" +sentry-sdk = ">=2.0.0" setproctitle = "*" setuptools = "*" +typing-extensions = {version = ">=4.4,<5", markers = "python_version < \"3.12\""} [package.extras] aws = ["boto3"] @@ -4936,7 +5679,7 @@ gcp = ["google-cloud-storage"] importers = ["filelock", "mlflow", "polars (<=1.2.1)", "rich", "tenacity"] kubeflow = ["google-cloud-storage", "kubernetes", "minio", "sh"] launch = ["awscli", "azure-containerregistry", "azure-identity", "azure-storage-blob", "boto3", "botocore", "chardet", "google-auth", "google-cloud-aiplatform", "google-cloud-artifact-registry", "google-cloud-compute", "google-cloud-storage", "iso8601", "jsonschema", "kubernetes", "kubernetes-asyncio", "nbconvert", "nbformat", "optuna", "pydantic", "pyyaml (>=6.0.0)", "tomli", "typing-extensions"] -media = ["bokeh", "moviepy", "numpy", "pillow", "plotly (>=5.18.0)", "rdkit-pypi", "soundfile"] +media = ["bokeh", "imageio", "moviepy", "numpy", "pillow", "plotly (>=5.18.0)", "rdkit", "soundfile"] models = ["cloudpickle"] perf = ["orjson"] sweeps = ["sweeps (>=0.2.0)"] @@ -4944,43 +5687,41 @@ workspaces = ["wandb-workspaces"] [[package]] name = "watchdog" -version = "4.0.1" +version = "5.0.3" description = "Filesystem 
events monitoring" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:da2dfdaa8006eb6a71051795856bedd97e5b03e57da96f98e375682c48850645"}, - {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e93f451f2dfa433d97765ca2634628b789b49ba8b504fdde5837cdcf25fdb53b"}, - {file = "watchdog-4.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ef0107bbb6a55f5be727cfc2ef945d5676b97bffb8425650dadbb184be9f9a2b"}, - {file = "watchdog-4.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17e32f147d8bf9657e0922c0940bcde863b894cd871dbb694beb6704cfbd2fb5"}, - {file = "watchdog-4.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03e70d2df2258fb6cb0e95bbdbe06c16e608af94a3ffbd2b90c3f1e83eb10767"}, - {file = "watchdog-4.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:123587af84260c991dc5f62a6e7ef3d1c57dfddc99faacee508c71d287248459"}, - {file = "watchdog-4.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:093b23e6906a8b97051191a4a0c73a77ecc958121d42346274c6af6520dec175"}, - {file = "watchdog-4.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:611be3904f9843f0529c35a3ff3fd617449463cb4b73b1633950b3d97fa4bfb7"}, - {file = "watchdog-4.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:62c613ad689ddcb11707f030e722fa929f322ef7e4f18f5335d2b73c61a85c28"}, - {file = "watchdog-4.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d4925e4bf7b9bddd1c3de13c9b8a2cdb89a468f640e66fbfabaf735bd85b3e35"}, - {file = "watchdog-4.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cad0bbd66cd59fc474b4a4376bc5ac3fc698723510cbb64091c2a793b18654db"}, - {file = "watchdog-4.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a3c2c317a8fb53e5b3d25790553796105501a235343f5d2bf23bb8649c2c8709"}, - {file = "watchdog-4.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:c9904904b6564d4ee8a1ed820db76185a3c96e05560c776c79a6ce5ab71888ba"}, - {file = "watchdog-4.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:667f3c579e813fcbad1b784db7a1aaa96524bed53437e119f6a2f5de4db04235"}, - {file = "watchdog-4.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d10a681c9a1d5a77e75c48a3b8e1a9f2ae2928eda463e8d33660437705659682"}, - {file = "watchdog-4.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0144c0ea9997b92615af1d94afc0c217e07ce2c14912c7b1a5731776329fcfc7"}, - {file = "watchdog-4.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:998d2be6976a0ee3a81fb8e2777900c28641fb5bfbd0c84717d89bca0addcdc5"}, - {file = "watchdog-4.0.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e7921319fe4430b11278d924ef66d4daa469fafb1da679a2e48c935fa27af193"}, - {file = "watchdog-4.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:f0de0f284248ab40188f23380b03b59126d1479cd59940f2a34f8852db710625"}, - {file = "watchdog-4.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bca36be5707e81b9e6ce3208d92d95540d4ca244c006b61511753583c81c70dd"}, - {file = "watchdog-4.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ab998f567ebdf6b1da7dc1e5accfaa7c6992244629c0fdaef062f43249bd8dee"}, - {file = "watchdog-4.0.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dddba7ca1c807045323b6af4ff80f5ddc4d654c8bce8317dde1bd96b128ed253"}, - {file = "watchdog-4.0.1-py3-none-manylinux2014_armv7l.whl", hash = "sha256:4513ec234c68b14d4161440e07f995f231be21a09329051e67a2118a7a612d2d"}, - {file = "watchdog-4.0.1-py3-none-manylinux2014_i686.whl", hash = "sha256:4107ac5ab936a63952dea2a46a734a23230aa2f6f9db1291bf171dac3ebd53c6"}, - {file = "watchdog-4.0.1-py3-none-manylinux2014_ppc64.whl", hash = "sha256:6e8c70d2cd745daec2a08734d9f63092b793ad97612470a0ee4cbb8f5f705c57"}, - {file = "watchdog-4.0.1-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f27279d060e2ab24c0aa98363ff906d2386aa6c4dc2f1a374655d4e02a6c5e5e"}, - 
{file = "watchdog-4.0.1-py3-none-manylinux2014_s390x.whl", hash = "sha256:f8affdf3c0f0466e69f5b3917cdd042f89c8c63aebdb9f7c078996f607cdb0f5"}, - {file = "watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ac7041b385f04c047fcc2951dc001671dee1b7e0615cde772e84b01fbf68ee84"}, - {file = "watchdog-4.0.1-py3-none-win32.whl", hash = "sha256:206afc3d964f9a233e6ad34618ec60b9837d0582b500b63687e34011e15bb429"}, - {file = "watchdog-4.0.1-py3-none-win_amd64.whl", hash = "sha256:7577b3c43e5909623149f76b099ac49a1a01ca4e167d1785c76eb52fa585745a"}, - {file = "watchdog-4.0.1-py3-none-win_ia64.whl", hash = "sha256:d7b9f5f3299e8dd230880b6c55504a1f69cf1e4316275d1b215ebdd8187ec88d"}, - {file = "watchdog-4.0.1.tar.gz", hash = "sha256:eebaacf674fa25511e8867028d281e602ee6500045b57f43b08778082f7f8b44"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:85527b882f3facda0579bce9d743ff7f10c3e1e0db0a0d0e28170a7d0e5ce2ea"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:53adf73dcdc0ef04f7735066b4a57a4cd3e49ef135daae41d77395f0b5b692cb"}, + {file = "watchdog-5.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e25adddab85f674acac303cf1f5835951345a56c5f7f582987d266679979c75b"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f01f4a3565a387080dc49bdd1fefe4ecc77f894991b88ef927edbfa45eb10818"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91b522adc25614cdeaf91f7897800b82c13b4b8ac68a42ca959f992f6990c490"}, + {file = "watchdog-5.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d52db5beb5e476e6853da2e2d24dbbbed6797b449c8bf7ea118a4ee0d2c9040e"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:94d11b07c64f63f49876e0ab8042ae034674c8653bfcdaa8c4b32e71cfff87e8"}, + {file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:349c9488e1d85d0a58e8cb14222d2c51cbc801ce11ac3936ab4c3af986536926"}, + {file = 
"watchdog-5.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:53a3f10b62c2d569e260f96e8d966463dec1a50fa4f1b22aec69e3f91025060e"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:950f531ec6e03696a2414b6308f5c6ff9dab7821a768c9d5788b1314e9a46ca7"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae6deb336cba5d71476caa029ceb6e88047fc1dc74b62b7c4012639c0b563906"}, + {file = "watchdog-5.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1021223c08ba8d2d38d71ec1704496471ffd7be42cfb26b87cd5059323a389a1"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:752fb40efc7cc8d88ebc332b8f4bcbe2b5cc7e881bccfeb8e25054c00c994ee3"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a2e8f3f955d68471fa37b0e3add18500790d129cc7efe89971b8a4cc6fdeb0b2"}, + {file = "watchdog-5.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b8ca4d854adcf480bdfd80f46fdd6fb49f91dd020ae11c89b3a79e19454ec627"}, + {file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:90a67d7857adb1d985aca232cc9905dd5bc4803ed85cfcdcfcf707e52049eda7"}, + {file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:720ef9d3a4f9ca575a780af283c8fd3a0674b307651c1976714745090da5a9e8"}, + {file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:223160bb359281bb8e31c8f1068bf71a6b16a8ad3d9524ca6f523ac666bb6a1e"}, + {file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:560135542c91eaa74247a2e8430cf83c4342b29e8ad4f520ae14f0c8a19cfb5b"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dd021efa85970bd4824acacbb922066159d0f9e546389a4743d56919b6758b91"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_armv7l.whl", hash = "sha256:78864cc8f23dbee55be34cc1494632a7ba30263951b5b2e8fc8286b95845f82c"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_i686.whl", hash = 
"sha256:1e9679245e3ea6498494b3028b90c7b25dbb2abe65c7d07423ecfc2d6218ff7c"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64.whl", hash = "sha256:9413384f26b5d050b6978e6fcd0c1e7f0539be7a4f1a885061473c5deaa57221"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:294b7a598974b8e2c6123d19ef15de9abcd282b0fbbdbc4d23dfa812959a9e05"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_s390x.whl", hash = "sha256:26dd201857d702bdf9d78c273cafcab5871dd29343748524695cecffa44a8d97"}, + {file = "watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:0f9332243355643d567697c3e3fa07330a1d1abf981611654a1f2bf2175612b7"}, + {file = "watchdog-5.0.3-py3-none-win32.whl", hash = "sha256:c66f80ee5b602a9c7ab66e3c9f36026590a0902db3aea414d59a2f55188c1f49"}, + {file = "watchdog-5.0.3-py3-none-win_amd64.whl", hash = "sha256:f00b4cf737f568be9665563347a910f8bdc76f88c2970121c86243c8cfdf90e9"}, + {file = "watchdog-5.0.3-py3-none-win_ia64.whl", hash = "sha256:49f4d36cb315c25ea0d946e018c01bb028048023b9e103d3d3943f58e109dd45"}, + {file = "watchdog-5.0.3.tar.gz", hash = "sha256:108f42a7f0345042a854d4d0ad0834b741d421330d5f575b81cb27b883500176"}, ] [package.extras] @@ -4988,13 +5729,13 @@ watchmedo = ["PyYAML (>=3.10)"] [[package]] name = "wcmatch" -version = "8.5.2" +version = "10.0" description = "Wildcard/glob file name matcher." 
optional = false python-versions = ">=3.8" files = [ - {file = "wcmatch-8.5.2-py3-none-any.whl", hash = "sha256:17d3ad3758f9d0b5b4dedc770b65420d4dac62e680229c287bf24c9db856a478"}, - {file = "wcmatch-8.5.2.tar.gz", hash = "sha256:a70222b86dea82fb382dd87b73278c10756c138bd6f8f714e2183128887b9eb2"}, + {file = "wcmatch-10.0-py3-none-any.whl", hash = "sha256:0dd927072d03c0a6527a20d2e6ad5ba8d0380e60870c383bc533b71744df7b7a"}, + {file = "wcmatch-10.0.tar.gz", hash = "sha256:e72f0de09bba6a04e0de70937b0cf06e55f36f37b3deb422dfaf854b867b840a"}, ] [package.dependencies] @@ -5092,13 +5833,13 @@ files = [ [[package]] name = "xyzservices" -version = "2024.6.0" +version = "2024.9.0" description = "Source of XYZ tiles providers" optional = false python-versions = ">=3.8" files = [ - {file = "xyzservices-2024.6.0-py3-none-any.whl", hash = "sha256:fecb2508f0f2b71c819aecf5df2c03cef001c56a4b49302e640f3b34710d25e4"}, - {file = "xyzservices-2024.6.0.tar.gz", hash = "sha256:58c1bdab4257d2551b9ef91cd48571f77b7c4d2bc45bf5e3c05ac97b3a4d7282"}, + {file = "xyzservices-2024.9.0-py3-none-any.whl", hash = "sha256:776ae82b78d6e5ca63dd6a94abb054df8130887a4a308473b54a6bd364de8644"}, + {file = "xyzservices-2024.9.0.tar.gz", hash = "sha256:68fb8353c9dbba4f1ff6c0f2e5e4e596bb9e1db7f94f4f7dfbcb26e25aa66fde"}, ] [[package]] @@ -5121,108 +5862,101 @@ dev = ["doc8", "flake8", "flake8-import-order", "rstcheck[sphinx]", "sphinx"] [[package]] name = "yarl" -version = "1.9.4" +version = "1.17.0" description = "Yet another URL library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, - {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, - {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, - {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = 
"sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, - {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = 
"sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, - {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, - {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, - {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, - {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, - {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, - {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, - {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, - {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, - {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, - {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, - {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d8715edfe12eee6f27f32a3655f38d6c7410deb482158c0b7d4b7fad5d07628"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1803bf2a7a782e02db746d8bd18f2384801bc1d108723840b25e065b116ad726"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e66589110e20c2951221a938fa200c7aa134a8bdf4e4dc97e6b21539ff026d4"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7069d411cfccf868e812497e0ec4acb7c7bf8d684e93caa6c872f1e6f5d1664d"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:cbf70ba16118db3e4b0da69dcde9d4d4095d383c32a15530564c283fa38a7c52"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0bc53cc349675b32ead83339a8de79eaf13b88f2669c09d4962322bb0f064cbc"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6aa18a402d1c80193ce97c8729871f17fd3e822037fbd7d9b719864018df746"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d89c5bc701861cfab357aa0cd039bc905fe919997b8c312b4b0c358619c38d4d"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b728bdf38ca58f2da1d583e4af4ba7d4cd1a58b31a363a3137a8159395e7ecc7"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:5542e57dc15d5473da5a39fbde14684b0cc4301412ee53cbab677925e8497c11"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e564b57e5009fb150cb513804d7e9e9912fee2e48835638f4f47977f88b4a39c"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:eb3c4cff524b4c1c1dba3a6da905edb1dfd2baf6f55f18a58914bbb2d26b59e1"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:05e13f389038842da930d439fbed63bdce3f7644902714cb68cf527c971af804"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:153c38ee2b4abba136385af4467459c62d50f2a3f4bde38c7b99d43a20c143ef"}, + {file = "yarl-1.17.0-cp310-cp310-win32.whl", hash = "sha256:4065b4259d1ae6f70fd9708ffd61e1c9c27516f5b4fae273c41028afcbe3a094"}, + {file = "yarl-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:abf366391a02a8335c5c26163b5fe6f514cc1d79e74d8bf3ffab13572282368e"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:19a4fe0279626c6295c5b0c8c2bb7228319d2e985883621a6e87b344062d8135"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:cadd0113f4db3c6b56868d6a19ca6286f5ccfa7bc08c27982cf92e5ed31b489a"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60d6693eef43215b1ccfb1df3f6eae8db30a9ff1e7989fb6b2a6f0b468930ee8"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb8bf3843e1fa8cf3fe77813c512818e57368afab7ebe9ef02446fe1a10b492"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2a5b35fd1d8d90443e061d0c8669ac7600eec5c14c4a51f619e9e105b136715"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5bf17b32f392df20ab5c3a69d37b26d10efaa018b4f4e5643c7520d8eee7ac7"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48f51b529b958cd06e78158ff297a8bf57b4021243c179ee03695b5dbf9cb6e1"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fcaa06bf788e19f913d315d9c99a69e196a40277dc2c23741a1d08c93f4d430"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:32f3ee19ff0f18a7a522d44e869e1ebc8218ad3ae4ebb7020445f59b4bbe5897"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a4fb69a81ae2ec2b609574ae35420cf5647d227e4d0475c16aa861dd24e840b0"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7bacc8b77670322132a1b2522c50a1f62991e2f95591977455fd9a398b4e678d"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:437bf6eb47a2d20baaf7f6739895cb049e56896a5ffdea61a4b25da781966e8b"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30534a03c87484092080e3b6e789140bd277e40f453358900ad1f0f2e61fc8ec"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b30df4ff98703649915144be6f0df3b16fd4870ac38a09c56d5d9e54ff2d5f96"}, + {file = 
"yarl-1.17.0-cp311-cp311-win32.whl", hash = "sha256:263b487246858e874ab53e148e2a9a0de8465341b607678106829a81d81418c6"}, + {file = "yarl-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:07055a9e8b647a362e7d4810fe99d8f98421575e7d2eede32e008c89a65a17bd"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:84095ab25ba69a8fa3fb4936e14df631b8a71193fe18bd38be7ecbe34d0f5512"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02608fb3f6df87039212fc746017455ccc2a5fc96555ee247c45d1e9f21f1d7b"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13468d291fe8c12162b7cf2cdb406fe85881c53c9e03053ecb8c5d3523822cd9"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8da3f8f368fb7e2f052fded06d5672260c50b5472c956a5f1bd7bf474ae504ab"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec0507ab6523980bed050137007c76883d941b519aca0e26d4c1ec1f297dd646"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08fc76df7fd8360e9ff30e6ccc3ee85b8dbd6ed5d3a295e6ec62bcae7601b932"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d522f390686acb6bab2b917dd9ca06740c5080cd2eaa5aef8827b97e967319d"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:147c527a80bb45b3dcd6e63401af8ac574125d8d120e6afe9901049286ff64ef"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:24cf43bcd17a0a1f72284e47774f9c60e0bf0d2484d5851f4ddf24ded49f33c6"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c28a44b9e0fba49c3857360e7ad1473fc18bc7f6659ca08ed4f4f2b9a52c75fa"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:350cacb2d589bc07d230eb995d88fcc646caad50a71ed2d86df533a465a4e6e1"}, + {file = 
"yarl-1.17.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fd1ab1373274dea1c6448aee420d7b38af163b5c4732057cd7ee9f5454efc8b1"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4934e0f96dadc567edc76d9c08181633c89c908ab5a3b8f698560124167d9488"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8d0a278170d75c88e435a1ce76557af6758bfebc338435b2eba959df2552163e"}, + {file = "yarl-1.17.0-cp312-cp312-win32.whl", hash = "sha256:61584f33196575a08785bb56db6b453682c88f009cd9c6f338a10f6737ce419f"}, + {file = "yarl-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:9987a439ad33a7712bd5bbd073f09ad10d38640425fa498ecc99d8aa064f8fc4"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8deda7b8eb15a52db94c2014acdc7bdd14cb59ec4b82ac65d2ad16dc234a109e"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56294218b348dcbd3d7fce0ffd79dd0b6c356cb2a813a1181af730b7c40de9e7"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1fab91292f51c884b290ebec0b309a64a5318860ccda0c4940e740425a67b6b7"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cf93fa61ff4d9c7d40482ce1a2c9916ca435e34a1b8451e17f295781ccc034f"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:261be774a0d71908c8830c33bacc89eef15c198433a8cc73767c10eeeb35a7d0"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deec9693b67f6af856a733b8a3e465553ef09e5e8ead792f52c25b699b8f9e6e"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c804b07622ba50a765ca7fb8145512836ab65956de01307541def869e4a456c9"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d013a7c9574e98c14831a8f22d27277688ec3b2741d0188ac01a910b009987a"}, + {file 
= "yarl-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e2cfcba719bd494c7413dcf0caafb51772dec168c7c946e094f710d6aa70494e"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:c068aba9fc5b94dfae8ea1cedcbf3041cd4c64644021362ffb750f79837e881f"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3616df510ffac0df3c9fa851a40b76087c6c89cbcea2de33a835fc80f9faac24"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:755d6176b442fba9928a4df787591a6a3d62d4969f05c406cad83d296c5d4e05"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c18f6e708d1cf9ff5b1af026e697ac73bea9cb70ee26a2b045b112548579bed2"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b937c216b6dee8b858c6afea958de03c5ff28406257d22b55c24962a2baf6fd"}, + {file = "yarl-1.17.0-cp313-cp313-win32.whl", hash = "sha256:d0131b14cb545c1a7bd98f4565a3e9bdf25a1bd65c83fc156ee5d8a8499ec4a3"}, + {file = "yarl-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:01c96efa4313c01329e88b7e9e9e1b2fc671580270ddefdd41129fa8d0db7696"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0d44f67e193f0a7acdf552ecb4d1956a3a276c68e7952471add9f93093d1c30d"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:16ea0aa5f890cdcb7ae700dffa0397ed6c280840f637cd07bffcbe4b8d68b985"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cf5469dc7dcfa65edf5cc3a6add9f84c5529c6b556729b098e81a09a92e60e51"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e662bf2f6e90b73cf2095f844e2bc1fda39826472a2aa1959258c3f2a8500a2f"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8260e88f1446904ba20b558fa8ce5d0ab9102747238e82343e46d056d7304d7e"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:5dc16477a4a2c71e64c5d3d15d7ae3d3a6bb1e8b955288a9f73c60d2a391282f"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46027e326cecd55e5950184ec9d86c803f4f6fe4ba6af9944a0e537d643cdbe0"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc95e46c92a2b6f22e70afe07e34dbc03a4acd07d820204a6938798b16f4014f"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:16ca76c7ac9515320cd09d6cc083d8d13d1803f6ebe212b06ea2505fd66ecff8"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:eb1a5b97388f2613f9305d78a3473cdf8d80c7034e554d8199d96dcf80c62ac4"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:41fd5498975418cdc34944060b8fbeec0d48b2741068077222564bea68daf5a6"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:146ca582ed04a5664ad04b0e0603934281eaab5c0115a5a46cce0b3c061a56a1"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:6abb8c06107dbec97481b2392dafc41aac091a5d162edf6ed7d624fe7da0587a"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4d14be4613dd4f96c25feb4bd8c0d8ce0f529ab0ae555a17df5789e69d8ec0c5"}, + {file = "yarl-1.17.0-cp39-cp39-win32.whl", hash = "sha256:174d6a6cad1068f7850702aad0c7b1bca03bcac199ca6026f84531335dfc2646"}, + {file = "yarl-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:6af417ca2c7349b101d3fd557ad96b4cd439fdb6ab0d288e3f64a068eea394d0"}, + {file = "yarl-1.17.0-py3-none-any.whl", hash = "sha256:62dd42bb0e49423f4dd58836a04fcf09c80237836796025211bbe913f1524993"}, + {file = "yarl-1.17.0.tar.gz", hash = "sha256:d3f13583f378930377e02002b4085a3d025b00402d5a80911726d43a67911cd9"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" +propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = 
"9caf73143d9e3b6d71389d7fce3b554c619492bff8567de39847129ed7309af2" +content-hash = "c22ab3de1b76f7549448f4204d52fe0a2d9e68cbbd4d4e873fad667a075dffe3" diff --git a/pyproject.toml b/pyproject.toml index 4985c6709..54b5a20ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ scikit-learn = "^1.3.2" pandas = { extras = ["gcp", "parquet"], version = "^2.2.2" } skops = ">=0.9,<0.11" google-cloud-secret-manager = "^2.20.0" +shap = "^0.46.0" +matplotlib = "3.7.3" [tool.poetry.dev-dependencies] pre-commit = "^4.0.0" diff --git a/src/gentropy/method/l2g/model.py b/src/gentropy/method/l2g/model.py index 45d90f90d..e35e255a2 100644 --- a/src/gentropy/method/l2g/model.py +++ b/src/gentropy/method/l2g/model.py @@ -2,10 +2,12 @@ from __future__ import annotations +import json from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING, Any, Type +import pandas as pd import skops.io as sio from pandas import DataFrame as pd_dataframe from pandas import to_numeric as pd_to_numeric @@ -148,6 +150,23 @@ def save(self: LocusToGeneModel, path: str) -> None: else: sio.dump(self.model, path) + @staticmethod + def load_feature_matrix_from_wandb(wandb_run_name: str) -> pd.DataFrame: + """Loads dataset of feature matrix used during a wandb run. 
+ + Args: + wandb_run_name (str): Name of the wandb run to load the feature matrix from + + Returns: + pd.DataFrame: Feature matrix used during the wandb run + """ + with open(wandb_run_name) as f: + raw_data = json.load(f) + + data = raw_data["data"] + columns = raw_data["columns"] + return pd.DataFrame(data, columns=columns) + def _create_hugging_face_model_card( self: LocusToGeneModel, local_repo: str, diff --git a/src/gentropy/method/l2g/trainer.py b/src/gentropy/method/l2g/trainer.py index 1c4088462..fe56b3f42 100644 --- a/src/gentropy/method/l2g/trainer.py +++ b/src/gentropy/method/l2g/trainer.py @@ -2,11 +2,14 @@ from __future__ import annotations +import os from dataclasses import dataclass from functools import partial -from typing import Any +from typing import TYPE_CHECKING, Any +import matplotlib.pyplot as plt import pandas as pd +import shap from sklearn.metrics import ( accuracy_score, f1_score, @@ -15,7 +18,8 @@ roc_auc_score, ) from sklearn.model_selection import train_test_split -from wandb.data_types import Table +from wandb.data_types import Image, Table +from wandb.errors.term import termlog as wandb_termlog from wandb.sdk.wandb_init import init as wandb_init from wandb.sdk.wandb_sweep import sweep as wandb_sweep from wandb.sklearn import plot_classifier @@ -24,6 +28,11 @@ from gentropy.dataset.l2g_feature_matrix import L2GFeatureMatrix from gentropy.method.l2g.model import LocusToGeneModel +if TYPE_CHECKING: + from matplotlib.axes._axes import Axes + from shap._explanation import Explanation + from wandb.sdk.wandb_run import Run + @dataclass class LocusToGeneTrainer: @@ -34,13 +43,22 @@ class LocusToGeneTrainer: # Initialise vars features_list: list[str] | None = None - target_labels: list[str] | None = None + label_col: str = "goldStandardSet" x_train: pd.DataFrame | None = None y_train: pd.Series | None = None x_test: pd.DataFrame | None = None y_test: pd.Series | None = None + run: Run | None = None wandb_l2g_project_name: str = 
"gentropy-locus-to-gene" + def __post_init__(self) -> None: + """Set default features_list to feature_matrix's features_list if not provided.""" + self.features_list = ( + self.feature_matrix.features_list + if self.features_list is None + else self.features_list + ) + def fit( self: LocusToGeneTrainer, ) -> LocusToGeneModel: @@ -65,6 +83,54 @@ def fit( return self.model raise ValueError("Train data not set, nothing to fit.") + def _get_shap_explanation( + self: LocusToGeneTrainer, + model: LocusToGeneModel, + ) -> Explanation: + """Get the SHAP values for the given model and data. We pass the full X matrix (without the labels) to interpret their shap values. + + Args: + model (LocusToGeneModel): Model to explain. + + Returns: + Explanation: SHAP values for the given model and data. + + Raises: + ValueError: Train data not set, cannot get SHAP values. + """ + if self.x_train is not None and self.x_test is not None: + training_data = pd.concat([self.x_train, self.x_test], ignore_index=True) + explainer = shap.TreeExplainer( + model.model, + data=training_data, + feature_perturbation="interventional", + ) + return explainer(training_data) + raise ValueError("Train data not set.") + + def log_plot_image_to_wandb( + self: LocusToGeneTrainer, title: str, plot: Axes + ) -> None: + """Accepts a plot object, and saves the fig to PNG to then log it in W&B. + + Args: + title (str): Title of the plot. + plot (Axes): Shap plot to log. + + Raises: + ValueError: Run not set, cannot log to W&B. 
+ """ + if self.run is None: + raise ValueError("Run not set, cannot log to W&B.") + if not plot: + # Scatter plot returns none, so we need to handle this case + plt.savefig("tmp.png", bbox_inches="tight") + else: + plot.figure.savefig("tmp.png", bbox_inches="tight") + self.run.log({title: Image("tmp.png")}) + plt.close() + os.remove("tmp.png") + def log_to_wandb( self: LocusToGeneTrainer, wandb_run_name: str, @@ -76,12 +142,16 @@ def log_to_wandb( Args: wandb_run_name (str): Name of the W&B run + + Raises: + ValueError: If dependencies are not available. """ if ( self.x_train is not None and self.x_test is not None and self.y_train is not None and self.y_test is not None + and self.features_list is not None ): assert ( not self.x_train.empty and not self.y_train.empty @@ -89,7 +159,7 @@ def log_to_wandb( fitted_classifier = self.model.model y_predicted = fitted_classifier.predict(self.x_test.values) y_probas = fitted_classifier.predict_proba(self.x_test.values) - run = wandb_init( + self.run = wandb_init( project=self.wandb_l2g_project_name, name=wandb_run_name, config=fitted_classifier.get_params(), @@ -109,39 +179,66 @@ def log_to_wandb( is_binary=True, ) # Track evaluation metrics - run.log( + self.run.log( { "areaUnderROC": roc_auc_score( self.y_test, y_probas[:, 1], average="weighted" ) } ) - run.log({"accuracy": accuracy_score(self.y_test, y_predicted)}) - run.log( + self.run.log({"accuracy": accuracy_score(self.y_test, y_predicted)}) + self.run.log( { "weightedPrecision": precision_score( self.y_test, y_predicted, average="weighted" ) } ) - run.log( + self.run.log( { "weightedRecall": recall_score( self.y_test, y_predicted, average="weighted" ) } ) - run.log({"f1": f1_score(self.y_test, y_predicted, average="weighted")}) + self.run.log({"f1": f1_score(self.y_test, y_predicted, average="weighted")}) # Track gold standards and their features - run.log( + self.run.log( {"featureMatrix": Table(dataframe=self.feature_matrix._df.toPandas())} ) # Log feature 
missingness - run.log( + self.run.log( { "missingnessRates": self.feature_matrix.calculate_feature_missingness_rate() } ) + # Plot marginal contribution of each feature + explanation = self._get_shap_explanation(self.model) + self.log_plot_image_to_wandb( + "Feature Contribution", + shap.plots.bar( + explanation, max_display=len(self.x_train.columns), show=False + ), + ) + self.log_plot_image_to_wandb( + "Beeswarm Plot", + shap.plots.beeswarm( + explanation, max_display=len(self.x_train.columns), show=False + ), + ) + # Plot correlation between feature values and their importance + for feature in self.features_list: + self.log_plot_image_to_wandb( + f"Effect of {feature} on the predictions", + shap.plots.scatter( + explanation[:, feature], + show=False, + ), + ) + wandb_termlog("Logged Shapley contributions.") + self.run.finish() + else: + raise ValueError("Something went wrong, couldn't log to W&B.") def train( self: LocusToGeneTrainer, @@ -158,20 +255,12 @@ def train( data_df = self.feature_matrix._df.drop("geneId", "studyLocusId").toPandas() # Encode labels in `goldStandardSet` to a numeric value - data_df["goldStandardSet"] = data_df["goldStandardSet"].map( - self.model.label_encoder - ) + data_df[self.label_col] = data_df[self.label_col].map(self.model.label_encoder) # Ensure all columns are numeric and split data_df = data_df.apply(pd.to_numeric) - self.feature_cols = [ - col - for col in data_df.columns - if col not in ["studyLocusId", "goldStandardSet"] - ] - label_col = "goldStandardSet" - X = data_df[self.feature_cols].copy() - y = data_df[label_col].copy() + X = data_df[self.features_list].copy() + y = data_df[self.label_col].copy() self.x_train, self.x_test, self.y_train, self.y_test = train_test_split( X, y, test_size=0.2, random_state=42 ) diff --git a/utils/install_dependencies_on_cluster.sh b/utils/install_dependencies_on_cluster.sh index b0a165c04..849dee0c3 100644 --- a/utils/install_dependencies_on_cluster.sh +++ 
b/utils/install_dependencies_on_cluster.sh @@ -62,7 +62,7 @@ function main() { echo "Install package..." # NOTE: ensure the gentropy is reinstalled each time without version cache # see https://pip.pypa.io/en/stable/cli/pip_install/#cmdoption-force-reinstall - run_with_retry pip install --force-reinstall ${PACKAGENAME} + run_with_retry pip install --force-reinstall --ignore-installed ${PACKAGENAME} } From 7bb74a5056c98b76344525ef96f864ef2fb22c4d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 16:15:35 +0000 Subject: [PATCH 144/188] chore: pre-commit autoupdate (#885) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.0 → v0.7.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.0...v0.7.1) - [github.com/pre-commit/mirrors-mypy: v1.12.1 → v1.13.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.12.1...v1.13.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7d2d55fc2..e62a9d790 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.0 + rev: v0.7.1 hooks: - id: ruff args: @@ -65,7 +65,7 @@ repos: stages: [commit-msg] - repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.12.1" + rev: "v1.13.0" hooks: - id: mypy args: From fa38ca67d75c3064691120a93a89b0c208ade818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 31 Oct 2024 17:56:10 +0000 Subject: [PATCH 145/188] fix(distance_features): correct mean distance equation and correct rows with negative values (#889) * 
fix(distance_features): hack to set to null negative values * fix(distance_features): correct mean distance equation --- src/gentropy/dataset/l2g_features/distance.py | 37 +++++++++++-------- tests/gentropy/dataset/test_l2g_feature.py | 2 +- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py index 2149dc339..08ffc7c5e 100644 --- a/src/gentropy/dataset/l2g_features/distance.py +++ b/src/gentropy/dataset/l2g_features/distance.py @@ -42,10 +42,6 @@ def common_distance_feature_logic( distances_dataset = variant_index.get_distance_to_gene(distance_type=distance_type) if "Mean" in feature_name: # Weighting by the SNP contribution is only applied when we are averaging all distances - distance_score_expr = ( - f.lit(genomic_window) - f.col(distance_type) + f.lit(1) - ) * f.col("posteriorProbability") - agg_expr = f.mean(f.col("distance_score")) df = study_loci_to_annotate.df.withColumn( "variantInLocus", f.explode_outer("locus") ).select( @@ -53,11 +49,15 @@ def common_distance_feature_logic( f.col("variantInLocus.variantId").alias("variantId"), f.col("variantInLocus.posteriorProbability").alias("posteriorProbability"), ) + distance_score_expr = ( + f.lit(genomic_window) - f.col(distance_type) + f.lit(1) + ) * f.col("posteriorProbability") + agg_expr = f.sum(f.col("distance_score")) elif "Sentinel" in feature_name: + df = study_loci_to_annotate.df.select("studyLocusId", "variantId") # For minimum distances we calculate the unweighted distance between the sentinel (lead) and the gene. 
This distance_score_expr = f.lit(genomic_window) - f.col(distance_type) + f.lit(1) agg_expr = f.first(f.col("distance_score")) - df = study_loci_to_annotate.df.select("studyLocusId", "variantId") return ( df.join( distances_dataset.withColumnRenamed("targetId", "geneId"), @@ -66,10 +66,15 @@ def common_distance_feature_logic( ) .withColumn( "distance_score", - f.log10(distance_score_expr) / f.log10(f.lit(genomic_window + 1)), + distance_score_expr, ) .groupBy("studyLocusId", "geneId") - .agg(agg_expr.alias(feature_name)) + .agg(agg_expr.alias("distance_score_agg")) + .withColumn( + feature_name, + f.log10(f.col("distance_score_agg")) / f.log10(f.lit(genomic_window + 1)), + ) + .drop("distance_score_agg") ) @@ -120,7 +125,6 @@ def common_neighbourhood_distance_feature_logic( class DistanceTssMeanFeature(L2GFeature): """Average distance of all tagging variants to gene TSS.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceTssMean" @@ -147,6 +151,11 @@ def compute( feature_name=cls.feature_name, distance_type=distance_type, **feature_dependency, + ).withColumn( + cls.feature_name, + f.when(f.col(cls.feature_name) < 0, f.lit(0.0)).otherwise( + f.col(cls.feature_name) + ), ), id_vars=("studyLocusId", "geneId"), var_name="featureName", @@ -159,7 +168,6 @@ def compute( class DistanceTssMeanNeighbourhoodFeature(L2GFeature): """Minimum mean distance to TSS for all genes in the vicinity of a studyLocus.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceTssMeanNeighbourhood" @@ -198,7 +206,6 @@ def compute( class DistanceSentinelTssFeature(L2GFeature): """Distance of the sentinel variant to gene TSS. 
This is not weighted by the causal probability.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceSentinelTss" @@ -237,7 +244,6 @@ def compute( class DistanceSentinelTssNeighbourhoodFeature(L2GFeature): """Distance between the sentinel variant and a gene TSS as a relation of the distnace with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceSentinelTssNeighbourhood" @@ -276,7 +282,6 @@ def compute( class DistanceFootprintMeanFeature(L2GFeature): """Average distance of all tagging variants to the footprint of a gene.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceFootprintMean" @@ -303,6 +308,11 @@ def compute( feature_name=cls.feature_name, distance_type=distance_type, **feature_dependency, + ).withColumn( + cls.feature_name, + f.when(f.col(cls.feature_name) < 0, f.lit(0.0)).otherwise( + f.col(cls.feature_name) + ), ), id_vars=("studyLocusId", "geneId"), var_name="featureName", @@ -315,7 +325,6 @@ def compute( class DistanceFootprintMeanNeighbourhoodFeature(L2GFeature): """Minimum mean distance to footprint for all genes in the vicinity of a studyLocus.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceFootprintMeanNeighbourhood" @@ -354,7 +363,6 @@ def compute( class DistanceSentinelFootprintFeature(L2GFeature): """Distance between the sentinel variant and the footprint of a gene.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceSentinelFootprint" @@ -393,7 +401,6 @@ def compute( class DistanceSentinelFootprintNeighbourhoodFeature(L2GFeature): """Distance between the sentinel variant and a gene footprint as a relation of the distnace with all the genes in the vicinity of a studyLocus. 
This is not weighted by the causal probability.""" - fill_na_value = 500_000 feature_dependency_type = VariantIndex feature_name = "distanceSentinelFootprintNeighbourhood" diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index c6019cefc..0637b4a86 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -513,7 +513,7 @@ class TestCommonDistanceFeatureLogic: ( "distanceTssMean", [ - {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.08}, + {"studyLocusId": "1", "geneId": "gene1", "distanceTssMean": 0.52}, {"studyLocusId": "1", "geneId": "gene2", "distanceTssMean": 0.63}, ], ), From b812f67a486ff3fc0f7b272048820227443321f1 Mon Sep 17 00:00:00 2001 From: Vivien Ho <56025826+vivienho@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:08:51 +0000 Subject: [PATCH 146/188] feat: add step to generate association data (#888) * feat: add step to generate association data * fix: evidence input file is json * feat: changed maximum theoretical harmonic sum formula * test: add test for calculate_harmonic_sum function * chore: move calculate_harmonic_sum function to spark_helpers.py * chore: update import statement --- docs/python_api/steps/l2g.md | 2 + src/gentropy/common/spark_helpers.py | 35 +++++++++++++++++ src/gentropy/config.py | 14 +++++++ src/gentropy/l2g.py | 59 ++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+) diff --git a/docs/python_api/steps/l2g.md b/docs/python_api/steps/l2g.md index 5594f1605..e6aeb0ebb 100644 --- a/docs/python_api/steps/l2g.md +++ b/docs/python_api/steps/l2g.md @@ -7,3 +7,5 @@ title: Locus to Gene (L2G) ::: gentropy.l2g.LocusToGeneStep ::: gentropy.l2g.LocusToGeneEvidenceStep + +::: gentropy.l2g.LocusToGeneAssociationsStep diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index 4e40ac4f1..a1bf9670a 100644 --- a/src/gentropy/common/spark_helpers.py +++ 
b/src/gentropy/common/spark_helpers.py @@ -847,3 +847,38 @@ def get_struct_field_schema(schema: t.StructType, name: str) -> t.DataType: if not matching_fields: raise ValueError("Provided name %s is not present in the schema.", name) return matching_fields[0].dataType + +def calculate_harmonic_sum(input_array: Column) -> Column: + """Calculate the harmonic sum of an array. + + Args: + input_array (Column): input array of doubles + + Returns: + Column: column of harmonic sums + + Examples: + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([ + ... Row([0.3, 0.8, 1.0]), + ... Row([0.7, 0.2, 0.9]), + ... ], ["input_array"] + ... ) + >>> df.select("*", calculate_harmonic_sum(f.col("input_array")).alias("harmonic_sum")).show() + +---------------+------------------+ + | input_array| harmonic_sum| + +---------------+------------------+ + |[0.3, 0.8, 1.0]|0.7502326177269538| + |[0.7, 0.2, 0.9]|0.6674366756805108| + +---------------+------------------+ + + """ + return f.aggregate( + f.arrays_zip( + f.sort_array(input_array, False).alias("score"), + f.sequence(f.lit(1), f.size(input_array)).alias("pos") + ), + f.lit(0.0), + lambda acc, x: acc + + x["score"]/f.pow(x["pos"], 2)/f.lit(sum(1 / ((i + 1)**2) for i in range(1000))) + ) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index c5889dbab..e9bf26f31 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -632,6 +632,15 @@ class LocusToGeneEvidenceStepConfig(StepConfig): locus_to_gene_threshold: float = 0.05 _target_: str = "gentropy.l2g.LocusToGeneEvidenceStep" +@dataclass +class LocusToGeneAssociationsStepConfig(StepConfig): + """Configuration of the locus to gene association step.""" + + evidence_input_path: str = MISSING + disease_index_path: str = MISSING + direct_associations_output_path: str = MISSING + indirect_associations_output_path: str = MISSING + _target_: str = "gentropy.l2g.LocusToGeneAssociationsStep" @dataclass class StudyLocusValidationStepConfig(StepConfig): @@ 
-733,5 +742,10 @@ def register_config() -> None: name="locus_to_gene_evidence", node=LocusToGeneEvidenceStepConfig, ) + cs.store( + group="step", + name="locus_to_gene_associations", + node=LocusToGeneAssociationsStepConfig, + ) cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig) cs.store(group="step", name="credible_set_qc", node=CredibleSetQCStepConfig) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index ca52fbf04..1004fa0fb 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -9,6 +9,7 @@ from wandb import login as wandb_login from gentropy.common.session import Session +from gentropy.common.spark_helpers import calculate_harmonic_sum from gentropy.common.utils import access_gcp_secret from gentropy.dataset.colocalisation import Colocalisation from gentropy.dataset.gene_index import GeneIndex @@ -320,3 +321,61 @@ def __init__( .write.mode(session.write_mode) .json(evidence_output_path) ) + +class LocusToGeneAssociationsStep: + """Locus to gene associations step.""" + + def __init__( + self, + session: Session, + evidence_input_path: str, + disease_index_path: str, + direct_associations_output_path: str, + indirect_associations_output_path: str, + ) -> None: + """Create direct and indirect association datasets. 
+ + Args: + session (Session): Session object that contains the Spark session + evidence_input_path (str): Path to the L2G evidence input dataset + disease_index_path (str): Path to disease index file + direct_associations_output_path (str): Path to the direct associations output dataset + indirect_associations_output_path (str): Path to the indirect associations output dataset + """ + # Read in the disease index + disease_index = ( + session.spark.read.parquet(disease_index_path) + .select( + f.col("id").alias("diseaseId"), + f.explode("ancestors").alias("ancestorDiseaseId") + ) + ) + + # Read in the L2G evidence + disease_target_evidence = ( + session.spark.read.json(evidence_input_path) + .select( + f.col("targetFromSourceId").alias("targetId"), + f.col("diseaseFromSourceMappedId").alias("diseaseId"), + f.col("resourceScore") + ) + ) + + # Generate direct assocations and save file + ( + disease_target_evidence + .groupBy("targetId", "diseaseId") + .agg(f.collect_set("resourceScore").alias("scores")) + .select("targetId", "diseaseId", calculate_harmonic_sum(f.col("scores")).alias("harmonicSum")) + .write.mode(session.write_mode).parquet(direct_associations_output_path) + ) + + # Generate indirect assocations and save file + ( + disease_target_evidence + .join(disease_index, on="diseaseId", how="inner") + .groupBy("targetId", "ancestorDiseaseId") + .agg(f.collect_set("resourceScore").alias("scores")) + .select("targetId", "ancestorDiseaseId", calculate_harmonic_sum(f.col("scores")).alias("harmonicSum")) + .write.mode(session.write_mode).parquet(indirect_associations_output_path) + ) From c5998569841160e665d75b306f0b52292c5d47b8 Mon Sep 17 00:00:00 2001 From: Vivien Ho <56025826+vivienho@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:18:09 +0000 Subject: [PATCH 147/188] fix: revert distinct for associations input file (#871) * fix: revert distinct for associations input file * refactor: rename the temporary studyLocusId to rowId * refactor: rename the temporary 
studyLocusId to rowId in test * revert: add back assign_study_locus_id in window_baased_clumping as required for SummaryStatistics --------- Co-authored-by: Daniel Suveges --- .../datasource/gwas_catalog/associations.py | 32 +++++++++++-------- .../test_gwas_catalog_associations.py | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/gentropy/datasource/gwas_catalog/associations.py b/src/gentropy/datasource/gwas_catalog/associations.py index da2bcc6df..31238e425 100644 --- a/src/gentropy/datasource/gwas_catalog/associations.py +++ b/src/gentropy/datasource/gwas_catalog/associations.py @@ -209,7 +209,7 @@ def _map_variants_to_gnomad_variants( """ # Subset of GWAS Catalog associations required for resolving variant IDs: gwas_associations_subset = gwas_associations.select( - "studyLocusId", + "rowId", f.col("CHR_ID").alias("chromosome"), # The positions from GWAS Catalog are from ensembl that causes discrepancy for indels: f.col("CHR_POS").cast(IntegerType()).alias("ensemblPosition"), @@ -258,7 +258,7 @@ def _map_variants_to_gnomad_variants( .withColumn( "rsIdFilter", GWASCatalogCuratedAssociationsParser._flag_mappings_to_retain( - f.col("studyLocusId"), + f.col("rowId"), GWASCatalogCuratedAssociationsParser._compare_rsids( f.col("rsIdsGnomad"), f.col("rsIdsGwasCatalog") ), @@ -267,7 +267,7 @@ def _map_variants_to_gnomad_variants( .withColumn( "concordanceFilter", GWASCatalogCuratedAssociationsParser._flag_mappings_to_retain( - f.col("studyLocusId"), + f.col("rowId"), GWASCatalogCuratedAssociationsParser._check_concordance( f.col("riskAllele"), f.col("referenceAllele"), @@ -285,11 +285,11 @@ def _map_variants_to_gnomad_variants( ) ) - # Keep only highest maxMaf variant per studyLocusId + # Keep only highest maxMaf variant per rowId fully_mapped_associations = get_record_with_maximum_value( - filtered_associations, grouping_col="studyLocusId", sorting_col="maxMaf" + filtered_associations, grouping_col="rowId", sorting_col="maxMaf" ).select( - 
"studyLocusId", + "rowId", "variantId", "referenceAllele", "alternateAllele", @@ -298,7 +298,7 @@ def _map_variants_to_gnomad_variants( ) return gwas_associations.join( - fully_mapped_associations, on="studyLocusId", how="left" + fully_mapped_associations, on="rowId", how="left" ) @staticmethod @@ -1106,11 +1106,9 @@ def from_source( pvalue_threshold is keeped in sync with the WindowBasedClumpingStep gwas_significance. """ return StudyLocusGWASCatalog( - _df=gwas_associations - # drop duplicate rows - .distinct() - .withColumn( - "studyLocusId", f.monotonically_increasing_id().cast(StringType()) + _df=gwas_associations.withColumn( + # temporary column + "rowId", f.monotonically_increasing_id().cast(StringType()) ) .transform( # Map/harmonise variants to variant annotation dataset: @@ -1138,6 +1136,14 @@ def from_source( ) # Harmonising effect to beta value and flip effect if needed: .transform(cls.harmonise_association_effect_to_beta) + .withColumnRenamed("STUDY ACCESSION", "studyId") + # Adding study-locus id: + .withColumn( + "studyLocusId", + StudyLocus.assign_study_locus_id( + ["studyId", "variantId"] + ), + ) .select( # INSIDE STUDY-LOCUS SCHEMA: "studyLocusId", @@ -1145,7 +1151,7 @@ def from_source( # Mapped genomic location of the variant (; separated list) "chromosome", "position", - f.col("STUDY ACCESSION").alias("studyId"), + "studyId", # p-value of the association, string: split into exponent and mantissa. 
*GWASCatalogCuratedAssociationsParser._parse_pvalue(f.col("P-VALUE")), # Capturing phenotype granularity at the association level diff --git a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py index fe9608bf0..147f6c067 100644 --- a/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py @@ -71,7 +71,7 @@ def test_map_variants_to_variant_index( assert isinstance( GWASCatalogCuratedAssociationsParser._map_variants_to_gnomad_variants( sample_gwas_catalog_associations.withColumn( - "studyLocusId", f.monotonically_increasing_id().cast(StringType()) + "rowId", f.monotonically_increasing_id().cast(StringType()) ), mock_variant_index, ), From 9295d5850a2b03bf7cff8df4f7cccbf11299cfc6 Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: Fri, 1 Nov 2024 14:24:50 +0000 Subject: [PATCH 148/188] feat: add effect size direction to coloc output (#854) * feat(coloc effect sizes): add calculate beta ratio method * feat(coloc effect sizes): update schema and coloc code * fix(coloc effect sizes): tweak beta ratio return * chore: tweak colocalisation schema * feat(coloc_effect_sizes): add simple test * fix(coloc_effect_sizes): change variable to camel case * fix: test broken * feat: remove zero betas too * chore: stylistic changes --------- Co-authored-by: Daniel Suveges --- .../assets/schemas/colocalisation.json | 6 ++ src/gentropy/dataset/study_locus_overlap.py | 34 ++++++++ src/gentropy/method/colocalisation.py | 10 +++ .../method/test_colocalisation_method.py | 86 ++++++++++++++++++- 4 files changed, 133 insertions(+), 3 deletions(-) diff --git a/src/gentropy/assets/schemas/colocalisation.json b/src/gentropy/assets/schemas/colocalisation.json index 0bfb66816..a065dc947 100644 --- a/src/gentropy/assets/schemas/colocalisation.json +++ b/src/gentropy/assets/schemas/colocalisation.json @@ -72,6 
+72,12 @@ "type": "double", "nullable": true, "metadata": {} + }, + { + "name": "betaRatioSignAverage", + "type": "double", + "nullable": true, + "metadata": {} } ] } diff --git a/src/gentropy/dataset/study_locus_overlap.py b/src/gentropy/dataset/study_locus_overlap.py index a6288a5e8..d4faea4cc 100644 --- a/src/gentropy/dataset/study_locus_overlap.py +++ b/src/gentropy/dataset/study_locus_overlap.py @@ -5,10 +5,13 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +import pyspark.sql.functions as f + from gentropy.common.schemas import parse_spark_schema from gentropy.dataset.dataset import Dataset if TYPE_CHECKING: + from pyspark.sql import DataFrame from pyspark.sql.types import StructType from gentropy.dataset.study_locus import StudyLocus @@ -48,6 +51,37 @@ def from_associations( """ return study_locus.find_overlaps() + + def calculate_beta_ratio(self: StudyLocusOverlap) -> DataFrame: + """Calculate the beta ratio for the overlapping signals. + + Returns: + DataFrame: A dataframe containing left and right loci IDs, chromosome + and the average sign of the beta ratio + """ + return ( + # Unpack statistics column: + self.df.select("*", "statistics.*") + .drop("statistics") + # Drop any rows where the beta is null or zero + .filter( + f.col("left_beta").isNotNull() & + f.col("right_beta").isNotNull() & + (f.col("left_beta") != 0) & + (f.col("right_beta") != 0) + ) + # Calculate the beta ratio and get the sign, then calculate the average sign across all variants in the locus + .withColumn( + "betaRatioSign", + f.signum(f.col("left_beta") / f.col("right_beta")) + ) + # Aggregate beta signs: + .groupBy("leftStudyLocusId","rightStudyLocusId","chromosome") + .agg( + f.avg("betaRatioSign").alias("betaRatioSignAverage") + ) + ) + def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap: """Convert the dataset to a square matrix. 
diff --git a/src/gentropy/method/colocalisation.py b/src/gentropy/method/colocalisation.py index 2867e700c..c20f4909c 100644 --- a/src/gentropy/method/colocalisation.py +++ b/src/gentropy/method/colocalisation.py @@ -179,6 +179,11 @@ def colocalise( f.sum(f.col("clpp")).alias("clpp"), ) .withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME)) + .join( + overlapping_signals.calculate_beta_ratio(), + on=["leftStudyLocusId", "rightStudyLocusId","chromosome"], + how="left" + ) ), _schema=Colocalisation.get_schema(), ) @@ -379,6 +384,11 @@ def colocalise( "lH4bf", ) .withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME)) + .join( + overlapping_signals.calculate_beta_ratio(), + on=["leftStudyLocusId", "rightStudyLocusId","chromosome"], + how="left" + ) ), _schema=Colocalisation.get_schema(), ) diff --git a/tests/gentropy/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py index d44652fbb..5b05d724b 100644 --- a/tests/gentropy/method/test_colocalisation_method.py +++ b/tests/gentropy/method/test_colocalisation_method.py @@ -38,7 +38,12 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp", - "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, + "statistics": { + "left_logBF": 10.3, + "right_logBF": 10.5, + "left_beta": 0.1, + "right_beta": 0.2, + }, }, ], # expected coloc @@ -62,7 +67,12 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp1", - "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, + "statistics": { + "left_logBF": 10.3, + "right_logBF": 10.5, + "left_beta": 0.1, + "right_beta": 0.2, + }, }, { "leftStudyLocusId": "1", @@ -70,7 +80,12 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None: "rightStudyType": "eqtl", "chromosome": "1", "tagVariantId": "snp2", - "statistics": {"left_logBF": 10.3, "right_logBF": 10.5}, + 
"statistics": { + "left_logBF": 10.3, + "right_logBF": 10.5, + "left_beta": 0.3, + "right_beta": 0.5, + }, }, ], # expected coloc @@ -134,6 +149,8 @@ def test_coloc_no_logbf( "statistics": { "left_logBF": None, "right_logBF": None, + "left_beta": 0.1, + "right_beta": 0.2, "left_posteriorProbability": None, "right_posteriorProbability": None, }, # irrelevant for COLOC @@ -152,6 +169,8 @@ def test_coloc_no_logbf( [ StructField("left_logBF", DoubleType(), True), StructField("right_logBF", DoubleType(), True), + StructField("left_beta", DoubleType(), False), + StructField("right_beta", DoubleType(), False), StructField( "left_posteriorProbability", DoubleType(), True ), @@ -176,6 +195,67 @@ def test_coloc_no_logbf( ), "COLOC should return a low h4 (traits are associated) when the input data has irrelevant logBF." +def test_coloc_no_betas(spark: SparkSession) -> None: + """Test COLOC output when the input data has no betas.""" + observed_overlap = StudyLocusOverlap( + ( + spark.createDataFrame( + [ + { + "leftStudyLocusId": "1", + "rightStudyLocusId": "2", + "rightStudyType": "eqtl", + "chromosome": "1", + "tagVariantId": "snp", + "statistics": { + "left_logBF": 10.5, + "right_logBF": 10.3, + "left_beta": None, + "right_beta": None, + "left_posteriorProbability": None, + "right_posteriorProbability": None, + }, # irrelevant for COLOC + } + ], + schema=StructType( + [ + StructField("leftStudyLocusId", StringType(), False), + StructField("rightStudyLocusId", StringType(), False), + StructField("rightStudyType", StringType(), False), + StructField("chromosome", StringType(), False), + StructField("tagVariantId", StringType(), False), + StructField( + "statistics", + StructType( + [ + StructField("left_logBF", DoubleType(), False), + StructField("right_logBF", DoubleType(), False), + StructField("left_beta", DoubleType(), True), + StructField("right_beta", DoubleType(), True), + StructField( + "left_posteriorProbability", DoubleType(), True + ), + StructField( + 
"right_posteriorProbability", DoubleType(), True + ), + ] + ), + ), + ] + ), + ) + ), + StudyLocusOverlap.get_schema(), + ) + observed_coloc_df = Coloc.colocalise(observed_overlap).df + assert ( + observed_coloc_df.select("betaRatioSignAverage").collect()[0][ + "betaRatioSignAverage" + ] + is None + ), "No betas results in None type." + + def test_ecaviar(mock_study_locus_overlap: StudyLocusOverlap) -> None: """Test eCAVIAR.""" assert isinstance(ECaviar.colocalise(mock_study_locus_overlap), Colocalisation) From 9237d73edfcc108665f44284a733f154983c303e Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:44:02 +0000 Subject: [PATCH 149/188] refactor(convert to vcf): allow multiple input sources (#891) * refactor(convert to vcf step): parse variants from multiple data sources * chore(config): align config * fix: pass write mode from session * feat: allow for 2 columns to create partitions --------- Co-authored-by: Szymon Szyszkowski --- src/gentropy/config.py | 7 +- src/gentropy/variant_index.py | 55 +++++-- .../credible-sets-extended/._SUCCESS.crc | Bin 0 -> 8 bytes ...-b24d-7ed798f8860f-c000.snappy.parquet.crc | Bin 0 -> 560 bytes .../credible-sets-extended/_SUCCESS | 0 ...4437-b24d-7ed798f8860f-c000.snappy.parquet | Bin 0 -> 70519 bytes .../credible-sets/._SUCCESS.crc | Bin 0 -> 8 bytes ...-911e-4d61475173d0-c000.snappy.parquet.crc | Bin 0 -> 168 bytes .../variant_sources/credible-sets/_SUCCESS | 0 ...4bf4-911e-4d61475173d0-c000.snappy.parquet | Bin 0 -> 20452 bytes .../variant_sources/eva-test.jsonl | 50 ++++++ .../pharmacogenomics-test.jsonl | 46 ++++++ .../variant_sources/uniprot-test-sort.jsonl | 9 + .../variant_sources/uniprot-test.jsonl | 50 ++++++ .../gentropy/step/test_convert_to_vcf_step.py | 154 ++++++++++++++++++ 15 files changed, 358 insertions(+), 13 deletions(-) create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets-extended/._SUCCESS.crc create mode 
100644 tests/gentropy/data_samples/variant_sources/credible-sets-extended/.part-00000-aed9d229-0baa-4437-b24d-7ed798f8860f-c000.snappy.parquet.crc create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets-extended/_SUCCESS create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets-extended/part-00000-aed9d229-0baa-4437-b24d-7ed798f8860f-c000.snappy.parquet create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets/._SUCCESS.crc create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets/.part-00000-a9a641da-3820-4bf4-911e-4d61475173d0-c000.snappy.parquet.crc create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets/_SUCCESS create mode 100644 tests/gentropy/data_samples/variant_sources/credible-sets/part-00000-a9a641da-3820-4bf4-911e-4d61475173d0-c000.snappy.parquet create mode 100644 tests/gentropy/data_samples/variant_sources/eva-test.jsonl create mode 100644 tests/gentropy/data_samples/variant_sources/pharmacogenomics-test.jsonl create mode 100644 tests/gentropy/data_samples/variant_sources/uniprot-test-sort.jsonl create mode 100644 tests/gentropy/data_samples/variant_sources/uniprot-test.jsonl create mode 100644 tests/gentropy/step/test_convert_to_vcf_step.py diff --git a/src/gentropy/config.py b/src/gentropy/config.py index e9bf26f31..82ead9ed0 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -506,9 +506,10 @@ class _ConsequenceToPathogenicityScoreMap(TypedDict): class ConvertToVcfStepConfig(StepConfig): """Variant to VCF step configuration.""" - source_path: str = MISSING - source_format: str = MISSING - vcf_path: str = MISSING + source_paths: list[str] = MISSING + source_formats: list[str] = MISSING + output_path: str = MISSING + partition_size: int = 2000 _target_: str = "gentropy.variant_index.ConvertToVcfStep" diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index b50b470b2..4843553e8 100644 --- 
a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -2,6 +2,11 @@ from __future__ import annotations +import math +from functools import reduce + +from pyspark.sql import functions as f + from gentropy.common.session import Session from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.ensembl.vep_parser import VariantEffectPredictorParser @@ -29,7 +34,7 @@ def __init__( session (Session): Session object. vep_output_json_path (str): Variant effect predictor output path (in json format). variant_index_path (str): Variant index dataset path to save resulting data. - hash_threshold (int): Hash threshold for variant identifier lenght. + hash_threshold (int): Hash threshold for variant identifier length. gnomad_variant_annotations_path (str | None): Path to extra variant annotation dataset. """ # Extract variant annotations from VEP output: @@ -64,21 +69,51 @@ class ConvertToVcfStep: def __init__( self, session: Session, - source_path: str, - source_format: str, - vcf_path: str, + source_paths: list[str], + source_formats: list[str], + output_path: str, + partition_size: int, ) -> None: """Initialize step. Args: session (Session): Session object. - source_path (str): Input dataset path. - source_format(str): Format of the input dataset. - vcf_path (str): Output VCF file path. + source_paths (list[str]): Input dataset path. + source_formats (list[str]): Format of the input dataset. + output_path (str): Output VCF file path. + partition_size (int): Approximate number of variants in each output partition. """ + assert len(source_formats) == len( + source_paths + ), "Must provide format for each source path." 
+ # Load - df = session.load_data(source_path, source_format) + raw_variants = [ + session.load_data(p, f) + for p, f in zip(source_paths, source_formats, strict=True) + ] + # Extract - vcf_df = OpenTargetsVariant.as_vcf_df(session, df) + processed_variants = [ + OpenTargetsVariant.as_vcf_df(session, df) for df in raw_variants + ] + + # Merge + merged_variants = reduce( + lambda x, y: x.unionByName(y), processed_variants + ).drop_duplicates(["#CHROM", "POS", "REF", "ALT"]) + + variant_count = merged_variants.count() + n_partitions = int(math.ceil(variant_count / partition_size)) + partitioned_variants = ( + merged_variants.repartitionByRange( + n_partitions, f.col("#CHROM"), f.col("POS") + ) + .sortWithinPartitions(f.col("#CHROM").asc(), f.col("POS").asc()) + # Due to the large number of partitions ensure we do not lose the partitions before saving them + .persist() + ) # Write - vcf_df.write.csv(vcf_path, sep="\t", header=True) + partitioned_variants.write.mode(session.write_mode).csv( + output_path, sep="\t", header=True + ) diff --git a/tests/gentropy/data_samples/variant_sources/credible-sets-extended/._SUCCESS.crc b/tests/gentropy/data_samples/variant_sources/credible-sets-extended/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/tests/gentropy/data_samples/variant_sources/credible-sets-extended/.part-00000-aed9d229-0baa-4437-b24d-7ed798f8860f-c000.snappy.parquet.crc b/tests/gentropy/data_samples/variant_sources/credible-sets-extended/.part-00000-aed9d229-0baa-4437-b24d-7ed798f8860f-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..751da3705151000702b9f8929e42b4337b71aab6 GIT binary patch literal 560 zcmV-00?++pa$^7h00ICSBDeX|e{(}5>O(Iet8q<=i7_e?%kVRy&?NRlQ&)+c1&*+b zny7}BQy_sjYu;!$l$-B);j8O3t%_8P!S|{WK!7Tr?H!mDH~PI)vVd}TP2E{q7U+8} 
zcK0|^-fNc(r6Y^^397G}4Yx5HgZUZfQtI_)Jj0v~j+#%Y4%`l^AF%B5zSV;5!PBY7 zCDbeFFrRVN$!oBn)I)b zojdO(!8O5%$9)eQ2b!Wt z%NI{EzaM(YcLW}q&nwJ2TiMOBk&cC2Vif91!OoLU63!1TF9;D}$ZT6>4b#5n&!6)B z0E>Bw)3p%6gT?M4^rtGJrSL@r{1I(ESkBLyCJ9GFVhU1io18sH9w!B8CU)J*X*56A ydX?$A7|q7Kw5TXZ3;g=3_)g}9Tu)F&juRCv^!w##CcH{05GV`I7{MNavb8TQ>kJbB literal 0 HcmV?d00001 diff --git a/tests/gentropy/data_samples/variant_sources/credible-sets-extended/_SUCCESS b/tests/gentropy/data_samples/variant_sources/credible-sets-extended/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/tests/gentropy/data_samples/variant_sources/credible-sets-extended/part-00000-aed9d229-0baa-4437-b24d-7ed798f8860f-c000.snappy.parquet b/tests/gentropy/data_samples/variant_sources/credible-sets-extended/part-00000-aed9d229-0baa-4437-b24d-7ed798f8860f-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..634dbbaacb582279e61a75c216b1bcba8e85d1c6 GIT binary patch literal 70519 zcmcG#cUV+O&^LMx%-{?T;}8ZJ1qB5}!f4LZgaH)OD(0Nmv?k2D z=A1EyHRr4hf{1y|Yy4{5eed1xKKGA%pXXlmP;<`o>F%no`c-vRlVNI2>k0jon))(> ze)OGDaN;4E?6)CGsF-}g9 zShCkJJI%?WOxbCfg70~m5-D1gcwXi>&cO*zGJKe9b&3v)1zxa8KxLY>TP#+Ip#=wT zp#_EvR|!s*+?4I4B!RV1f+WcTD>1BTA%PS`i<1_t&_c9GqD7!(hIcqH>}NOgC-Kph~-3wOmh-v2a$4|jkbt3 zR-y#T#yiOH0#M2A(Btvqe# zMJMC5N-Qstw}$a{iQ(+5!^y(h37}W7cs53$Mb2trV1?c6O0>f&*=$x;;%tJGrbGr5 zi>6`T9AjZ%CM*ML;t&PdP64Rk;fU;T^^bG7>=eG%Gt;rxUaTbdnAm zrH}-&R~~Jbz}7?-7-kuZRTM-PRFSrVaat`7b*}=8WMRQ(9AIUXLy&pVVV7jkBnc!Y z3QkymPPECun~jlJ3kZ~Vfd0 z=g~54cZi^sHm5_D?3~@kOS~1tVV4ApWnw^qLvYY`fpLPi!%S?TXV4Q1pyQM*cDn>R z<^WaV0rl*FKpgPOQ8ufCqF7KeC;W=IR$#GE77GJCIz`#ZvUZl^!2o&QVWns%=y1bc z3@D8gERF@1Wl^-yvS4+{0vlU2wa!&n~7Nt9?_Vja-T0TgW9Ny$z@6s!VIh7(d@ znw{ck&LPWo5!7GcoFG+3a)L5*Ohl-Hl1$2MS!S((2NdOWa*PBxV*%b|&=8QhXtz@= z?~niuCD1GQ6VI%-RJLpzQa5@VRC`wjf++qiav(l`{hk0ozyVG8DiJVd326Rqh zpj#&_rV{{4lo%&~g#^MC1-G;S&1j0^9I$#KFE}|sbrt|two#0OkteP!plOT1F$@hL zCD2wo%{y$c=HNhW#IB=IKL>>aI&AMkP;N 
z>(Q*S!W515@XBGW{#;dk`S?Z0760*p%c^Am^Hw$Jo;EWS4LBkQjM-#PRr^QLJV*09 zC9>uuAOBd&3|>o+WtuW)>HR602NMDbn^Vn6Ix=0N%`^q847e|vlgwGl7U9q#7{0^` zGU>0Z*N`#00S>U57tC2^lP}qT367;Z$0g$c@{J~Es)vmDqtE%N`|Ff_%N2?FI2qirq?E~i;f$GuRsuK#-%FvuIXF6zW#r%}hBC7} z&vPP2`*wBT35*1qEi%3zq8N~&D1yIZc=7`|FC6YU_?ibzU8Nio&OoOW$AXlVk5s_~ zLHZ|ylK>0{!6lheQ%zY(NvR~Mfw}sVp%|noQj`o9W6pw~RCB~Dvbv6E1)2)!7YdT4 zMK}JDE+Q*sAlXPmvplCv1j%ba8911wDD&jLh{Pz)48|)1bchtWFD%F{Z5BL$$nbO! z!IKd>2?i3C57TwWI>D&@2$l>{DhZDC1`#L;fmAjW-5pBq zU$XmG2Qa&IeTLq3a@1Gte=Id}&ObJ%^dZKC9uFEb$9hgyty+1l)0CZ$?+w+Rj4EF= z=SGhg2jTT8p87eS`l%})EKvW)YEP1{PyExxhtFz_Nuty@M(t9}d9+=5wQ+y1Le*-r zyJERlhPcsd%!XrLv#PFDJRNhxv+@O{7_?a9al=(f{&qQv_}*d=Z(z0#v!&uNPOb}MPS(}C=v z5B2z3#fLC{V%1x}TqA(RWjg(*s``V)6CV7O(Z9j@4Ale((Fs4Y>i08ChmHt>&$jOU z=>&+T!Lfj~#6ch;E4}r>fFCRcaRW=m0amHKiRwNO)Fg=R#TO+u4y2A*r8xFygHRa39VN%Hq_+mB-se-4kN}(kFKisER{25@o&f$79GyVL zF*0!DN4{$)CU6`tfKA5n5XlRS9+*o28AuR!i*XVIqoq@F0#8XYAU74q!;`o_CEyJ# zQGifQ03nEKZy%9ks048HKv5L%35;GXh_&e#MIPzsuaO$dl@%GaH@PSsbbw2MNyr>8#{s-j;Gj%R8p2}`B;Y6nA21B3R);NX z$b-}**s_Q+u*S=o%4)SyF0Dwz5 zP6S?jYEV)F13wVT%5e+~VyF=g(}oQrumW=NsSzxL?p|XV;*Sfmea=)5}pPT zJkJ1n}6DQjH}D6X^t7 zuY5D?Gi2HvM~*WbjqT&;KhR;$%dzHIdKpG%!P0>6jkE}1z|UX;+=vW@sd4XUd9RRh z$$5?ec{u~%Yp?)t%oN3wY7H;bqIyIq4I4(0Hpghlqg#^NWI>!j!D0z5fwRnfi>HwUG(ZD&eu+Qw%w z(~1ay$Wf#@w9ld(x_YP^LgZN9}ozOWL3B*YJ>4_`!P z+O6``fsS#avIjx8^zjVKQ*m5`p?Q$m(jU}x@bH&8LK;{t$R!pyYh&x)JI7&n^s-DL zq#{LlR(HnS+tE9>|KN~E&^_=Ma#Tk#5P605%dfvU$YO*zE#YTe1~{Zvs{r1|&IyG8 zQWDH@wFqXgl4SYJ06-GhrX={0E&kT!!oUR~F&YlGNIA)^{m1|ak03@e>?K&sBjaRJ z2PqMXHiW`}lFX+RMNCaeHCB44Y`FuB-;@nO0cgr?819%3V#y;KA8+!T>50OWkyw*V zACPCs1Iyz8ecYi{f;)7hLjq*L8grasMiA&8giUyEs#iLI9*E8e2Ih|9+=$6BK57Ms zJqtGN06<(_kV(3uZ%)s!zabjVD==Go<>uPMerX!6gJ_nghdW@BGAB1`5XG=EU`HCf z(9(nz4jb8&5bCp<-WFT`v9+2!^TL2Pf?yQQ@DJ>t7I+`P4Zwmf0TDx(BcAEtc6f;~ zvgUuIKG-{%m1+$qN|ADyqMKvv45C-#Aw-Zn{^aPUgF}uCjJ4(V0!SN>)2(-Ij`Hh^ z&l*;w*{$g8E(<$DA$xv)X^q zxX|<6NC^A@H%aC9+8)0Tc%9z81YM*x4}O3~S(%#DZc?M7$H(aML3?~60anqJ{&mlWTOzvXkcoeJNWvFFs4@%M`( 
z$sRp!cY1^N*0}xK+?l1tx2h_8RjGRiHGRL>JSVga*&ln%JGJm)@u*51>UXBN_|?X= z^o*qP;^&!rYK?t%1jQDuGx;r_Q#^h35avhs=fyiqYkDu;xE0AeP7ge^bRi1-a`xq! zV{eNM8>H)wnbyMp{0+@QX)$>7t6UmX|@lAXxY78uQEqmLVh_lM=am=vba_d{*+OtwD|jG z{q8eEXCgsy%5m@7t>S}khChn@WqfhO)|K~$PlJhH`TKRO={B+rIqFv}>Q*tYelVkn z@&VF$Eg4#E-h<-kqrRsIpB2T!qAt%mb@fJZw?*UD9$k917?x+4{#X6X6&tAktW9Ep zH!O@N))7@mMUjqpGI&YF;3fX}(ayLT2UZ#nCz53MLqUh-X^*67kAm=*fO(oD3H9*o zeeu&bs%jjDPXCF#zZ_jV@BcpXp5JFRJ~MC$(#CB=^*fKiQ|EQXeAijH>3Y3M*WnE6 zT|t^=zBq^Cw#~%D(Fs(3sTn?76k_te$>Qq;HBE6LoAH|~-w;8RV1m$@a{YSZg$oSu znZO^SM&cc($Dyz53X$T_2|T%1Jbw3W3<}#)imTrqg&Sv2#Q}E~;U%vspwIL0&Pa6Z zy@Z*+y5LjIH{+|X-r$ve%W;I)dt5V~$CZ(Pp(U>#;<3+2Q{(Iw&}KGh}OEA@qQ#)kh#J1%&^CBS>@1|b~^IQgSPFv zKHw0V)9&WiHp*S7*U?ABpx{A+AMq1(4VS?qlP9lX#m*ATfKO-}1t@w(F*wEWlV zyPPpA&|{V1^!BrxK@Et*CFOJerFPVy@N>z@L0eI^{PJtfK4qY<89B$#C2mJVZlU7D z^zBGAv>n$^Jc?SZICCPX!)Ek;?6VF1W^O^l*Z%ryp5JYn{AnwysDD6zb?!2>f8U`Vhlg%KM}y~! zrQsXU)X}R)R@=J+#r-&$G$wox>bq+%-NnXHeP_J@p8*F2T^T2qUynOrM&|bN=`~jg(2)ue* z`u26$LkH#LbloC6NeMo{d0SV+VcBD z#A*-C%k3N5Jtr>=`fg`Ywp3a(oVodZ>@bD@ZERpRL7%8!wfpF{pANyT&yb%B>H80} zKJa?gw)tJ!e$3B*fAhYM^iYoZv#whP4IytgdHcjkRT{z@Ac0!pZvW9HzfF!q?W3kr z;&S>p;W5!S$2I^4iPFdFpI_Md6R5Hd${`!(g*9$uXyD0 z__uFOtUCwcP9?fufZk#u?=kMliyJ=wF~PvB|L(Fqst`H?6CSnP%_k`JhZXw5zrOAG zuhTp3(fz+~K+ns#Wl0$5$R!yZsZt&8-bSrbW!`B>>NF!4JmyJNYy@2R;o03^>;fNq#D_Mx-v~e)cZ3UnuU5bYYQ(ElO7BByD^zd z+dNUJQ?-BJ0^SY1QJ@Uf94`9Nozy9owNv9*{AY(4>~U;TyY4rA$skSkx0@oV(k!ce z!w7xgCu?c^JQCT9r)#0-!@CZ}!k5`qFJ{7*%jO0&!Q*BHUXD+1N$O%t?j4K9ALb-q zNL(1`o<;aD2HN6b)de7-<6qP6{xXM0Vc(=79?lgir7qSK($XyeB5r>&hK0cJ@9@LI zY1)6)Jd_}mr2REo8-Bh1pomwfb3hlvrf)CNaec&z{nwtkO(z_ddh@MG-~YSNe#!s4 z3-j>QY5(p0!=hak0NFo{=zn^zym_PT$z5V$ARquqpwZ5JH z&5L_e|GVV%m#Kj{w=Y$LFAaTDI>Rf7NG7t+`H&gQlNH0%z1349XSfy3tv2o+Ud`XT z%KJYk@PFx{^~p6}n+J9xrVa8XUq4M%+n!Xee2O z7^NhYC1f4=+K*6%gL$OQ>ZI@tZ91#30_lohG|dUGr!goK~T)1;)VkZ2u*<&fc;NWkuaViOh-I@X=kNw5*R zpP6aqD8$E5KZY>Z2w%ef8sgT?goYOwIYLY5^ow*XY^xbahG!XW=vXNIQV9cKQwbXu 
zbCR*r-5*q`gmFX=q2+46NY&DVPvysfxKpBs2f34Y;H$2c@y>pGSg@s>92wekaDx(CTSxhi6p)s8Ksg% zZZLvWt-lWTMUY6d5M%?>E_yvVUIU}>!(+pADo9B=yyD8pNQ)}0tWmgv78(PmkTQpj zwMW=xUSys_@C2nlWa2rg<8r0H&Yha!nLh|<4oOL6dPFUFfq_aw z*zmZSWPrP4SqNKgTmbuDNOKzQXU_E>Xf`fAYruu)n zhofOqW4W;h)&3fH@g!omA}gu>1t<|nP{)%C!wHiymsG*w0MlOgszZ7(;({R)1VqW? z7Q$pIam1@LRqlH18K@wvh8j<_29(gWb%9c^fg&QUT%m8E1Bpn2X|=-SXW8>}29S>^ z+Mr6!G8yW_e4uc!po+4-MutpAnB91aP|1R1S#9(uVA*6L*8@6$8jsT7utf#g2}=%< zx|LL0RIm*;9W=HiU?5ul?bJ^>f?kgYU4$%bVVg!R-I5;P5bRb92LH1h!w9k%rX`XL zei}%)3QRbmVJVpj`w}7$>=Bg;szD)wg66}LjA?{|;Vhm8_nsHdGFZmz1;|2CqN7EK zpF)zuPQoivmmkU<1ncUe0Mi~psK7{59EKGd7EV`;HR*p*Fnm~3MUolb5SWyTklrfD zE4t1Y!Zl!zbYYZ=Ip33C@2Ny+fH zb&$n`^DpThAUy#}`=$>ee?o>r5W*5eWDV5KIT+JzlvJJZYV}M=jtDGwivxEbX_tDv@*~l#GNKxr^~dqscA21cCyTuv`t9pb?>LaC98h=6`n93w>d_)Y-fz@ZN9-`)z7sYVZt&k_YEj=dd{ z4@k$cBwpzIvvzbd1=+2!B@cz(4uy)DsYKf zT~7mtrG$j$Bv6X36RY-#RM1>P6NL`4f{^G2SOUG|X+N&&f+nDI9K&lzs)$s1lm^BV z9OaD!vSHXCfRrFXnooG^pL@+W*(mwJIX$g7^so)}$p{1#3`T+5Qiw-bEM%+lRq!Rw@_Xn0ZD>D zVIaK+WnvN*1ajs^gaO=aQjJ6(uqUYh2$Hsyy5njxo8T?21H-B*>#3l| zt+s@0RPd}TlE?~4IHDjV3nEN@SH0B*HH0>HR}leC9n#71>gps{14jH5{nI9$jz2^A!@hYC)r1vd1RLlvT=&G89as|WLUmj;T`++G(V7U?HM$n@Avey^)W zoCnLN!(0In+9YMw?Ux!0RYKsvY=U}vYNCQLej$ROI#0oq`>aVt&!{fO)P~7p5-uBt*Xee^VdXq5|*iw^RGin$wcyp21%QMNDlwm9dEj z=nUkywFCV^BEf|TvUY@ysqLWvJB@@81;E;`g;sv-7LE|OxT;~k0vyi@V~n>BDo>)A zaEMUIwDztFtdL+$v$cudzHX3&YG!03-}-YkP{8NONW&k1%8V!~4{57aQ%2V6UsaWr zn$<-Er~^57&hM1Z`xP3fn#q2VY9m!eL5VePs_QhWy6Uv46wtkvmkNsTE=`v@ZnYL& zXNM0j%2oh)?3Vk1Jp4TUm>+}+`uCfn^2_w|IZ4oA%|S~}sS0EL9(p$#pn~#GxsO`_ z0t_c8t3r++>QFvav`-;n&1*Mkx~v0NDluB8Dv+**2{acm!0^TGY5`e&su}>Q0pKGJ-|nP?=$)g&vQ=3obEB4_;QC>UVltV8mDM0@ z<;21&AxpI(s<4v=CrN5osTLc66^Xu86j z*Y-xPoS?6Ag_3cWjkU?}OxYa&(O+FhJwwVGgUn2&ZvfFV}9Q==VK zjj2j1Xhs{XS@5jZM&H2AzWPQ0(xPP*Vd|2ika+Dcx&&Gg40ThM2J6x4bR91dl6HP| z(<8$)cML%*_bIXrw1$w5SQNZjyT>Q&gd)qd*rLt$afjPUA?*O+n^Yy|7}kJg@l@Ch zgTJe_-%@|WFc31FKS-_y*bN>PUNFdR-)qomzXs^d5SP%j-wcD_y6S~pNHf7`H~T6; 
z-5dC5tLO+yyUEXZOa&VN(Qi*kWMNge|CdEZJIdR*gz(E$fzMoU6WmN-3qKH!VVO>X z;~|pY6t!U)1lE*ajPKt&K|VYyktdDXs(|^7HbO6Kg;EooDA9h=8d?!zQ7hMVZW0{}V#Ri?`XlaNlRU`spJFJ!Dca6i+@ z@LAi>oBRVn3m}7*O^g9Hf^Z7duVb~2DG=sBaW5>?;1A^{f-meR|E+>;t+uwev6C7k zAh3S5LkyAb07rGqe$7o@~sr9u+5p=#!Zr-GpZf6*Go0(kIDg0Y%HqH1ldy>mJ^ ze%K>H4Q;UwoShr15)@!daI%D{?&($~j*A>$qv=_QA_J?0Gy>@Nc4Zi@fYax-rfR|& zlHj#R^+al5WRNmFB|*DMMPv!(z69rYtGclf0U}|zgx1x;EF@-Cj$xwqotj|FpYv}u zBAo5;EAtCkWVxfwRvBafSf)IJFh@qHwA(aVzd-PV13fhW$Go<&w_m6s*nRp7%D|Fq zbKo@t>8E{J4Pt=0M3&Zlr-7UaudMJ(hR-_AeRh^i(}#z{kqX%Fxw9sa0oxLz-xi~v zvh>j2Y7OZ_knb*f%inKKT-5m`I`;Y1#tyA3(TR*9w{8wEM;EWx9^WeU zCE8;7BV|4L5=Aa$>%Ly}0!6#_|Drwf0x`8d^jrP>1*-hz_VF%%y+H2j2GlQ5%hU7J zS7Kdg`s0$g4;L#@_Uuy)?d{5t`HUCe^N$zk+NX!L7G8dVmLUD|kNTHLh}~TM*qs+> z-S1z!cV6)Vm0UCIKehZhDv2A{Q=IS;-8gtMW3H|o-BQ$T;J4!?ikP+FXhD@1sB_w$ z5#67>Kp(#My?Ec#h2*fAn>?RAM@ML)%kA4Q(9uauh7!_ z>62Ftc!i$)%isE!AFEy6QGpz<_AY(f)P>wxHiCGKx_xUrBq7m-_Qqdrd^7zCnyP6W z`0I~yq^uLVTwm`o>fks}^{?YXPi7x$_`0|P#kPLz^W#zpDxkJyCgT#ca%apuiF}L_ zx9>K6t5$*@T_!ZGN0gxFH6q)ET3?_>IJeoJAtk8kv3hkA{wPJ)yIYnRy1hc~l&5?A zSZZPK(M`+I=)ZmPCw6>=rm@+jt$LKZ%OgL(@4WhR!>13Qpn(G(7hd1?3LU!KU|aeI z7h0^_IxK%!Il8+iutXs~LKUy`7xrx8Laz%Sv5wdhG<8tNvFocqzE-7bQm!tR3B6p2xUXB_p*%ANwdKqf9-pZ9N1o;b{<#9EjO51+V zayGUM)jFYQv1?2jns}>CZ)H&#ioZv{Jvg=u?Ra5-|KxHxYFzPqxFPTc>(a{fUW2VOlsR;SyXQZ!-)@B3|L z87kj>Da?Mrh4yq=miXn93u)yo4MRbn-rf(ifAcOyi$9o0azo2d@MlAN>xWA8$n*KK z!Sl<|@uZfgdK$~nfv4}5G<#Zs-sjaF6yqvGC9US-zqXa3#y8_;&uCSKHbm^ye(CTW zEgRopwJ4OMuDg~Wc@N{Yt6u;4eOoz__iKNtEtjF|&-XXj{g(^<`E<5}u;Pmu4<1@^Yc$7mj~y|Emi< zK4kcuIj9t6J)QgLNV{@Wxo-BCrAaRIyus)d>zb9Kx%UlQIzO*KPj7E%cOt7C#Rb0{ zK5ADbBED>y^R?R(G>Saa^vU>2baQL8q4-8AI?%M{j=1@i==;@a@lQ-H6dr-SGo#AU z(<{f8FO*xnO@~eeX@h(_ozn+6_`Z06g2}fA#m=a&#}S z-WR{|Pf$u(pH0e(73gfOFEt-iWhhFKzb*4^1xj8LVQsgp6h*e5;(Gh&5pt+*2fYD2 zj`@6}(Sg4np%Ow~?|52*ew?}+d+Spr+I;KfzU?2%Q0PPF!ibSBc!CUH?*~kH`UqJA79<=4J9W86-9K`-0__;ydg_O$m*}M5l92&p z%8@v=eA4X?rRd7q?JMLzOVRC+4NE2kmZCmZTgFGnbF^HUX?!}b9J#v8Y1HX!1=_-w 
z&KQ2wg{sHCum9;_Im%tp=J=laE_6Dlam5H2?_BMW^3J&~l-c(VS%k|`xbeyD6GO^T zMB1v|qh^;Nk2k~58m3jElgx-`AG=ke_Ah?-@akTUI-fm%@`SY<6`eTn{)e_49rS#B zY1EWQh9OKpX)aX1 zZH?5{gUeAuU}kf}`BJ2a{@ngrb~)<#dT{>cw&kc!Wa&lygmRRe-|g>Bzq!yy-KF7+ za~`3CnSGZ=EOnvK)IPdqGhL`kV8*ui?=CdYkdvrNC`ZeJP&FUG>u#xAkKVDqL{$lr={lp^a(NVvc@xq5E|LpJbk^K;y0(Mn;349xTY*a9}%( zyJ5t#6&K3UC#QPnFpy90*uPrTxaUI0J_hr|LKo^;O{-kms0?jwFsJt~o=?#5^z^{< zVkv6Yao@fRLrPJNviTd2iH_JRQQ@}mFU0Cfr7m=P-t|!}&X*yz>2A*YdX;GBQ@%|RcA?RGCjZf`{1NItZc4zB+b-1LQ{C8E z65#x|%!GZlK<-_BDO|Xy41GKQK$RZkLS5gj*cu3UH%ln$Rkj^)DZlblz2C}Ep+eqj zed|L0OOGvFoe%i>q|T=gRZ7vW9mb7&);vY4tldmkE|sCpRK=r%Aom~1$F4Poe$PdT z%jAr5bn?dMEzP#O(1K&l>NsY(P*CK}(#s7?(T~Xk7be~*Lk~M-dQS5yMdY13gSK}B z|9kqxye%Wk5t~<{Wv!RJylx zkbB~k%1<6E%g}TE!Ti6wJVG5`M#eS)zjS5Q*+U0wK1B=rvYVfPz3Wy#Z+UJ=C2H|- zUuL7_Wyng_UU=~EQxu~fA35T+3oW_V#=~&^5gOAg&)5054DB(Vta7(4t6ALEqPd_vA5&EMjVesXdY1gz%aQJqdF&GKtNSkY*zGZ= z5mfN%uyb z%yLK7dB(29Fp`LzGM}~+bLatHEo$&RZ|&&kS>m%uBFfGcwK)Y-S&?TJNv))CN^}~=DfV#@host=Y0eJdRqRh`j+gY<9uVB&x5wv|C~#E zUOIQ1B`z&*>Pb!Q#P_@jEYEFmSLj3R8~nw~xM)v5;`DFqAZ?cf;^~d8Ju6;@94L76 zm|WKW;)6!R{(kr3(annw5gZ)UhO3q>J2G13AFbFjBMkO0A&FyxGE0*u>EpT{pP%`t z$+VD0zbXu^!$-~}(&lpNlJGz05Luh-xBUoW9?|val=}XM(--o+J@!mw>k*3^4F2%$ zZ-3&_GGdH2qSLjgkt@<>M|b&i$GWR)GFCKGQor?R`$v0&_56C+D!B{jvlRNT@Ha7M z{@9`k)}QqIpZfVkkNH+;;CUke4mEze;sKTOV5p!+0$oeOhhS29XgC9R2S}|Z04P~s zdIMA6usQs1n5=cS;Cjdyz*B>8W z8bT=dxwHTI@%)Fc@1*|jtp!d0|72^z{{yzZdl2xy2`HH-7;Q-t{nIA;4YQW)gk9_N zpGP;g)P1+vH)iU(-k%DRa8}I9#Ut)C##H(os!5&3_(a&?2b-I4xRAel9 z*@wdWglRnbmTeemCM%?sBALd$(`!u|E7G{6frc3r zL*rGIohl?>8pn;P3%O<(*U%wo?nesWyEa?=@tDGkZ&y4w@1gMH(T*)7&)sU$(b9aPh3k$PKcn8~cswtJQx%uT;|Hsb*Em@gkB@RE@*drZ$M*cS=MMZC4|2a$ zdH;AkUfJaF{DNii_+({j^6e?{c>n$_^L@w12Z&8weI`ohKIO*dM_H;a$QLuc%KQ4t-FZ#FzLKS~#mbM87{57fltmT848 z##f2Qvk%`f(eI2nxin#h=UXFwwc*OP{8vU?+jmsgdZk8eTz~Y}-ItBHuugCRdmf&D z9MjG7h!Nkg9;)J8ZNzDXUXN?fHR3^a4jc2Q8}YSO?W-&sW5g4m{ISqE+=zpxZ@+(} zz=$_G3lGl7H{z47BYzgOG2)Tx!iB4njF>3Td0)&KaeVfyvX7BQJb{l0t`lIyqbo*E z$qOz^E%%1=2kYTyt~NjJsRrU##`3-O 
z3p(OOCxdT1&FzSvej5DzD$r?Oe+}9M^m@l;OBVp0H*C0NCeZh{@{+~@b(}b}HVG)< z^d51lhl~x_<-d2d%QY&-&xQ8O1+UDc0BZ00MG}!Rh z0}+or8ENlyOT?3#_;sc)+}G39X;)4@v?{?exDh)`I3m&wo=~?1G;ekUTOYC5s#SeTr(PI zqtkWQtAMuNc0Z5=+F7dg@W%xa7aY!L*yX&4vy+^BW1#w}%?~#IQ^fa+b?@Gu7V)oP ztu9UhdeOJm$Ns0Fzd+rpnN1V$33mIWyUrJ9*uRbaohkOun%~!*5^eZM#iQ~A!@Ws0_#E*3tMo^7#jl}#G@!ADrC$*9Q;?$WtO)=;3gy2BaTNem7?@HBSG2nHW| z`TfwiVGP#S;hx0+-MXVPpa#&T&!z`H9LnI`k5@jRhA>#Q;PrZ zK2ZA|S4poP4379%FL+4~gWn__ymYBMgU>M!=kIkgc=Y~@{ig!$9kRH_0tbT~;eOQw zpkc}Nwz%3d_{E5`dDR;*xZ#b8D<2|39)`5aMnKz6IhayY$zb_Xr#bCk(YX66+CRia z<2F5y&3Su^#$WSWIkIZ)9RKEy5Wd@Bs`dnYD0>2j4cDGl_1R9^m1Pxem5 zG_L3}UCaXdYljytFDKIYLDd0GTg1V5J(XveSQ?)#d(iN6G_-HOcitaB>HXt!m(-_m zaqRj-SL@Mu{K_hQ?nKgf-sMjR&IHnU&!0RNA)c7{Dx!X*ipD7)2dIDBz8H5HuRl|{ zBOUKMBF%ZfDjmC4z7ER=+S1EP4W5>cCmt*@X92xnKM}ohYC7g-ciD1mN;-bF=kl6k zgVOPr{BPux>~wrZ7j^txt8{F2ev5c1r{k>J;;DMo+TxdEig(;!mxb9yL;be?mW6NK zPT39Tckzcna$HhJ7T5f?xbKJNG+uI#A5j5*dEYISzk%N# zI=s)d@*;?9z8$ue_3-Ay7r%EKSr5P5`_ZzZXFZH;Ix?evB&Ij%+r-U{!1ZP- z@)u5yz=g@lUxc7=d^RY&zzgWM`jc8Y0>bf#J3BAEZdC`z7!OZ;?@%D$NE%1jY-tSmCKG%p}Xh)hC6vyH6jx+1G-w4!m z*Y*v$ad<o3G;S(!w)&J6v!`{Iux%W8^fB!q`^K}h} z^~Q6)4@eGwK8q^uKV!k~S8qA?E{nH6>9UqR&EnXrp|_4LV)4N%y3b_`Slq?>*W6Y^ zSv=rO(2J%0SbSyo$mr|6Sp4Sd)aQBKSX|L*?{hXCaAoDp>AofwvpJ=)HR4&kY|Q%? 
zCp>j;GipTOP{aC6{(3m}WA|?xb1fWj`^}v4ec`z2@a}WJZVbobrGV(A)xvRD&r?Lh zuXS+(eKaHURbBk>%%OVr^L24%&t;v~U#^RZZu{@=+F2JH`a7<5-%%F_9d6p^-uk-u zu)Uyer_nT?yuNn|*)ASuemYmP{XmFwe5x*)p^U~$g7zEKx%IJd=0tw~F7@$*sOO6h zRjH5H?L2+&lA|7ur#?)-c_R|n^zKqHdPXFEFfA*;hBXtH)bw1+MrUH*_8vK%4Vn1c zy5`yT>Y3Q*!;`X5pn)ULAEot~c&m2p&9?!Wc>dJ~P8ZPqfA#42<7EbJ`E+_j*ozE2 zW^Un;kOvtU+rk^{zn6hGR++hM^qmZxa?<;BqjMR!%jBD<-kiL+x0K}8v1(%R-C@O=J;HAujAYIV`gUHMbbduj*~NRcHYw%eo_Yhl^Wt1 zG%*9#vAo6N2^qMa@p6DSP_oJPz?ktF_{gx=?T?Jhz_m(#(H$9^fp;&*i7SCt?Ru_f zOQ88v!n-UToq;RfZHac%dDp#xIh|&WocQP6Gh@9joHz(u8c;DB~S;_d<8>N-XjAY!M z5;sHgeIJ8ATyDEr_c{hYt5f$@+VdE^v1i4#sG=Br`TQRndst)esyEkPolT0tFNS^^ zHXX;{w`;uTPmhklWzU0}Mb(c1Jo_U<6A^=_2-|1A2#>*k=}zdXhs9veUA?AOgv8+P zw+l))0d3xk2?BY5ghfUjO_pcI-{i9FU3j3=*ZqW`ce{-uo)^%K(IpG@U`H>M{ zHearf2L;h@S`4X=Tdk-*dj242H#L0Jg3CP4?DwSQhyV^>FI|#%IfKOkC48rVzrerK zsn0FDjrgMR-SKu4jQCLb^}rv&MttFI(>DWF#Nj6;ZM??k#o>b!FJGM&0yvf&8gk@d zEZ!^)aP$S*^n1q1xX!V-$MJox742hj^YaIe-)kO=o2=7*`YkaQ&)60b_l}Rn{uPnE zx^c01|CL6kZZolX=$J{#yOpunwQMoVmc`%}JDWUtd^QFf{5K6bbs`3Ta%?5$Esnv1 zLwYqNX2f9XY@gn-lVfn^Zy#=L?;3-1W@}Q6U1IP|YlRC-G5E%B#%03{F?f$~_3rJU z7<_Kn;}s3{ux|Ov8}~h9@Wi?+eSY3hZXXncBi=kcSj{4d_#JFJPm+xJc&kpz+<8InMN06{>SF})zL2t`1#f{0iU1r-$) zL<9sSASen72ntq;s3`W{#dh0o#ooo<#a?d9S>yBUz2E)5=X%dS`#N4;nUG8-GrzKa zt4zKh?;wxiR_E{h?H#oKa@@+2Gu}b-r*wSu#_c)R0TYj}_YQi^W~}g7jQelA{CHlP zN6>+{p>F-T9zop-H%;R;xdrv@S2i-q*DdHo+~U*SzqtlId_1Etx!xrx=(FKg=)zyIwMw533Ep$@nGnx@TNg4^o5e;t~N+e4oNHPp7_w%z%N zvpzwe!eft9+xz~WgO^}CuX|AIrNnJ(uy$1Wbf2J_$R`i%`uPNP`kj}Oe%CuFfwkq^ z25gs#=AVw*Ol${>*yV#)dj+kZ+HG3zRIi}O#fq5^@>YKP=@d# zYrys3pkI?&!(z_{2i5oe*tgKjA7|OW4IiqF$8FxnRb(DW^B*RG;^-L?M;fm!bGb@%hl3H_tGLc;G^-BF_ZtIg)tz5z6=z#p_wUNA?&;D-qkkMYA@ zDrS+rh0XV>W&L=}bYtP=1jhx?leyp&*vVA2gz(NHk<}E7NvpM*DHw z5woNx?2@Yb_YjX>X6j)-uBEky!^HjcaK|(}qGQ@g>0gdhE*LE&8J81hOEPbp!kw}o zRp7(3`q@r7uUajf$y}O6-q&{)E+&?eB`vZ`XStSmZe?{Y)4ylARYWLHxe2Oh=GAfO zY>(Qc%194hElse5X1;WCTS=D{Sr1CO@K(_HEAz>@o{iZeF|9 z(x+u}+gzWG`^|d!Zat~A>bm_xFg{!!Ft6*b+XcO3dmc5+lkMwho7e3S&8Ad-lows9 
z_{(Ksl+x)GO?uk*Xq4)_zj<$UTa=2cxe__xHnY|=+*UjOmv@sPnV#cs|UtX5B9Nie%>{CVWUeK5)?RJZ5a%$g;55!K0T?y*hYoQ`6P> z2|FHN9Wwd2`?aANSCX#{%WAJ)Jbe1QD~l6m(nU2R3i+`$I1q~CM$YYeB5u_DfX{YG z6_HwTa`oWRH9b)?)!7$33;FI%yypf zwVir+yyo7FAD^Dyn@Q{Bc|V^a8hhWw@>qMnfTMkOzmV6%vwfBzc5Hi*-RQOL#g6IM z>t{O`*fq@QTp8Cu4NTX$o((l4=X$OF^RV{8TpWO|^8~(T?5;bX;i%jD^})PuN9-C) zRp<2&OI2+Voy*j1aihvKn3w+7KeS(UBZccm3h&f!Y{g%XuKFWPSFZTDz>Z8h6YZ4q z1QcGm9I5+hs9*j%NAA=N6_dv-RkXhb-~NujZXap{>6cH6`0zsz(~)y;Ti7AEveRzx zwva=xY~9*3m-Zcmyqzx>=Q-Cv)3Av#{^m9C5_G7_w_V}aVKKhDNZD|$6d>S3;=dv6YZpgY{V73JAu1eR8XDo)=75$12 zp09?X50ZD)#a6+yxUTsIt19@F=9O=$uY{PA&2#Q8s(|o8-Flw#E(eqOsoz{_ImC&y zijC?#5Xl_?W#@v|-v)1%HvS;d4_-DtM z-!oriK<=nbdu|_@0);!~&j{X?4z$HRw#5IK3_qHth{xWa3|4{>Sv}8Bgs)ef=FM+U zg)g@Ulh?^fr6UcJ;F~t5)?1SV-ycLR3Tzt%|CGEGZcvW`OG8H1${!=) zpNSl?cvB*L`rvJ7XC%V+ms9qBy)*(|ngoCQ6eqw})xv>6Jrm$nBz-?UU>N*<$`Lqs z9|oOXNpz zkiqaO+S(AW9t^f|6RLMv3$8*z#`5I%avANM*O10Tw^t@+R|25h!ly%exwfYWJ5?a3R_@Z-R<1**N# zK(9<)b0=c}d>ee;f%mvS(CdmSwr8L?jUvFJecLbBsU@&f6@5oUk7Y%p4m`pY67?TC=bT7255ii zyeO!>0oLI!4s=%=pfRl0B)rf7t!E>0o6j{s$3GWql213lsPT%r_MHu2A$48rn7ays z%>);YPFMxch=ut{^Or!&ozKeKLu=qzx&wCuu^7JC*g7|BszIK(_3m6Tw*UglE{5zg6~G~qS7M!y`*l59 zoI3+g_k=$4B_m}@UV_A%UQAk7(tog z`*bTHb<~K81A#U0S7v5p)yrylL6=^;UoabDV@)wD2Ngl?w6U*OEXsmzn-8(>-@OcP z`kCgKZtjL*pSvE)TD~0;^?e)Xu?mRlzya%Gs zW|1#)x4`PXJKs1Fo8fTvr)^8#V7q0*-E@eFFQIKut)eNn=^~cT&rey%r_VX-3?EXRqeqZMKVAN*~XL zElJ{b$+FpyJt^b;!G*KohmkkczH&Ag4w;p=l+K2+Rk!>4CC`QmM)KF&H;duR*T-$D z)?yHUSK80tR}B5O$$k#nRtz5x-0@qvu^5QXTWXxv6$7($T1ffIVwgT5k|Ua33P2p!5!hPhyn;&JNwM*C;1R~dC{5uH}j#O#CF)J z*jw5xlW*htQ@!n8*x)h1{?M`B zw*P4OlDMGQnmZaSA6@+Q3va)!P2oRrS8j(ROOnzgs8X_Xb3wh+lPy4KPV69FYyj)? 
zhL)-A8-Sc)n>~5=2B;tU%RPVA2AI~RAV$q?hRx}p^QSys1=Z$NrC9?TU`(6snw!s7 zz+3kh4Cfbr!5Leg_CfAeShFnDQAFDU4mW=!hsSJ!R})=>=1Mk#HemMaji&W5%5iGA zq+lJaIJ@>v^8RLcu{La2;;d%Kz3|&-$@aC-X@&V9TJN!YdL;fK%v zvAm144bD$F96i0E0cfUq-5yDo1G~?LN2cN$u)WpkWVh&Q2$ScR`ju6|NXK_an6ia% zti${&qhv0u+gus5om&E)`u@fXhl=3W9mD?o9pJSbeWY4Q>n*l<6w>{2nbKG<{JX45Fau(?{7M`(;vBqIbsicfXoc8aL!4K2JqSmGo^B8* zcEkPL^@nYG?u5w4CTnx=oiIZ!l~-2tm)21g7 zF!V}pL&<(~+V%IKt^}PsY&IzA>Zbu>fn)BDDl{-v8A8K_d zKO04=ZuSk!M87@naZ{W=Y9`Qb?e}|dC=X~Gml)Tl=0Q|tVkxE}I` z@F&wN*29ebVQ*g_Tn7z9+~xL7>!4?eHm}s~4uH}w0c)g_N%G}OnuzgSC zrota{ATo5$eV>VQAdqV(hB4^ZaKyPAHku$fr`)$kH2M!eM)~Z$lMewGCK%b0e3-tA zJ*MyRnNalM{hL}G$Mljn!-fx<4zqfg3g2JKg1iq4vUZNj0yFDRyZ`wz4SvTDey=<| z4GOCLtamTOzvJ!{xCG>eqa~e=KG%=@bc4CVD=b1syoIjPe9_qLwDtCX@VV~}c(h`%Fv@>3xc};& z((-K`gqPlYxp!DIgj_f=eA(?akofLIqUzQf=o4a{Z%7Dn6neXF%PJ!y! zr%6$tr@)zY&hPpaPl4dNZpF_MroiT3D+5IVQ(zhVyq}np2F=%U`sT)^!NVTL+}O6s za6-RvlF#DFF#M9^r0;_#f^FKJ<5w<>hotPdA)3|W!2jKYxTwl;pxu0cdmwfk^lG!+ z^LcFwy!yWXwv}uwv}GLZb-Z^n+&?|-wA-R2I48V#cKef2a3^kO_L%xn@Jg0_KO%Az zblIqOdSJxu;QF`2JB@+~pPd7izaI%<%&px&ri_F+f!-|7;Ui&Y$ky@Xr$lH!8TWbV z?L-)Gd@eId9SL{CSA6aBDgx|YEnm9-U9^ zg|?OVfXnxOmQ1zl0cWk$BkJFT!K|)lhY9b5L0rYY`HP#w;2beL+_y0dHg37xSUM^U zB;%qN-b@IC+`@vUCB9)W=*YR_(|?45!rij@;Ehl?YB4XFdpQ&;`VO7`Pkks%y6n5p zeSRn`_q#XTB`*{_2U%C_8yE_Q`)g0+b3);sPnmxI*AOs&*LPjn(-3IsKjz-6T1Ef7>hd{TKS2c|3A(#iqSwA2- z1Uyf=W&3>&28ZwGH{aJLwv?HuIrg#INFe1J#tep47U;7yHFGitdBdV zyW|Cf=#AG9N9*gk~;5U;PxQ+#P=}`ZV7_U z<8Ib0lLW!O)!Km8j{ruij+EmM0%6FhD&gwfKuG+SJ$THp0NB|A%cIQ$;9XU*{=rLs zsEZEGo>S%z5hbtoo(c1Z3D>g%zrO7bi@*4F6rAr4yEzrZ-|XlPTk`Uv9@ljTwQIwT zUe10{;a#(!kmm>E`llUOSY?3jgKGS)jW>X`MSN}NAqL0}49n*YFhCru@GQgE0N#sR zLPzio&_}fCh2*y$*4!(gm447ey2b0Ay$|T&pHGvz`0duiX6J-^@%eg4`n$5xFH;Yf z(p{MfbREPmNa^!{N+u35F7B>9+esFqsEnw~R(=v_*^3zwdS#@govBdfL$s;Pbd+Vz8no<=cDXtcA zi&YSQIr?ZMsNn4OwtB)x1#w-f?)>DcV5sI*q3uT{Oevi)OY5zK+i#;zMZHzPwT1IJ z!w=v#AZ?rTG&x*88}heisvHhK?CLdcgdEoFb}sf9A_rSmPfh2e+ab&$XppF6D-7&y 
ze`{#UR(LhQZ#6l1D~w|AqR$N53S$#6$qh!*Tvr|sO+d7)x;Oni>cfjDc1G?8h{-7IM;&wcSar;(Hw7pyj z=LVj3fr;hN5#9IEW4}^3`=H^|+esyGH0Oa$kVgqj=-=MixU>iyo9m~R&Mty#%ka0K z`xk*(`?%}e}2li>75U>8AmOBF%M_rTrgRM&VXON-fZalD;K7V z-}&Bto(p*cCOzG}F&Ah-HO#LqxiBMs_b&HLe80={5g#M)?=6>IE`{Yn;UJr(2i{Hx zhH09rwKfNsSIU$}as9~e7g3cnY)-J|bIh0k?oe_bw51@@z7oQT&LC!NfT zT6ShEM4U=oHD854x0D_~JiqT@n6-MOH!X1^O!q6ZDtNLAobT`7T~@ypM$Z^|!=14O z=pz_2vd}+XX`Si)>`fEo4LY64Ay$F;r1vWfzv`fq@5!*NxMdL2&sE`p@kslf(LFb; zTntT5)SjuM7sI@Tzcn`=E`mKSqsbr9i{KXLop9XOY8bg`O25v}s-d*ghm1Sds$paJ z=qI^0RZy49_n30G5_bRXaL1Na31wzyKUrLM zat+KM9Z9nZT?3EDhu$@luYuFW?PGhMYl2^+511t$SOr}?!Yag98}VmRb`SO+XatwT zXM0rbYJ_&0pull^BkW*V?Yg_I5eVlE{WG>U!db6=KPTcgj{NyokA;o!J`2*R?OUOH z^0LxKIDN@!Lc^R!SiNt_sJPvA;NnxX{Kc*nF!RV9tNbN3a9U6h%de<`Wy^`!0b~_K zP8sC1@X%biRyXxY)$7@Cu^{Z(<@>Yn=a}2wR5NG6((jt(4hu}6cy%SSVEZN5@wNB$ zl}|3f%IBBs-k0r%$iPw7(gnMqqIK*RRrwmQjQh+vbD|OS*5wBc>5E{xp?SjQ$yIP* z2TB6_FNE+7ZykN+mq3g2UG|+Tv*C81K{e#6+3@?TT3X&|76g8qk$+=LK6L7TGNt71 z444tKcH#Qy96%9=(KnMb;m46__Ffs|sh`6<5mEoIy;ySQzZGA%S^Ww!mY!mYN#df30jh)2F zHEEsfQ`Tq(bcf{`bsbKDwEx|XURoa|g4>RRF^SV%LkiWzyzy=FuK+k;m_6V%eo zmrOVk*=2b~TVxmBavFa{u31mdhEnBe&&E=!$XYe>wAb2I4X3@=w;bu|vtjppOS~@E zGrrr-D)He$aIdaAucuqdcHgh;CENSzNUv@Of0#wdk8nbw6ek=DIZCIKG_O;h+c>Io z`VSoS#fZ-5)R$wh_;odI3f8QX8qZ;yUMkbw%f}Y_(@gENI#YGBkUoaTHKXPmp8hFj zuWCH+_p;?rMf=kH?r#s0@}l9SgfuiQF4&~q3apgZ>k6%d?doUQ42Y{QvK?7dU+nPzs;V~BG|ctc zdcEP_h10gU#xmW5n#TFv-(PQB0A}J<`3etJlBT`)cRQH~TtH_`Kc!@#^@u2fjX7y6NCQ?{957^xMpFGj$UQbrZ+XKeuR5 z4--0C9a-im{noPih^5c|jw4p8+nbMCqjz)EI)tM*#=|?1j`9EXV?s2fXw1((F%$mS zN*8`ydFBq%fBx%dXsfr;|7Y!aN96y}od3~sr63{yg+QZtR{x@+A?^y9WE9WJh9>Uf ziK3U3YAN!=!r6iUPfVJ`#tj)js4|87#xOnp8#9gTfr^T#tr=)TY2Tq98w#SbP%%$S zXr15P$+p!6kqa7r=jSf zN{NbQ0u3k%^p(olXIa|CMK=;=b}QbMMv7irYdNTG>GQG^sy zBO}Bp%b}q%ke0n(g1SdY*l=GZ7Vz-8wYuQ>G}Ka6sf|a)o;)P0=@rs*GJn*~rMPX{ zoiutDL#GVfiQlEssf_g;KK8elJr8LUC=_%WW$^k7PfzwI z(X;VL92EG|hn2*haAcRdo?YqtcclG6od@Q7+g>cchR?h)WBRbf19OloYVgAkqHvZ%OYdM9 
z1%`7BgErAOF^LwVT!jioDor5Q41pyShfX`pjE7hbtvvf(N_W(Pq{nFcQLU)cC$5#8 zMYTDVI&i-^J(H&Qi^GdW-bm4Q$$ccyP~ggqI8$ET(TX{8Cxu{oQ^WZ3ME}2=|COX;>2Z=QoMFl7o~Sd1Sr0& zMrqrzctUEFY*c&XvBJJecjGnQ{__%woB8E2srOL+Elc8wYRpKd3cO&3qKGQp7XR-w{TDJA}g`hH1B1VS}Sz0Pkk3My(9 ziu#YTlV~aoc!^2anWz-!7lb{fL~U8Rr}8#6*>Jx2GAY7~O5qnnJweW*G?yCVxRb_x z_l;&Awue$j_os!+>Zva>XzB+BoWnh_C;g06$0J9~Xd1@GVTkPIScI*3Z}K0(G>J7zX42!T4Nlhok7RGK*XZoNa}h~G>q86 z3cxvu!cy|&Vw%m4&o~NDR&CTDsZE*BA6aY-;bSFpq~ixr$5Q$@fJOJD8BF~HLUHIQ zl!1y)ECkV@DjrH2OU+O=4z(-kES!(1MJv6C-GEZjT8*E<94`h9P?*#slx>HcXO!Db zAVrpjc$A7$E89ombcx3Kq(+(9F*p$^h$B5(G98IeWBszs@dZ@L^|PQO#mGPgS;{O> z{1p4LN0tRcV+ekPkU;4V+>e8#SiR}2>>!L&? zpJ;01E6E+y6V|Dl6iR_PUW76+K#8^j?GSOyQc{Lpf>dF@JuFH-Sh`<%)a(xgQSZ0J z61xLInu%q@)F_0F7WE~Laa06X`lZl0XvMXGB^J~OR~8v@UQvLrz@Ny#K{+x*`&5}H zCH$=>rIwNoRJPTsON@I`afay>qH1<_ceErZ;gvWF1qnkWfZ!ao+2m=i7^typPzN^P zpwl7v&8-vhkwsHU2gw?-mryF0SjCdzbk-r^mwpCkBTC#UwM+rcHv6t4iuf83vl&8M z<SRGOjWpiW2X*o<(YFa#G&ZQ=|{X^(K?N^0;Map?@aOqp<%HZas2 zX-t}6RwqiGTPN+Vyo^Q?nzyzPf& z324aW3VKasH-wL(HTgqYm7XbC|KBL9Wkc8~&W$TewjCYU8Wf$=$!o1pN*s0Yv8Ms;$9;<(_B7oIw$zn;j%fsUGZ(!CXD z*q*qW>WchO@wSbRa@ZPq`y&>LAo9&nR-1fDFeNB7i@K=DwnY(OV?v13^ongqsb*@3 z=~kQ=V|WPLz%Yi3NKzbt9ie5qqsDo-)88y^02)6O+_WSYvX}yTF-_Bck`Zq7%HfJ= zT9Jk=5zy4_wkBKjlyJ0YOIEn=WcoAI2BMplmPY4k5I?6LyqfJvX@|;*C!EN=c$7~x z#o*O=nrZ`ig{|yI?WRS}^w)qa{B9kALbc(6xVCEQnMzM&k`k^cfSMpmvScRVm*7ym zXoF`LqV)NrvTppQY@{)8Aj^B3xIj$inKBF(xSvk5+@0%*@~LZCPA4s~C<7pA~2 z4D2kqI`{+iiFQJ{lscXUrCWzmPjHP)?2bMc$`0Qc+|_}hQIcS*{7T152+YDI5M5hQ z8e4Xo@&QOY0r-4 z`Jf=MT-`cYImC>5QXh?7gk#@th#3wyBAQ+QNqigMPgvJQ%^9dES!FHp#OpQaz2%&)3dJ$oO=fE(Ij3J0!EXUY6sG3%CJ=OCfnsd!OZmA?7n=ItyCuv&xPVH$a+&L zQFmPQhRy6k({0HSd!S;fLX=~R<`h>a4Z&xNdRw9$MW(tcc_zM%YX71!0x@zDH1-3z zD1nJ|x8u8Tccf}+NHv@KAc6~|8q_Xk=+vgK81b;sCe{WqYwRd}U_(NT(+vl%fMy^+ z5R%?#{te#S9N1_=b?Pv#q!<;5$pDe07X58Qz1Tj?ifN$fq}=)@u0ep_HgYk&XIRnH zY#Rm=>pA=cWK2*_$Ovv^2-5={aTL5p6X29@g^&YWqO@T%X>PX4vpXhG5;C0 zFZ!^!`p)7C4MeI4fdr;zFDcst)pDC6gz0GW49qAep(DOxPIPZld 
z3q5i2zP*^lshjFRj}*J1N1_Zf!*{St$+I>nSdKd>^rCc@m_$)gKP@_k*dEe($Uo4@ z1Cf59Hi#k|#1Tlr&^J|ysY9v~w@4aLVp{ZtO=>LZFA%+;Uwj ziQ(3fQxJWDGzoneqsfM8Ls+s{hJ=tbHCuqfyt-tycmxMyI(hAHrUyf(us4xn4+aK? z1C+&745lT;E|E`=$ATE}`c`UD)$0|tbxaR5qB=sV6u00AMj1V)YG-yA77E_CUX$Sm zQF*t%O)|ygU`!2Aa&YODlQC@ZA&v%P7;?QOYIkFB^BmQ@(ZGn_IZ4u~iAD~#Q`*p4 zXE4GOM1?};dKL=ZqwXy&gg7Y@_eGmv-(n@PA%qUbspxf}pmLxkN-Lv_u9V)#4-#|O z%6vNV0D6xRQ+|X&i(mkpF&MA9r%G(JZzK>9N9^dYBnI+=qf=OC=5LB0VWKMy$hRQsxyoYnj!~=~S9fs~zH)-EmC(sn zfi@i*8%3#`tRx1Snh4=3W9i7`e!e@8N#u#^J4p^QP-~n`IS6~C%!8&vsU-_nY|%?D zTSQtr&_d`U@ibx*pOjf+;~41uo%SL!03BeJ#yANh{w(yZRU#KbtrrV5piN#VxQvt$ zY?KAoPWfDBA07@eg~8()8;29(QptH%;yE-0@z9>`hh63Dpmj%$Wp!e%@&KA*wK~}n z?Gr<*2pnj}z|r}dqrAev1Ix*3n*(dnsH=$I9H+~jxhzHCN$hD-@2-%!{2{$!Af;$s;8n@C!5Kcnfo8P7V9B*1GWm><7^iAWu8^h^eU-X* zLuXr~xUma23RP>c7mJBjRiP7Ya6)Ba9SXJg6I15ipjFl`Vp7N?xhT*6&p0^DcWSpp zBad#KsZd6Z5%jxls4=LJC#F*`AninS*-@N;pVB{W#lF`OR+dD$nCXGYd2|4%fc;x5 z6DN$*MoX~=O@;27q7}I^6v&CFNiYRH9cK$etM{J3!dECJ(>D)1 zUxYM~Jh4|SK1Hq)8DLB0>X0Q8)OI(f#XC;imgATv4sq+N;HupOQas$ckN-C zNYO=t2BLW`WvkH)6OXMT_iz)43}0G^bnm%Dt}U)1#6$;logTMnm(l9$J{hnB(77Ro z*~l{StmN*KWJhqT5bYC5HsKWy6k&=g9Wzt(C~-O(nc-q|)brek?Oew{93d5+(p^5T zbqM+)ds<3xoh5g9;nJznit*Ql2?`>^mZ4IL9*F5x7(yi=ABC7ACEt3X@OOWYM1+vB zk>Z46tUxlH%En{Q>2QE8_d?n4Jm4q>Vd$+V_IlSprN+fj=Kwjby-JS_ zR_LcNZOC1A_6b%@Y%?X%=QY2g^%*+%T_W4`+An8 z!HQmtZl@?;j3WJb4p(d{JeX6+HqMSvoSB;XP^=>0OwSS6Z?{6Ok4jF?5|y-2vkGVB zHkL#{uQheS#pyfddkH58Wg-p+xk0o>F5ZBizCl^fMQ4B*UoGtqD%i7e2PKhZ z6F7oK(fN-OZ1Nz5bYMTC&x+6ZtJR3;b+ zdr$Eb%%{XT`7NQsE`?PxXj;R?nX=@vx z#A7{UQP#phVu0vDBR@LZA11hH%hXB)w6L%`qV%E>C!MxT=FY*RuXU8=nBn0?5@kR; zmV@M?(ar%4SWr)&4ySEsvH3v%tni~!5DY)#ZOs7c~Azl`OtH??r1-lC1*XbnMsCfxb3SyityOimNp)&Exu75mw+7U{V;naJa-5A(;bLn(UFW}DXd0vT39~Rc4+<$sK z=i7)LiN)8@8X+%hCr*3&mEF)vVKIkbS(aR7M^RqKRMqR=bOEj$T1*wxQVySrTqBh9 zrpcRnBB>|b5d}u4;Hzpv;YmEC0xh115fV3+!5(y#BFcQd3pdAI5cWzpx1evWtcm2U2KI$1EC=OeB9-f`A|P5 znHMNf9+pUmSC;Pg`RI`AN#GGD&>Uj^gLthX4&%tDa#@G$JkfvVU@=LJw!ihdv6Ri@ zAgD`OG>K-!r@xyR% 
zes`gTi+R*yuICGjuvv+%&O#UBEY~!N>w{SdaH2oT~Y4JsKAQzberc|pRL;{lYi!!$zJ zRb(gT_QPShONL=%9X_%3riCa5A~^%6Mjr=SjIckt+{C`Fn$WwRe=cW8Wi^WkNSoIzH2AM3Yr$z6}b|5?3meQ{sY$rdNkCHO^Ud8CQ$T`lB0z zrRp$n6Dw4-0b67Yy}I8Y~)KBi{LjGro*ytx!i4AlQ`g$R?(8-BUdg%wsvyithAoP3+f2YN&);pmAR}6!~`-%(u zBDEixLXa8gLQvyJ?i$3y{4j+xbDe2Lx1sB43Uaf*Yy@T)b>vy8q!w33;wg$* z7{PV7rp*4Epkdds^$)vvoDi_nG3+q4YNK#DG0nlXj@Uy%0%;1-3I|-S&>wei@TR3QhfP z`$Ix@cOruADzahQ5KfZT(~{X3imsNlR*4gd51yowg$`$-l4ujk5U7R;!}-1#_b7<( z4zd|^`r*WQ>~PU>&<9r1$-PSDVJ6Wkr)k2n2HFyZSpM^KU|>VH zug@!>*r;0KGb!VA`WG08z8?KmrBeR*5zb~r0fqV6PbJvv(I)C1HxJ%K)sWcu7U3O2 zq0+FsTdZZa)S^u|^OUw``0ZGaU4=v-qRdwtXn`~{LSjQrEiBz@JnC)nP)PYBtL+SL zDdWKPu~(_6Y?GQ`D##g@hC$R*KRa0qP7gg%DMK(59?Ms(eM22H)n9wxoUrFL)ffgL z#}DytetC3s5Y!~whFm0M*3vx)u@@3^K6?n45*yiQl08_2qZ?9G@g@)qMQkvtCY+Gp zD$??p4s^Qcc{iaEeIe4;$NnJ(yttcAEmCqQ=AqW0Ci{vdIcVmHeMY5_&On-+*dW8o zIL02t*>30qP+puj<*faQfR&iM!eX%p*PdNN)3@)IZY0)=Z~%5VVPd`1MOKOoJ0gvv zU1LFktuuV;D?G9AGrIJgcN ziSHb7GVbQBl`wtrScrc$y&PoguCz z!mP;)0@oVLdaWqmjbRKKI2%!Tn9(4PTT2DResa%EX*LHRvP}u~mjF5$Vi5tceqOC6C2x7gTF%J(Y+HHloWDNf7(Up#;wKJvFm{LaM6tP-> zsaj!B5~i#~ z@!k~TwL5o$0C(47AlKC0eKj+JrhP2OE2I#5dwUAY?eI+Dn}%RT0H@DP0Yx;%>NazTyEO;NVk*L; zCq~$Mj+i9QMHrtoePX*I(y;yxRZzzH_YG64DJnm59mE>1jelN^%7z~89pmzn` zjY&%5nN3`hcyH5{hMV=m)G{{7z~bz7#C~DfP%rAiqQAzMu=FM(an#{SW5Szw%cr}w zz9u|fh<=`YJ#pDZ^vweY9pXV>_r{xA5Ra@NHd=;_WA`MCVMHB{06fidD=fp`q>+a? 
zl9Pmg5K$+=DlXB7#g4#xD0D=$flU_)*>o}HRpexWh5|VEp<@HTld$Y)G^RUA@Gw3I z!AcHkWrtN=@h=v-v&2eA`wmxTGzahS(-V%?SZj$!(xF;KzTh7J$#lp2HU65q9g`w@ z!lo~NJvNJ(H<`%g|H6_!-g819s^Q^l8q4Yw97b~c&uH;+p)iO-ZG;ij-FBipUrZ)x zvX$%z(+wE{546NxZtL`>uWUCKR_ciYKS>2KN<#kTNrq4n*$NR=S<#a}rI?ID60rya zu`r}eD+!i8RuSe}3iZg~)Qc9mV6Xk~mHlHznesPLvgc+*T@dECNSx%&;Y1jREOnrt z!iJL%kd7yoX=ou#F9e$7O*6{$#AtT=Q1q8@QBNSQI?$WZ z(^Zl;t=(s#fx-LV$j$cTIuVsDoW~c1v54=~^Y*7SRgsj>qp3v~tZ}0uI8yp^V!SO= zjz_=Pn`RVBiKi-42KkSOo`{Wj+JzC5z*k_?>&av}rj`dV3CLqtf&9r zR^oANTf9qn6e>4;D<93Lv~N4#apc#`xUH zn3lJfI&gW!ISx6Qqw4r;j9~j5-;u4UPN}SYua>CJ%nbmTE*bR*;S!sv8<{^}R$4r@hL2kgGh90*&;-23aF0ek*PIZVNSqX3yD=uSf=}5%)JLx72UEf zN=_;QvXmSoC<=%ONDylwqJXHVsHmV4C8H!2BS{2BB`8P^3j_tp5(FC&5F|=aksOv} z!~lXAZY|{B`#<~a^WJ%5oOkbCWAv)-*|TS;S+ja}SJzkg_7|Vw6_r+hWn&)aWZI=w zEv%209I__=FC41;11mWMmR-f~l{Tp<>-}2BgAas#+BmYSXy!s()Azzgd`d+H;=$%c z;-{chV-x)oI_)$z<`7O{B{NTMRV<}ghbMuP|0=%3G}D#GPZ<@FZ@f}8gC{PR9xUV5 z#Zr%Ht4q>}k70ym)C~%pHk%ZWq8VIGab?VXQGQ!WQu zcIUvhllkBt+Dm3j?X9LhI{<$fTaefEXCl;IoMJ(Nkyr;J5wy}y7~ zUbPU?YtXYd2m-Y306!Bt?C{1yrC1g?^W?&hwC6B+u@SD={eU$a>B#;o>B)s6%-F7_ zSPzL@;wDI@HmxR~q$*&2isVEc74qf)E%HXT?PQIfz2r7T!#~7nMXs;2#55D~%IJC+jb*Lf*Y3+m%$4H+0qE@f&PgJ6WQmjlAB!pG-&BiG1rN?=c@oK8@n` zoxH!0PAl?0EAn!QR(;)4EsdTfT3wP{TIoVdwbb(Xv|daLXt~;Wfm+^mNUgeoeZC1Z zYj1+`@euO&%L3$8?Xk!Y1RnWPk6;pV*f%}0;D&=_b1n)w!l{bPWn6<`E(1{FHUP+T zFvb{h!bb-oB?@E9VO%Z8A$l!iH3lvB&1M2y!*YGUvJG@Pz!|_>8ew)Nc zZf_7J3m-Kix9mPj{`&JGS&GGv{4CBB`*|JdE#$>5vL?rE9LrGh!%rbN2BG9EmoRKc z81g6#LFl;5r7F3IEA-&5rAeKzFRtL;&a9LPLA=g0<|&!gO& z$9$eBQ=VAX1#Y&76S9-8+egR(dB%Lm)(N8WbQ57Yd}%Ezyf z^Qy0*tX#)Aavf##I{8OhFwVt9vfo$+dGwb&#ryKJPkZ%9;)8kxM8a-q>$1Sjr%(wV}%we98h*{*PfEMCk7M4fSDb!5Zs`N@)C*H z)T3-YLB7nP-lN0*(V=`YVH=pRJS@`7rDQNo&)l7bp|YvF{ZykX(y8As0I4oA%Cu+ZcM;2N+2IEuEe@pkiYLx?|0*UqMvN3F@W_A;2i8H ze{N<(9sCR1$e|VcksIq4MmgltLb=!4V}2EZo34YaO)$z(Fzhdk#W@rQCbtuD&LpCa zPKU;Cxv^zv%KO0M6OZL2z~+Di)b$DQ@KYk5dycdjbsjV7G-lMZYf-*DP)=^( zTuVj1m!Yif#{RgX9^ut8=eiCrcZK5EWg^c7QQrk(nZd}n3Fz~bM0r#|**C!a_NZ%L 
z&`^X^R*+&>klshV=)kyc|3ndP`5#6R{&xYmwxP->XJlfO&q1%)rcPypm}( zBXj2yN5m6WK@4Ht5OXG8#w>bfp|6{njx&uiFp4oT3l;_8iQaz!7v?=#M$aZ0UxuI7 zFJJ)UA0iCXF>!C_LWE%hgdZjupQdNww?zL^0f#az^c$U`XA`?h$G|4$%jL}`amNrn zLL*KyaLA;h@0e^}Z~2l9C3MhZDeFt zNiwz!)>(PPCYyPc>A6qLd}i0hX&$iquJNCW556W~pY{40`X%<^UkS|(_7BYPKRjSt z(GAw)9YV~WrMba!ougwtKb*}f`kxVouXCdZ>~#s=+jNZR1zRH-X1I-AiYttc_bwgd zJri%ef1nL>|8a$-K@Bh94KMh@#$8uLH`qJ-5#Tl2TYufdKUl-)UYDMSfWw(Iz+rTV zy&;?1M8g{H4f!X`uz`Zeb2{|vyz&6l)VVkzod|?d_1S!8g8A`+K zUHC(X*bavmyEk{88=!f`(wP0nA=U!@iuttmAnLHs9`(`m7ke}&Oa7op<3KM04lg*w z-rhPt)p8qt)6r0;|6mQ{sIvtQJEj;cBjfVf%J+dmHW7 z{|7j+q0V%O(E(xYg9i_p{^dNYb&lG0_@tlI8)M76RPQ6F^u#!h+NS*tia6ZF_4UDU z)3bf~uXil?%U+tJxyt^*BSu(dKYkBB^MD0^S(zt%7B`hYytBBYBYi}1tARBMk%>L{ zt?%1D;Xk#{?L+$?Wa34qS(ON*w6rGz_8CPp0yL-D{|c1&ROgv$sRx}db-_&tO04;< zllCX%_F8YVuzTCVQ(f-wtR8fEbh;09o$d8o(|vC6?y2rS2-!_q;UYAQVv!3IMc+g& zuCH9vbMb52sW=2GKHGDNYnGmIZls{+^85t;am~}Sd$Z5HLh&m{a`;JEhH6MxORsr`>Ef*e}BU58*a0>eZ_4Ww;9~# zaO=nI3vSi8eaDR!ki8qXF5E_NqlNub|MB~GBT^)^5Q4u)aT~*J9Ji0S4dF%$bdJh) zl$q4aOiC?n3SahjWjp=9A==^pS7kfZ{-1O$Giu(AzFb@k%oa2__GDg*n;TVF?lCYh z;!cICG^-LDqYO8mNkwfglN`2xH6IhJ4(_NA-OPV;G+pgOq^l$+VZxA10E6O21fI&~ zd`paNI@JU*Uk(Vd9&ZAFry15)j!kf5-`n@|Moo}h`h1o20nBF^5}cdW1m@p_UsymB z%!{iStLinuYR7@SMk-BkFu}`M1ay9f#a4Y?XBA-MPq~2LSEz8yftIVf&@?n1C0S9%4 z!bTvy^g3J{(+Jz`-d8HMG=iq?-eB#DMo1D}_Moo55hPErQz{-e0^cQO+p5Gy;8vpb zv#`M#xYOdw;{G5!9Nr z9R+tZ!s@*tkMCc^e*U4v8bW8yu8c<5kaB<0eSaf_AAVd=Z`4F^@}650C(;PQ;*94# z<{E&ZVDdv6XCr*e_U^gH*9h4w;}}kEX#|6xC26{TjWFcs;6eG`0EZ=>HY;K|iW)b% z)2=qce8UF)rV~wst0gu3IbO%^-`(G**$5JGhqp}YHbPI*FHb+_M%dS$m6EfziCCmi zCta5H1MAgW|14puy#X9wwwktMzHP(tyHvk6fKHLp<-WTOAmS*5O=$opyZE{2r;Xsh zr}1fad;>hw-|DeyZ6mx%^|x_jZ6emj=qj|;HGssIYSE=zn!wg&Bg>mJO@vAYFGCUX zz4?rC&U(2fLTCwxk77^*u>W3dQzzL(GzgSrzVd5;?%N$3;?f&n+Pd}P-HQ!irnJ7p zzoZeK$SZGy1C1bW^eex_r4d)f<3{!~G{4VfVD|upCVN1v>3&{XaSsfoaTf7A^}vU_V~(5d_QJtU_A+7JOjV?>(#GRv;fe-cJpNPuG{NyTV2d6e8xHEBWc3oIFD9C@ zB68(_>DtcD=+8$GvynP4*IDGb|?}>UxRB zQ{+;9WxW1pkxlh%FY%<^IN#uw*sq#p-taBB4s{K0GU(&u}K 
z$E~?yVc&WPTU9HkXWYF6GwC4FJl8`=S1JDF`P4%kcswP)3h5liyWWW9FAMxYFZ;QN zFz02u?fkKa0ER2Gb4U*c_Ym0ubtb)-pIhL0)imb& z!RPvx2anwk#;^3j_O~B);ZZga9&eSKV4f-98qR-eHva*Y|K+v(>_yAn5bPim3Oe7P@!^sbm6Wi zNQ{z^WTeI3juoL@bp%tt`%iDA*4}+4FP7F5=Wv^a=CiqUcFGr(Y5simzefT&UN*K)~V){QIUFLVOFlHCo*%M$cP{fv*)n%SXoaz z2%(P;6s#wvR_Yr5;HxKCbd|hTW4exvxbZTvmG=;UEu$sS9VDht$sMccNQP6`3V07FI@J+8*WXSr@u(*rvDRs)T&yFI3HPJl z*AgoYQy<(etA{QQGv#|(sONe=uF93FC&X{}?s$d$+?Q;InHWgI?9E>j_5F zc?QqEIwFRLEA+H@J#lM6(>D<3c2?Pa!G0&iw8%D1))C$#WHFu^g7}!BYhb8NCB}k| znteY`B^11?3=)xk-&TG8cibS6@qGTF$yzGW$UP^YEKenl#zZF+T2L|Qh0>Qn0V?6? zsB*|$gi7Gx6-0v=~m@6qZaRMdwasNU)$Qi`vJ^Q&|bm9K0DzXbLXRhw*v<9d6EXB3Rg zaj=hge^`X_pt^%dQWviFwCf}^9!83dZtaBAtfOH&3;GF}#u6cqo_^wh1Z}4D6MLf0 z2qXsf6As#ys#ik$h)2hiW?Bu%9s2 z2y|`T*GFWzr*VJh?}wTDxm&4xeS~V9`XL$3equR$&%9t>FM-0QogCatID}0{1&s6( z<$U|9W8|+Ac`r>KH}za0GP2XSsO47(oAgIs>MbKXEZIP0I!@Wtzk_0PU-ztavp zW%d1)c>brK$VN}6b{O81^;>=OJHiWo{P@=UhS+$a-Q&GL8`1cc$Kw9wH^i`If%Z7C_Bm7uM1)Ky!E zUcZd?SG!t>6iYSsdHv2wUWsNf>y%!i8_UALjtSeixi1xNrLGdMPwX z95Lhzv(1|zKE*QDa_3ACX8eN1?V=MzlI!KbfwBognU(Fs@TYNNiqz$~d-){M>b>d` z^T-4d$?-|Iq-27yf9so9B{oUK4RS_R*G&+h#=*tBX_9D3+*%|2WRk$eHuVzL)Aq{u z#OSFBqIib+Yq{_w@#p?Dxp<$OAiAlmpRLH9Ach_26Q-voh=s?I?-RtSt4ngM<|c@B z4AI?{^Am*MU5<4Re@zewxPVL52|_{ldtM;YzDoyj$u~iW@R4T?SN9Xb3k%LjFicE@a=M>ipFOi<}36Qo4-daXZzYm__okGhUb4xed%HBCw^s;3TiF{ z!=^_gD%*Djg9_72tIpPUgr}il;GxMkMCs>C1JcL*iL7QGl}pUm2xf;fO3cT638`#K z=cQYyIGg9DWVPDis_b_EvfwsYzV3|hK;Rp&##ITas|{wm+-f;LwL!|-@Tv_ZZJ_At zRcle-1{O=x2WAu6K=iA_p-Ye3V8h0HBjMNEz*CuQo4T(JO2UdQg(TYGr~lWEw_dIA z;@!48K0Ix}62qlFjI`Il03U5yAtXnq+39O5tV=gC+=s^rdggjX^Q~}REJt_6$5t5o zxkJNgwiOuK$GSxE_|N*P75eTsNoG@8K`G9)U@W-}lFqK{jU8%(wL9+BWtY5xZ@2x1 zxm2+oby@q~IJZJ4Pf_pEUo9{>e5GimTpO_Wy{_W6Zv~!>QX#`utzbx3SskL*3I|vE zneuA3f`w-}&+@hJh>PoaRvL{cgAz^=o`*U>8LA8_aPTbwU3JBSku-M9g;IE8Jc1DoN06AJS8iSwAgrcEKCv zqnuw2JK@iLtpon7Pdgw_RA%*yb)B%4jed>?Y42Ow0=g5H&1+`wx!VODj;kWalsdt; zsH^spOBaS>DfhY~+X+G!%@hwRbipT`3nh0rJE0&qbBpfYE=U!mEdaY9+D@)+n?@H5 
zq_9Lb7IhIq;b#so)^`#4B-#xN?>iwb-_*8t5pP6j6TA~P2d3kbU?)tP@Qvt9bb|fz z3kg?|mW6pS9xd*K#Autt;?xeHH>?agR@VvTyH1ZX)^tMbiLiL0sT1H=GtRJ1$WnKq zY*OeVsvgksHO6-V7fbAg<>B3g#O@NtEw^`u?4VN%zL^WLXleZP1KiZ2)*chvQS+IJP2znsq# zubj_bdpUQGsBEuII@WRqF0OqY^0MU(F+RJ2i>dkzL0mnKkK%6#?!FUDLHFMfr=qW= zmdCXa-KsKm7ouAT#aqGS+;%O*<})sY_P!RvF}-5TdRx^0ONE-6NiBquo~+E-xMt!& zp34S>^(_S9RTH$@zL`KJc_HdoGr_o~qf9upnFuhYy?AIQuFI!7-*0IqBDa_JwnR4* z__8fz1?kobQegVM;^lvT*;eyEY}t7CZxG$q;!DRxfKIeg{EImwgpn2=QGt2sS2d=8 zwP5_82#;8%ytQl z6nUD4;YqVwJcthoT(n!{^}kx5RmO8emKLH;Hm42)CFZx9*V1AnE|D)9&RMddboTQS zh39OWmTs)%bFJGb@{&%xS~96lu}*2AZd23t6+ucZh6nI1zmWyTNIV%D^v7D!eR07m zwD7lr7ATPxBavnUScs806d!s^eKa+90mE~_Zn1aE=Q{1Znlqg~%eT(<(*6dM8n^!V zI-HbRqyI9h7ZWMQa{)(SLa>F@?`U$M`KW-_9gE37E5cL=$o6-YsZeiepC zM9TyQ*3*>?HrlTzb@!itmd$953JXE-QjC-+S2XyKNQp*haOt8Uv;O-?iLE%?ZAKU= z5rZV|!x)LQNQr2Span?eJw(rV*vyY~2ys10hXjo2wT{^4FTi)=pZh;;6jroIi5Ni< zQ9chHb1G#%Y~oU{bJ*0Q%kS`U@^Jp!!BB+s;zNV`WgIM2T<6N_-3MI%oHolECt1W!L_-z;>d zlK-rbOO4cj6)uqm=lTB{F7d?W?=<7Xf(7Eg9WK#hxbN@b5^2Dm>mC;NBf95;1k(PV z4ENs<*ucQ9P)>V2UWLH3iVeCJK`QTUl7dvaX?Cz~THwvUTQqu4kVa3CGKtEoXwmqw zgl5v1+VJ(`D%y(tOwVIDyxQ=mk8T!()-WAzeVPd?`ErWeT%Up2OSA_2AFOUsO{cFGz(m89Jc@1sd{BESwaKRdc3gQ8x5d8z z5+48dtw3tpS2$#Z)@_FiPo>mOJ_V@GIoW?3zc^UGHk+%Q0%@VEhefI>@V#ui%kTgN zsI^%$LO&_+=FAU6+X@N@P0gA4PEkPVq_W1h7ij5st2wf6?qpoRjh%0Zj8pHiSF($C*>NivMQi|!W7 zr@-o-SdxipgYt*v+XOiLO%sFvk zkrae;_u{(3m<%;L-ZYgoQ$S3ia>+a7&lj7odCMk};RAVjz9U~U_}(=tsOX@;krPf@ zbj8WQO0!Iv+QR7$FH44QB5iYSItuv-`P^&&odtdd|TEW=g6bLxIw}f z0UV#E11*DmSCV1vPWr@?RTMC`V|DbAOa_lwVG`w8GO$!%leC?nfU3x9$>$s?u)4ik zAo6eucp6SO4Mb!6j2nNO(WhV-pk*ufV7=YE@df3~DR3ZB<>~Ks3RqC;*YwR|za921 zd4-Z-5MsqDe!k1z~OLr&M1 z9PGbw<|Pvkl$*~F`DUXclfkrTZH43IWcaOirklc91U3l=cJR0sL5R@7&xaTaVegWn zjO)_{Kt8nVu8&e7ygsa)QKVZ45-Pc$o^CG$Z<(U=_g57{MndcH?HxsMdUf0nZ|?$# z(q_BNp;8FXiHwtFn+sv`O;9tPS|QxnbMNR!JXR{=V;oa0gnK@>&;CSu;k!k?5uS(J z`(n={&7?>#NgK}wDI*fm@Hrb^i?og!BCR{s`gYI9Y`9p%#jV?t1Lr=EWXmz;!0Nb! 
zn&l}u5O6{E(y(_9oG^;cE5>kUU9SoTK3C+xv$KIqW=J_8c)3%8jOA>130f!Ol7r#% z272YjvN4>FcB1=SHrOn;b6<DHO8SnXtY$z~E6wN{Ef2}gr z9_fQq3P0{)dq}%F*vWV-Yxu!ag(U}KrV9G~FC4GzFR@B-8Gwo)Tx@w#XEra2l&RT}8KK4bcY)%Qhf zu-wll#Mtg6WWvdg@gw3hnV=WJxsA${1z9uCwax1?f%R01-IljYm2&A?uvZ zuJdeJus?5=M`=64^gkCk)xw3xUZgJ+9&V~59b_eM*vcb%c&QDY;2NGlNKmDYa z111f0F2Y-LfRVI4)RQ>}Zs;B#nmYObGzXS0e-waJp(Bm=-UCqR6`XT9h%ouZrT8}P z0VE5SaQMzXfUK_mYrbKrQ08;+k(GEV9OP+dJ+d+tI$qJAS++VA5-+cMV!b{IdZl%_ zpG7A^(GJpbwU>!tz$IY4%qJ1USlYg^a7cuiRSKIMOcLQ^N50Gp(?qay$Yd_voCwu? zwqNvA6Tzsli+vw|B2d=$9neSGwx^r>1fJKX?|6RwQX&`@B>D^`sJ4XjgO5ON3V?--S0I)eaRkpz0>Vg_!f* ziUx^L+pdw6p`Qq))l4|^6QTEG$z0BnM3CLMj;VfIBHT(oGHSO4^WAi0+rBjsrv2`G z!SDV+gRcMaah*i4jkr@eE|>^BIw5u=NI%qh_-!>#gzXChmoIFyI3G0kDue#Ea34=dU z`<~!@8SLINM+?7r-Kb*c>i4yMtORnV(Eq@dCt6?j;IcD~FzJd#yd>axgcQig`*`4#`zMaS9ur zgTv2vJ@d0=kfAnoGqj-$G;)u18Yw-6U!$R1>%JDi$B_N2y(CkClyzV=E&Slt%H>kK z3*LbGJ;5(>mTfS|HTuR7-_b~}zhvhi`Wl!W1TsH5*1<0MUYYt6| zV-aHi9P~ae2^`}s1Kqus(v*KbgE6Z;N7JAb3KPAnbsY;q!|Qu*)!A%l;rhPyxgF}$ z^u21*H?m=L8PDuI)?YVhWKC;_+m0pNn$n%{q%AvS=Y|d_x!@Rm)H@w$tE#%Kxj^Ua z#Z-Aa7p~W(I4*mV19w9s{Y|BEp?;8@H~$^=zPW762JT$AeZWmJ{Pz<$=b0?4EtCs9 z+(|c0cNV~M+sylK9WecjteWcT0w}i+R}s3G1ATXLM)`i^KzfIc=Ez76Z00|B?abzK z(2Dsyyu-L0__WntKi*ajiI>uQ3&fGnyW2E#ipn8J?=rsDeh$*rY^kT2UcjAi5AU)Q z<)FM~gCadr0hL$Z>nqBkpxHh*o96}Wk;%N1lWNdT(bf7du;3H<6PsJ03FMP9|UE>wrY_b8Eshq%ww$+ zo|&ju#?sFOXThzh^w5!OSrGry=lTXGT$f8)aOHl^g0z#dJz77rU@|}cTNPIhP`KY| zz51F3pGu4z>c+C5{5$y1VLmZkwJ$7RvVe8&w#KoUEO;-mp-K?*b*gNet_;Jlx0eU1 zuE%A8y*N*!#GNeAq%?#)d5{J9R|VKs;PHLgNl+Bdf*%?xrVr(^VA^XiMw>qiE~Q@n zc=ZRa|M~5T-W@3b>B4oHYbQQ}V&bm~9AF5MQ zoX*|IhqtXFQ31E|!Qu1ATSG{X|C)1XL7K0rpkm>l4@Q^SP8NmbgB_!_+l+5MR50sA z>`cmsTX(#+JiV9?v#Ro2PoB>Q@s>LeqCE3K^n=&syNda+(;~@yKhj~_GO>}CJh(<} zEMVT156&_-oAs;mV8n6tNI*hjzlNhmItyN~Nf?x4*yigp zT5I&OGGL~jv)eQ^13HZwgul(F!`q!BriGp9AoD@VuA(v>rW=Lo7(CJ;lFQY~V^2Cf z=1wtn-I@-9Z4=dY8`EJ6{)OzZ9`4Mono%yw6;vv+r3<_3Yc*~U^x*wi4 zc?c`CdF5v@%q+uT4>>^YA#CG4{>)YKAsF-?QKRR62-L|B=h?rdf!`*+yVf&lz$ts| 
z%8?IgaP7j?@k4ECP=E6<{nWEGSb3f~_EvEkcxxrCmn%qvn-4nJjl9x;ot)lW>y!rJ zzVG$FnWh1KL6(Q77kUpI6gu&ucR2YDep>wOT($$K0+yQ=9o-`aXyBZ%pAggnu<7C29m(@{)!tL561!hXO1l^cfz z6^2FMtrv@h66@X9FD{D(zmN9_ zV2g#z0kv9LjIofgQ}{v@eJli-y6!jm9Rr6vO?>Nr#z0JF^=6-$7kBsHU?Hn&V~NAh=CyG+DvbxOfT&Fd9-36fc8?<(6{9yyM#y#6fSF;-SssZ&TkLQe?Jlpip-`Q4Q0^~KXka)FgqI7Kl^b{)-M`F zy6;CypN$58ebViVuF+7FC~_jhG8&#{nnfHr5)FriY)=;MiiQe_4RYo?FyEE=18&=* z(bjW>cO?e(9Ijd-Vn!DYtkVOWQ$M31_?3g^(UB-fQPA)Y?2iIU{hP_so+!B9sk>ga zI|_bMHuX2ZjRN^=ZCekvM8Uu%_I_xJf?B7p6}uXuKswwgjju8atT(e%YE(qQ4s)TM zPl}_UVM=!Ad|?!9+H1^WnG*%dqcO?n9!9~M->Qj)Nm0OP@$x?B%_#W2BkZ0}KoqF3 zb)S3a6NO=nAMB8Fi2`2A-mx&tC`kU~COlvi1&*<$M}mZ-;F!jc=5F>Va5G͍> zCd7reQClN{Z?e8Vwj~m@KODCnsEma1`K8-FJ&y$PiOt`%N+N-BahteYK_n>ebQ|Zm z9SNG{BBIhhk??T1{5akDNH}&!Mp8&Q68JkA8S<1O;q>|kk9H_TLd+JW9cwm3E{58ouT1^3;^`L0#aYgw^6-1RxAWTX=B)@Q=g z;%9bc7qVb4uVO;z$4uy5D^enul?BPB_sYKE`bXx#nd_#g-#PUoZ08SWf%U_u%PP-h zLGZ1T(tOh_Ai|{ehitOo^ZCjHx0SLW?lMat=jJRBjnL)1B!vC2t=TH!fa{}OKV}5h z(8^ocI4Yk7x}6py+N-kQt7Q^jCDPyOow*12vLHNAvFsuCM|#VTJ3K3KebV{mumn=4 zW!#+;nC^lQs-3b~Ab(oqOUlbk__`Z*Hz#C4-`ex;x9GFsk(oy_V3)!Pg*I5;~E+-GZ>D%Spd6fq=%XdC{)BSpXFewMAmd2+K#p?MWc*8{cyk#D6 z2MlrR%ICq2j%7Q4Y{-MuHT@RW2lF5&_UI*dq-B}Gl5B_b;8tbF$LXW_kb6yQZ_&9t z^pe|c&SjhrP7B7dJg~fTa&Papd@#;1m`%Q%2deq+LcSlybp=z4C;N$f@RN}`VQHEV z4a|e|f>rqtlI6UHyDuM(y*MPWttTIzajlXH^1yWx=PxA+IUg2G>PTr8*XI5CfDdCn zn-1gyXYIlMDb;+q$=ukGn}qG`aiYA`&xcbMQXh`s`sU1}RdAEqK%qnrSRw5o>B;9SaR}JSt;1ZN_;leF9m@cDY>D~OF?<;=EobmO2O@8 zU&z3oQkbyb0Rl+9q-Y;^l)@K!uLcTI&4rIVOM&)9hZ!Clrz+QLV7}L)k>_3TSXlG2 zb4y$)Fr1P(vS(W<%*CI|9d#{*$L61+%4zjcdSwQY%C24dN-44g)Y;UJwuKY}?aOb; zuwpPc_`Y8NuWt*T+u1`Yh4d2nWy{(M!Qf0*$mqRzn6y%n=)M~d9@cBtj-H8!FO!#R z_b0@|hsRQbKCua~EcU>A+0+CmyZ-X_wxk4j^qS>qNmv5Z?GDr_&qx5}sF!96R}w(x zdr+C}k_0G@e`d8OJOPaCSxiq2$3x$}+Z2j_0>m?oAF0}s0423L7uQ(h`IqaGPkAQ5 zoi&rf?;R2VZJpOwk*=;Hy%?R#_zzxO75~HG2>+cI*Lt*d{%0>PzN-v>ytbBpRb?JV zColBan)(-at^ePRo&T4`Dtsw?w)4zO;e~*O3+^slo^-4ire6DtUZLZ;F6P6v|GK!p zSjhF&Hx!z$OWY=-XA*A+9b>5B{s5_P6Es|Lh12my&@tY($)Q;`(Y(nOO~)N`?lb$^ 
zgzKvV*Ct;7A2N2z{NG^g{Mv-Z=s(6zn&I)E#!i|amY*crIPcPon#q)AecAlM=3NSv zk}U>G|8TrgAEAWqS*uRZtnsNebT818798Htg^y;(=GjiaT!$acrn*uz7ZaMh)ox{K z>j6U@sWu~{g9q;(JZ$PtI<#QVG(A5}LJKFlOIZXTyoXjut9wUICEUG7b3l1>l-Hh~ z(ILO>&@o5apMWF1{ep$_OeM__TBE$m)Vk7qumZ{H*`Eu*4iu;j{o)f>uZ&xVpFYGxheqB6Jb#;JLG7k$^-;gVw zU43)oD^-ag<@e_#ZmEqHO5E1`J}YrYi$hH^SV!QzWQg9nBFWI*>T{A|`}EbM?jAgP zUh3WvyCSJ@Q_nf62(v&n>HAiZ=cOZU9~MbRITp`-_$y!`EkfrTTGYbsfK#!alQpN} zF8^Y;iTCEzw@J7r5@?ecDF4bP>DE>bbluZeABp)-0SniSrc&;qx%l4(EbNbb^!VxL z_m46O2Ep{Kg)p1=C!1XEC?DvS#5k7DmQ*PaL}$7`HO1GBQfmIHoW#G7dg*JjpoDz$A|;MUMRpjGgG+EChEU;=On}eDe)U=60*!5={%&Lgpp(I6{epi zKv87oOv?BKsK{ondhvb&T4#GiB41Cyy1u8!IBO>0L=B6w{i6v;i~3<>k~9I*ahoKj zq9;HqCI zuZ%vzmQo>)CjO7GBPH&u&g)4yUZ!NpH#Y$$x`kJ4h9}_pMmp2>_6eB$HOkSRIsta6 z)rX~mk;*&COW03m&7{r7ESCS&NAQ_j<{oh5Bi!0_vVX4~ z73f9SFb|21j)j@~YYp=hc({J>YRkb^&6wG&*Zc^6=MJ4^?w$mFE2>;h$t3K3Xs$FE zISJ;x_tjq@HRJB0Se}`LQ#-TE-$7fb zxS}x%Xr@9b8_voQo&K!HWVevzj71;w%_|S&OHjzlUvQUGLFI~tKHHa^CR&4 zWrys$*%1)>&VSf^W(0O;3?FUyJOYA4<=!t_MnFzz@{;$H5%9{9oTp0}ff7BzJh_Mw zC~mYXUI3`EQMYLdNFT174c5A-dyl~JuSwY}k$SM$XWzO! 
z0$eHM=*}_%Byriv*X|?WR#e&a)@=l$61qcIBCS%qGWPA%2uMU-h36L79>>1Y;S(cJ zu{STbYW)bXYjxN@Tr&bYRCCOPSB*f>*&}`rmydu+%J(g!J;QMPDBpBQ_Aq38J}W#E zISke(8&){nAI5mpk^;41!_c{M%{-6)Fwo6sh5a}+3~3(3DI@7&AkKDpKH(jPY>hC- zV~oSlXBwt?^v4jaWpdBi_hksWi`*)MJ`cgYF8(Z&@gXP~{Ti!3Is|=v-d)c-hTyfW zk;ujOL!dM0^q9GE2)b6U9Nt$q1d}3@4jgZ(kT+pDJ^Y#q+taVaCpA!kcXH!v&u3Iv zB|9xMTS|q}bNkXua;czqEGM~=LIn=J@eSHhR7hmrCqEiag`U{XFIVrOAXZ=baVUfe zj}p|+zYeAXSJMf<*qc-k%E+oVM7mnKzmasE3ROI@S9yJ@VBgv)wcd*gZPk_GSy!nL z!2PmAQeX2(+^b|Q6bgHRHt@773$V*j#o3J!p=P>$Br0a z{b~CW19nkidQAF>FVZ8E(y}{{S{tml_SC0BrozxbsU8*L`C$1nq~*j+I9UhFyLjQ& z=IvD2cltej0+|Z4*6jHgk?QfeBtOugLO8$eRyK7i++KR4%|?|9bkmmO<|yVDFWi~#(h6)>g?o%%op~5D&5sOh_DkM~vO#kMj zLUioJ@2yLyz^eY5_xO)N$b2_hoI5iJz7B5O)*l98+D+o@`o2N9Qy`R4*E0yeZ+Jv* zeLn~qv3VQ5w+%wF`6;dDra`#b__e;aVG!EG+q`$z4}y%6yA^ZYAcUwSI1J_vLhs&t zKl3ES4}!YsF|WvIY{&K&t=o_eFpkHcj~E0gWq*m9aID9DWOmKnLAY{9 znfqt(AQTR9Na)@kgbke=zm)n7f=DIP?kB#3a4o5l|FHKU#LS$ys&ruxR>ZxCOmQCs zwLa}S1!v4hZ8|J!j1Wd|d)$QYG=u58HFE#||N8HLt zqA-xE3*)2Ek9fh2c$B_L0k1mXe!x~o%-Yt;O3u;j^l2+gF$)(fOIvdXD=}9qH?gy} zZZ=}JPI9hhj;ArnV7pSJ5*?qjyPKS|wH!%R2w_?14E3}RSliQGw>szMVrJonMOeEy zJBqowJ35-VoELL-Gjp?bb+ff_-As2`o9?8!nJZ>-b`o>3I_>O2%kJiUTFl1Q4WAF! zl4MD96ORIx3Xx7?t6A`7M8|7DTW6f4XIMtZ#YXo@l`H=Fb_P0rZU%lnF$D#)O%~<~ zR+bo3Q%T8EWut|;nX0*hf~uKOn z*ti(0Z$#K<1YK5EmfNU|p)Y8Aw2OKEv{YGJ%kYGQm6^4LyPKPp%LxN*gA)sUb~>S? 
zj`-II|EZ41jcN*;)GZv`5icA|J0ZuT4EG8!fVu z2G@g~rXgPatxzT!2mvPJ|L?yie@RMI!(=xJW1uA;`cnwoa(8O0{;VMu#3r)0cDV_) zbKx`>o~GyLL#EQva-aN@n{2RM%-q4*!d}eM*~(SS$=OZJ!r95q%+^WF!_39j%*oAF z%*;*9&Bh8P?~J=Gin-YS|n{yCw}Np#)XTmo!y-*QT9>j@d9fr7b_c?ovg9tr%}BB-XVGvSxja{LQ7s8Ha2c;PX_BGHZpeTm;81m-OPXm zru}ugf+SV>v~`TuU%U{1Qw#lFDF^qk*x2H!Gb$RDOOwP8ymlMkByDPJc=IocW3Mn9QaRTLmWuQ7k8%|NS~P&_p4nKHxgG5g}dwCGb!vZ{tWuDkjno$Q!>Xw%KNK?sXMHsXCjNm{m5qh({CKLA9A`& zT~C|2*q?BawIwQ*8}uTQ{T<8{_QE+wG*Y^VT;rZdMUmS;UHn1Sz)2A|V6`l8QC56}w5TwOebK zu!w(z5X3)h2_lO~C?cWXJ@>tL@9aMJXJ{KF$-D2p?>qP0bI(2Jd&^oXS91MZ>};j9 zIk(c-g1_QJ-dI34Uh40)sz%)mSp950P8z$%F4;Z)!(m{@dhJraoVSY-vCgr&aiPOS zi9~_ba*5k$>~05}ptg?%>8ZdhMcBEXyaK zwYs{jZ29>@!75mrM2<$r?cW3wJM4;+1)c!^3eLG=F5~1jI*#o~6Teofban`be9tOn zoqVq_-nF+SY%bbU$rSSC{ugi>ICcSfI$pP>9S&=Wn2`cXlXzIdM8VqwbM&cxRg^}e zGRhabw_4?&*}k{MJQjP!QioOET+CT-NE?@E!v(v$am{x($r}rMD-JB#YGsPso8-__ z?ogga1ZX`Du}Z09gd}%|=3vSg2Q`G2&s!_3e0Pgmju@rHJ z;w5i8KXUBOOlMv!k~DL{qQ_{0$yU z#1}TS7_qvB>Ma)!v)%0SGwj4N9&2wkQqz{1)7PrV$*6#_#D>9!{~C=mcI$jJJ{U6i znP450e)&(0%pk4)uck!XjqLBIwFEw{3am*dy`-v{yrLCWAI}M zH89cgCoKZUjQ#@@JHWFAS24f@N*QQ4cD-1HMrj{t89}>Zf2yY=9X8wl_!B} zveWNJ<7tC`m{x9ZQypQBEoSd7#NGDRC8;Ntj55#dM#NG|zE;zg{!t`4U_EIWfROk3Naoms`_je@X zF?SXdP95TsQF4C=qC~VMCY6LDO4OB(ef)1CN{;-2qU7ibGm=8LsiLi}5 z$H%PpRy1=e4c;S*#{je3;Ais!W@V(VVg1AtolbH8s1vDqFv1Q;@CrZPm1y0Rn!d3! 
z=iNRAEbhVUVsXb||84BD&tE4$Pz+5jE658&AvP4kgZ$*{A}vx|X{tsufZ`-kd~`V) zUvKa?3o1${l_o@RGQ>`Y@Erf>=ZI6$o|+NStFk32?J9uhJ>oh1N7@bJ;PHYdpw=O# zOSSAlgpU^xL0+qC`u#8H0!HYcp`RVzsecN2S&-`NXPAuq^(vt23eN=F?6Dq}Y!>e)Wpzx6=~5#Y%mUie{Ywxt3_10vuq%vi?8KBNdh_lfM+uu@K6)ZkUq zpIB+bm}^VewFo;`!|`f6XZG&4Z(8!PgF>ut(}fVuiPceRj`CPUX?5% zmjx=1%GbXomh+MY{j%oLNpq2%fW$uc3cK_Q#|!+}BjHX@3^IL%h+U~k1q=uH^8(p$ z6E=Lu;AqpQvLUboktW^kXXE|ieLiwCT(!qNQA$)mViU@=;*ADaG)fkoxI!xotw0O# zNnWWc#Id#m{|qxo3mmVtw}!o9+!wg~V%*cf^wszmcoS?4yeP(@N{HIR4e&|4lj|jr zpIFO&y;i)*@BOkC$A!?&+^XqTO-j2AnDoFuM8KW>GqoB}K}4>YhTKyFRrtD;7)dUC&%-Aa2S*^Pxn*xE#$vcz;M9) zhscDZS1Amk^-N{Lr1}fCjP$YVeH<_HcYad`TRe@?jCsN%J>BfrhW;Y}3 zOby2){Jq~rMfEc$rPG|NvKrvHL!;sly$;(SBC8HwS-^Jl7#lqwT?zqpwQ1T;cRJIX zfiRaQjV=uVr3C58tZhy=r}3R^)ubm%dkSgDd62fsX~*tM_dqnuWy%@zeYQVYhH95= zOP0z`C0kCePIl$3Lavl-+qpkk%=E}xO7+77>IJA%xGq)-g$zF3CEEaDtxkIA$+rCm z4y>+55fb6tA$)yi?hs0RoI8f6R4Rmgv21l)4hClE45@A^0C<1)&fJ`vJAlwl)!R9{ zA{KDQ=1Zw{?(bKtvkIrHU}r=O&d`ez#>?OeX4cCwjRvfcXK7%SYOGLZ>+S%dS%J+q zbSCK1K9HXYGLDn!$FQqQ)0UM7x`yaJ3te=Nds3% zvtE4-%GJ#k*ld?dm)hrzBGmc0gD62?Va(b$UgI_o2=(D$9w79&!GC}1SoQBtai-}l zoew+fAoM?9D2SN_%&XWl^VvwWEIz3};lQoy-<`r86F#Q+$jMII#!q|{F)+iN3?#jc zn+Fnov{CQR%(1H82>Q-5ceF2|^!R@}^yXIrd`U;!|7-`Tk8dg{W_AX>dcZJWbj#^) z^*Afs`g=uVy0IzUv?A46$ro}f)~&Oe*JRhP*?^y@YPGU0P1)vbSEe=FytZ?l)z#eG f)Y#nAl-;m)6+ZakJNkd^k8%51{P5iK+Z+D_tu)gC literal 0 HcmV?d00001 diff --git a/tests/gentropy/data_samples/variant_sources/credible-sets/._SUCCESS.crc b/tests/gentropy/data_samples/variant_sources/credible-sets/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/tests/gentropy/data_samples/variant_sources/credible-sets/.part-00000-a9a641da-3820-4bf4-911e-4d61475173d0-c000.snappy.parquet.crc b/tests/gentropy/data_samples/variant_sources/credible-sets/.part-00000-a9a641da-3820-4bf4-911e-4d61475173d0-c000.snappy.parquet.crc new file mode 
100644 index 0000000000000000000000000000000000000000..881413c1519d66412a717b231d6dc96adfea7b28 GIT binary patch literal 168 zcmV;Z09XHGa$^7h00IE$K6+zk)A1cpM0&M-oIn)(qlS0C zMaJ6lE@we4q8=TPu;U{Ih)LGCGsD2uz89-a1Mf(K6y2cGe;;i7{7}Di-(iH2q**TZp&bc|M4=pfoOS3rT;&8(5 zWSuUS;@vDkkhF_o9ca-Q!p%|y0mE&S+i7#!-MpQ&Q3TF#q?^M9Ro*i;j^sFoaXIWb z>*g7nBybz=vfEhF!7{j;Mf09vIEH0N+DW-_2QQEgC&#%39A|htMRP97hNe7&6D((A z><*T)yX>T002qRka#1dvA#j|gpbIHDX_t-W2!^w{?Jk-kcv^5V0)@K-9>;m;;^JK# z=OTH=Nsu<4v@;aW*;$*w+j!b$voX-c&f^sB<^j2lbn=9gb5gWm=N-7yWy77ELz7bG zBzc~&v$Vi6tlh!5;RlAWg5BjHSXG{uBppQ3ivyi52SYe`Cr>(Uv_LsX7jRQxRdb(V z+%6kS32t~}ce-7;-N6w!&(d}qAy72w1im^shHwiuj-^~S5DGy6Qv{Nu+wE|YB!k;|3b(syk{38PN!fWj1AG+#q?2YDJMLsS2QU@pO*!38hv2XiJSU(j zV{n`$1ds*I;Z7SMr&->?(ZE!Y2E0n${EWlt03Io2e6i<-5lWI zVCtm9ZDV*BPp~{ikQB=c4gsj9c#{ zfS=m!3+5}buvVB7K{~|4FJ$;rwsKUhb&aYhLe&Gy;UgSc%N3v#Um0Ft#5EC_<)1*|8| zSxJt;36iC)*#(iaXx_>RI0LH400Dp?MX=JmKmblIIu<9b98J(PtX-?6Kr?d^iCY<- z!x@GUBzmJ^RwPUdG=ryD7WzbPN`u*v01~VfgMNS>SCUkcE{+ulpbA||N5K%vO0y&Z zkO{OW8WT@jS)L^s27N5cS`Z2E86cPh0$6M0P#&mfNRs5Czr;0^P2pg|G%0|Jql0OQ zvAh*%BnXPbvEnqr%F{T|!_d~qL;OG(4Hl6jctRQtf?A}&j-c3%1LxK#Lk#ZoXqs10 zD8M7o)=C0bg=@43snQ~pDjcEVV&G!o;^5-p5>SikVd!B|k_37mLTX|V3QbU{5E^ZT zFsKS)QB@Wv0pz7$g7j-3P;da1DEMM+&9Sf$3f#>uF9lJ(+D9gTt_&|7b}nqzq;|^2 zRj-6c^SmPKh4spvhkis3uUv&12W?k=SNM%WHE5)=bnnElWxaNwzZATOR+J*)yZK#; zWhwV5d!I2NJwJ`noJs7Y=!pDF-SZ-f&d7r-g$H;m=jK*2r8Z8I2=|t5#1YSGC7XqDNiD~e$#KAi`gFiOazL8EanTIn>vLG+p( zKp967G;A7NI>%@r+tG_4|8pq2+s2VDmGX~4<$9tgcaUrCCg03PAJ1DRcl#ZE?I)$2ZcQyn6+ zYnp*xtTj6Mzl>jM&MS_im zR%z-4Smc?GqDm{c9RzOe&$jP^~tAv%^}`2S;1xR*dnuMqBM3TVaXSRw7|5^;jK+CBrL=Y!xo6qZFWx zNqED_gUu8&hI`7aWQL=(qyqZ8oEgBEg7yVK0yst8VK)67BZ>=?!jqH;-(HzgFoLia zyNXLaV-x!3OOj?Y2(VU?QT6I&v_?q_(d$yX87fAM^^_Jm6FTR!V12m^u#<#jYzYvr z0KC(I-jjWzJ8K${5jkztwhB*DV!W-a%vIvF4Szg3j=(`-8PzF-=y?jsry1BfxD+7B zGIn@n2{4x=EW_h^x+*=TCB}sYu-`N#^kT;&dF3Nv;w7aO)^h0Q8dFwU?n>AfM-mL! 
zarZdHz)%#&^SNw4r2RIn!&d1`THpUf9Ko<8OGM_!SC*AmcwDw(VPzamftBVn`zRg< z=ZX^G$^ioMU1fzMlio+Tbc@zrR9c#_O($7@vOSKFj}#(0hGuXfVNh?HW_VV31V)Uu zl~|oFYuCz>3e>7(tf{B7%4{VLz*14@uojjVxhjlDx{<(gLdYPjlKBUZlMqm~8dg@q z69(WdxCjYCvTxY< z_hC)XcRqM;XtyghF)N?3wAP#zuN>_%wJ~*eO`Bs2oC$OOTeGp6e~9aIwnqHtkS#Su z*J?T#79Q1%I9K!I&c`NR`^PykW8nJMV-}yOIeUJ>%$2px;`~Fk`}VgzE514UvAmJm z10wz6Tl5Ip=Ks|n&cmarRb3`^b556 zJpLGQ>&4SLI^KC{5s zG>e12S!ur1=diiM%fe+;F(B`HMrt&FzQ<=J#J~G`rn<&C*!riJRuC_>;cK$%er*!_U+qEa9xG# zJX}A)bs4Vj;kpjj&v1PQ*AH;L4c8^OuE5m{7fdM)>xiAW_0cb(9$;L)SJ=u39a%7X zQLEP4Z%>aI4V@ptGO(H>wL8&}*<-yWhcX^fwMGtBrFL>p`*)Y_eW!JS-sctAB?b0% zi3zMkTe?oHiZM>mN{s$NO|{N^1oVnV_03B78? z`_`;p_jTcBQT5gf8L5-Ei=Rc;9z46}nE2n1iwB$-{MX@DO=NT#71$`cywpkS4YHb`=}<@ytl-1Z(>_VuC*CtHV1 zCOl5&6atcgmGiR#e2yda>MTqx$!0G)HxXFVucFd9HnFJGQCXho_LLSUIx0P2sGW%w zrDcgD3M-f{DM<5NFwVu2b#x{^Rah~iw6Y?xuq3_QR$Nx(O3d$;=yADS9#@IOWd?}w z42H=uGSOM;Do+GcnCP?biKA_>_motWC)z3!D@M3r*r>`v4*>1^1PKiuSNVw2B4-l7 z7~t(Gk9L<aqTcumutDItw#n zt0%30sdX0ggy}yZds}6Nj&eys`c?KTbZLAyIq8KZZa@%^C1TgB>$k~E(HDM1(O%_K zL^ucaSIz4Wkabub_Dc1J8f6wd0AzK%+cJ$B)^<)%! 
z9adl)71*Iu|Gw5LM11Seq5h80e|eAg-Obkyx9g9PFPKrC`s&;LP<2Q1_fO#FQ-{0r z)j6rCPFtI)>kN_Aq~#o=QY(jjr7~!dH#()ObZY*SPAawf%t14}9=2jY349<24zg0` z^aKtdEhkmo%oDy_(Rug_Z2>%@F{^lO-JEeGs>2hJrJ@;V7*-HYnStJJ@Uw9C^-G~E zCpj%>q!1O_ty%nXTEo#Hwu|Dl=ciUI$-gXVsWkAt{WTjx9pAeLhR^$G#~9N&aqF|) z$2^$QBn|8cGM@a)&38hh27N&glMl{Xps7O<4?%fK+q>eQk@V&6>FdwaKo&*8L@~C9C?ytO^AVIA?~+Y zGE9gLhk#zl6c7lAx(G-jgo;uuXw5cybS(I~5ThXm0@4}`fPn4Y9D@8ktkOF8) z1a|;}TL@K+RvkD7EWy5s=vbDeS=NIjf}aKZFGQY3!2o+bP-2lN0oRb@P`F{T6DbG{ zrQoz6p6(D@UT>nJ@&4 zN7t%i;~3c7VTu42oB$q21=VLNQyc}EC6bG7gboms@Zg^dNIdvv5KdYPG&L#{Op1j( zh6jnGzzd-HByAE2<9H4xA&~iq4jdPrv-C=B4SgYhK~PB}5ld2QRct&ffDgo5E$P*0 zoEn_vq+;SbRjke%9S=lf<9LXlNG@qVHp3v`Bo4tgIRlY^faB2`H3*RIBMG)oXHC9N zU?8jOBsQ zK*X#B3H&P86?n%80>PjYusj`2Qw(iEb2_Dth=+_FWJWC6##HD4sR(i&qGbR``*D^< zD3>TjETF=<79lwf!VnIpEGbA9@gFtJ5FI+Oz>;gp&9%Uloo&f3$Sue&$c7J%5)DTa zbZ}>}pz+3Mb&w>ZINK0mk(MJFVS0M>d0>pdPXERjqXl1Dn0y?&rezs{Tb;zT1&tvn z)|DNdr-pI1ts7ofozp_JinDFeI2NSE(@Aey-a}Jia*#c=ja3#{jH}f=!K+NzOF%tE zr@n%f!hAp*Go2{Ht+<3_8&`t}AeTsxF^HOD={Gt?{~NlHG;4Xo_@$Nxca)4!=pbV& zulZuV|)D#7Bg7pBYPr~v8lADl<2rK4o$WaYd6D+IxQkh*q z%-4Yu({z0lV(C0z4Z7LmmFO}ZO+ox`L7D9^7<80;ObOj+j#C zVKZP~fP^E-ukI3E1k>XvcJh8~0w@MV4Gre#6yO=nbK7R3U#b~i*mVYwCQQ^a5VELJ zqdAZ<10HzB89>0X4Ck5wBN)g*Q{+T25D>%>)h4?!SE01;qL|4)Uf|95w`~0&H#>pVowb5zU=YHSs>K-^94hF6fl5-cAWC#VI;d(4LT`mYdtMMqFQ7IKjU*(Xd3 zRzgU<081N8J&jav(TWG0zfnd8w|95 zW*~O0Rj&~AT4>aAQ2j`yh&iw#7|ZoRuj;_4Go0mBY#z)G^cB6xBmsf2^F+08WgMb~ zqz_}+gr#V~#PIBgzzrG_-=|L_OwjP!Cqfe5{q4C)t?$T4P0O$jnSW`09T_tOUVYaG3oBCClv^{E$u!swX5Rl6#SO zuplh#&=&CKHb-I0qOhscPOnF^utrq!?iNK1TW*bAV5)5rBNfjb$lTQ^Cf$sDvS4?U zh(w}@L8Z=X7RUB(T77LtleiQ6ydS=?Nn9DpSLa4ee5cRtMsZd0x82XKZ4wWLpF5g= zrcrEq`2HkQ=ksFhsBOJx9%&TcW2fc)($p9^@Jdoxv-oZD!Zq95G>DzHDm%ZrxLz#Y zFyvrz|MQ}~Y2?=rmDP*auTx)qakD|JQ%{?>cEM#4&56xVLmI?i7Us8mkUS?o)c1!~ zk^36N$Lf-*vMx4=n;&|%|GBaC;;ln=^6Q1?M8mE=%?sihM9l+_;&s>0i-~0850k!Y z5dR(C^gL~75If|k*E{3SiCwykSeMQ;iiO`MZ~gl8d2!_=^FuS#jp9P`{&6#?^Ww{o 
z&X_$jw?UkB>{i_R2OGt$4Yot;zpWF$sXO*9n{-b6K6S!?n}ByO@7j{4YZO0OBE+r;KQ)M}4rU#2_O27PWu1G?ez{)EP?Ud7A3rO$Vs0+GaP++R`WK8c0&Ng4DW@)r zZKxBMK0Igso~!4@h&SiXOIcJes#@>6P!-WAE?u{~-A<}e96YuCpuS0sVw@pXn7^k% zthR3J|5fXHal`Hz%VRFqi~9zCa{aQsPP}^Tr<-e-I&s$Oz5QOkbzZ!&tnyaTk@KSY z#Zd=ciyOobx7aRkZfp<>CvP78(nob-Y}vGq-FDTByA9uexIeB@TwkPn@hx40cxb!A z!M{~6u6!hF%g_b&;-mK4GkeUd7q3k0GivY4=f#X6BR-G2)F3*$?3mM|zD|4?H4mC} zxj|e#Zf54ETk6F+d*b%RN9)95)SAyf{EOQyF)?}g;9H>Y;#H(i-2OUoOjPq#{)6+PB{e-`_m>Ug(a(>a+CRBo+&tCXVcImn z^KRF&LnrFQeY%6dEiI!(I==w!eM+69^8#J@}H+*OxlUKX6`b7TO^`FLzZW z+HK_!W|x*g$XzC7U<2Vj4nw8jH^-fYcw{J`F@?RZyO^N-i#P%NCctRC| z@jXQV<&}p~8R<<4scVyj^zeqd*H=88V@}LFO*|NuV{Uun@Q&f9mYW}4e)J`X!Oj1P ze{{L&-E_0LZSPYbZ%sEho5DW3__kn1hwVCFsacl06 z$;&i9eCh2N-4kZB;6$dq*u!it{qfl1`wL3U)ZzR3V9FBn)t{e6)G>_t+01j{x4)#! z<@YCFZF`0_M;?ecvhH7u`BJH{bYp6^dHRc;7q9!it2wI2kcnA8vE~mPue5unnlV59 zZr>=4xs%y3;J#B3{hP1l|HF8?va@+&`7+bOvd-pju47-Um_r0UK^%KkSgi_s>-)3c z!U_xs@?4Ou&1dcurLDfs^Gf|EBEimDJ+J9Kq>RD6eesN+>&B33$sq{)4sn) zY;?9x$)CL*+uU6GM#H~6M^i3{`mw}?Yi;+7Z>GK0G0S;Ybav>yG|YKi>|9*f_Su3x zV$-Ddu4CJFh)+{{+zD?qito?dc=mxUABl^XR6#cMvUsdr#b+Boy)3SJx^LHY%XWxG z_0ue~TkR0>6RD;FpBxB$0^<*9vDY=&H;XQ=3DxJd;Q@V~btm70nTS9oJRw%e@EQoq zQPZ6j@r*cdqM$H^bqcABW3j(m7l&~Wmb17u`q`ER@}&@dgFnhpPz!HSDvxZ3#>Ii- z3$-`wv5=ZMnyLC)6AKX&3^~TPD4$COdl)38Ef`!X*W)d!>6*thu{x;WQP$|%kg_^v zLIhQ;7Ru;CbbhxM6?UryLios7H6%RH8>&$$!E(D1_@m_^w}3v1M59uzKcwiMJ{ZN~ zctJVvCI%|*ST9vf8ZH$a{XEa5LnNaYBr1QA-2kjfET;gZS`Qu)GHl5k^g>_X$A|3Z~v znBUi+0$f;nHy4zxbxBl#o14`ic>~#e>b>FJ+%N@T=^7PQrNY)dJNr7+YXU6&ENC{b z2t|ZL9dZD`(dS-aM?jdMQvr(}eX}a&`Q;U#+ASe|YrqFJQj@ zikklu*nLxp!Tvvc>6hO@l0Crgok4atPnLyu+o!^}B^7?II$tRn4 z{;JUz#^&tH%ia!DkMh2yXYF}nW1xz(qC@n$Tb5uosdmD-y|Z51E}E(z+LZR~DQ`E0_hV78+7wcCnt@_d z;5^N%Q;%puc$ytp4&CJaXOlZk5m9+_y?Z@kKu|QLIS?|VQTdWl@JA2+DApLQFd7nn z=QJG)U4HLuCZ;gz5kv;3lunl!4Jz;9vS7OoPENEq24*z03ED#MxusxllrS35fuUff zgFZ%sQaU#j(j0fMV>(8I0gmK&X0Hx8F){OK)MSO#?n~>qrn_f z*OyLvFy7N1cb4`;2DYs5vmw-~{e^F%LG7}b1>QaG3N1Auy zKH88;K!nE_p4?h#48FSSNfI!f+WgIAl{4P&L!Y$R{M8VKpnc=VtkbO 
z4J^F&??0i0`}2+PI#eVB5M#Vvjnzhi#glIT6NMG{ZWpz|j)eQ}zrnC+3g3OxeZl+h zjo1Z$pOcB$^HIJ&jTzY9FyDO@iH-K%uB3uP_p#Vue=~;-_1)g$ux0*%OFLm*e9e=@ zK#vd1SWjQ;#}+K#cl&Zztjl_JpssIyZ3pa$W$((LC(?2Yo%} zK82m|lTkfBfN;hHtjyQrTJW}UBDPWGYhExN#lH7H%y0T7NOVz z)c0n}5)?b*A5-@limmfM%vz0NTjg#b%AeEowB={nXpaR59CcqF8c;OL#lnd9-4 zdb}jYmRH*4<0;)-7R9G=K8x9Y8N{hfEW8wK(z%WK>P!>TZ z3R8H&L;&7mASZOH(Nd;NN|eFD#K&B4q9wp>X&yR%?yR4yqDP6-HO5P0Oh|K4>2Lz? z+THs?TUO}-N*;6BO8S!Crq;fuLb*i(+DblS)2{-KQ+m0jmK$5?&nb)sGs*VXm3xc-DWS8WalMHbC(+b zaSgjeNAFZ4>2(MX^tdtJ_;ZR0PV{JvOgL=CEtedo(Bt}Bk^0(5lRAq<(tg*!{Cy`C zEHT6$AYug3uYV84^!goZP;42BnEnH27=a39xY}q4u5X!y1E~Hm3LQ-QBIAJ!ug_|b zJy^up5u#3|q})~$zM6fu1RL9`!3h^>)Pe5}wE|Xie9);Q^VrtBY$6EO~E}M0k*!RQqd&A&eR?rnO1i}H}dcZ*mwuYiO zB%_#Lg<@S0QxzG+;M*-^93K5>cn28#!Gl5Vk0|sHrQM@76hk9m(CZI-F|4HGF|}_F zcWgm2rdzV^tfWA9q-ew-`(={u1fN1-|148KXM?Oe`&d~c<@I>SPA0>?g6e-pP5W5W z8YbQw)fi;|P698*INqmwtj`5I-W6`Zq$b+>4vEcvCK zf@|Sk+qR4_Q{Tn3v{M!fF6$ltlW%UgP2-;?b09HoAW z5{8&gc8X_H(tbalGyLtS@gBl$lIgz!^w+@}`v6V|`fd8%K8DPNDpbD}g^s3EqGa3U zXwndjFYke3+WV6J9Nmm!JYxFT64cVW{1h3%1pN#Yx|!x$Kux8f)S#3iRJ}SaZ1kFT*zsG#vg{a zMB{G_MRP_*v;8f3Iw!jacefD7PsTD>|AgsecUaI;@NSSJd1*lhpF*Ktk)d7zsIa?L z=LVq$yvG2{>Rf;+Mg9gk4Ise`K83=ZT_fq!%D26O&I`f}aY1FM(t>?C?~Vn#TpoQG zM#F+V3Zr4cLXsxW>#(+1rrJ=q6d+cI=_hs4fBzs1%VN{g?#MbA?#xSs(!3q88uU$o zbs%Dz`;;U^$xPe}YtY~GqO6M1uZ)2)roAKLgAub|mtbL}r3K>XVCy{=$RU}Ay<24o zSC0>_^boIF=G#7{WLc}nn=U*pE%CpU>M;D()XF^wZ%$t)a782S^S29e@-p6r=-w z3+p}O8<~h3D*g8=X?P36;?F(-H$&`~zcrMKTA-q>{-&1-$U}szxVz@w!s8#NpA@M- zt}(%wEH=vL@(0sbzwPp$l(AM<%Pv18JS4Bhx6bdvdbcMwO#ib&e^X(Cfnb!RteDRg zi%$!A3Z3N@8Q+3;WTON*70D|3R`{>s^XD17qE_guAZl2InvVYqg^e%hk?8?v@aZ1Q zW6my#!5?^0Lk8!!#O!yRhv`qK^k-Bi=ni7_C-Dq{%$`0b)N}Ud`$8wYPR7~{SV6fU zRrFg}@ABS;>8n)wbt-8bp!X*U4QhYpP}1$YTSL)*Afw;-u8ba%Q@@42g+(2vua-R+ z7z3#N2}pz5pOw@P2sNkv)=<=^WYm{r)cQGxQ9W)_>$k2!;jf#G9Sw$3&+rUenGF(S z8FG#+0}>eJkoqyGs0=EDMhxxm>QO$bC(+f*sdR!Tmf0L zDTUP^be`8Mz4ZQ+j|FjYY$_GIZe@tN>hc)we7E-WI z5ybpwSCA~)!0-Um?z#K}Z-0ZOcT_$Ckdy7DrSN?nCD8B(?K(gT4T1g!B)P&89RJ6J 
z6ZBc`Rh0Sb(EbP9m;Y-U=ihT9{)e{5|DUVSZ7V9j)2DrWJYsmn=TD~>dlG#Pcp{NO zkQroRn!U2H$e9S={YJqz%;5q>@hp5h8|k3nTgzAnMZ>q=x#9cB;O|sPhd{T7iW?NN R|JVK_wg=(QA>jM{{vQ;-oNfRB literal 0 HcmV?d00001 diff --git a/tests/gentropy/data_samples/variant_sources/eva-test.jsonl b/tests/gentropy/data_samples/variant_sources/eva-test.jsonl new file mode 100644 index 000000000..e83f7dcd7 --- /dev/null +++ b/tests/gentropy/data_samples/variant_sources/eva-test.jsonl @@ -0,0 +1,50 @@ +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV001281540","releaseDate":"2021-01-17","targetFromSourceId":"ENSG00000061455","variantFunctionalConsequenceId":"SO_0001583","variantId":"5_123159516_C_T","variantRsId":"rs1333586171","cohortPhenotypes":["Heart, malformation of"],"diseaseFromSource":"Heart, malformation of","diseaseFromSourceId":"CN130023","diseaseFromSourceMappedId":"HP_0001627","variantHgvsId":"NC_000005.10:g.123159516C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002030146","releaseDate":"2022-03-28","targetFromSourceId":"ENSG00000160200","variantFunctionalConsequenceId":"SO_0001583","variantId":"21_43063980_T_C","variantRsId":"rs777884368","cohortPhenotypes":["HYPERHOMOCYSTEINEMIA, THROMBOTIC, CBS-RELATED"],"diseaseFromSource":"HYPERHOMOCYSTEINEMIA, THROMBOTIC, CBS-RELATED","diseaseFromSourceId":"C3150344","diseaseFromSourceMappedId":"Orphanet_394","variantHgvsId":"NC_000021.9:g.43063980T>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["pathogenic"],"confidence":"no assertion criteria 
provided","studyId":"RCV000412534","releaseDate":"2017-01-09","targetFromSourceId":"ENSG00000139324","variantFunctionalConsequenceId":"SO_0001587","variantId":"12_88195521_C_T","variantRsId":"rs1057517697","cohortPhenotypes":["Lissencephaly 8"],"diseaseFromSource":"Lissencephaly 8","diseaseFromSourceId":"C4310646","diseaseFromSourceMappedId":"MONDO_0018838","variantHgvsId":"NC_000012.12:g.88195521C>T"} +{"alleleOrigins":null,"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["pathogenic"],"confidence":"criteria provided, single submitter","studyId":"RCV000987058","releaseDate":"2020-01-11","targetFromSourceId":"ENSG00000130561","variantFunctionalConsequenceId":"SO_0001587","variantId":"2_233328536_C_T","variantRsId":"rs1574942567","cohortPhenotypes":["Oguchi disease","Oguchi's disease","Stationary night blindness, Oguchi type"],"diseaseFromSource":"Oguchi disease","diseaseFromSourceId":"C1306122","diseaseFromSourceMappedId":"MONDO_0019152","variantHgvsId":"NC_000002.12:g.233328536C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV000850771","releaseDate":"2019-09-22","targetFromSourceId":"ENSG00000198804","variantFunctionalConsequenceId":"SO_0001631","variantId":"MT_4456_C_T","variantRsId":"rs1603219465","cohortPhenotypes":["Juvenile myopathy, encephalopathy, lactic acidosis AND stroke","MELAS syndrome","Mitochondrial encephalomyopathy lactic acidosis and stroke-like episodes"],"diseaseFromSource":"Juvenile myopathy, encephalopathy, lactic acidosis AND stroke","diseaseFromSourceId":"C0162671","diseaseFromSourceMappedId":"Orphanet_550","variantHgvsId":"NC_012920.1:m.4456C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, multiple submitters, no 
conflicts","studyId":"RCV001223098","releaseDate":"2020-07-16","targetFromSourceId":"ENSG00000104133","variantFunctionalConsequenceId":"SO_0001583","variantId":"15_44598352_G_C","variantRsId":"rs746116309","cohortPhenotypes":["Autosomal recessive hereditary spastic paraplegia, mental impairment, and thin corpus callosum","Hereditary spastic paraplegia 11","Hereditary spastic paraplegia mental impairment and thin corpus callosum","Nakamura Osame syndrome","SPASTIC PARAPLEGIA, AUTOSOMAL RECESSIVE, COMPLICATED, WITH THIN CORPUS CALLOSUM","SPASTIC PARAPLEGIA, AUTOSOMAL RECESSIVE, WITH MENTAL IMPAIRMENT AND THIN CORPUS CALLOSUM","Spastic paraplegia 11","Spastic paraplegia 11, autosomal recessive","Spastic paraplegia, mental retardation and thin corpus callosum"],"diseaseFromSource":"Hereditary spastic paraplegia 11","diseaseFromSourceId":"C1858479","diseaseFromSourceMappedId":"MONDO_0011445","variantHgvsId":"NC_000015.10:g.44598352G>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV000279062","releaseDate":"2016-12-06","targetFromSourceId":"ENSG00000175294","variantFunctionalConsequenceId":"SO_0001819","variantId":"11_66025978_C_T","variantRsId":"rs370953416","cohortPhenotypes":["CATSPER-Related Male Infertility","MALE INFERTILITY, NONSYNDROMIC, AUTOSOMAL RECESSIVE","Spermatogenic failure 7"],"diseaseFromSource":"Spermatogenic failure 7","diseaseFromSourceId":"C2751811","diseaseFromSourceMappedId":"MONDO_0013070","variantHgvsId":"NC_000011.10:g.66025978C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely pathogenic"],"confidence":"criteria provided, single 
submitter","studyId":"RCV003236645","releaseDate":"2023-06-24","targetFromSourceId":"ENSG00000184895","variantFunctionalConsequenceId":"SO_0001583","variantId":"Y_2787299_G_A","variantRsId":null,"cohortPhenotypes":["46,XX SEX REVERSAL, SRY-POSITIVE","46,XX sex reversal 1","SRY-positive 46,XX testicular disorder of sex development"],"diseaseFromSource":"46,XX sex reversal 1","diseaseFromSourceId":"C2748895","diseaseFromSourceMappedId":"MONDO_0100250","variantHgvsId":"NC_000024.10:g.2787299G>A"} +{"alleleOrigins":null,"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV000671195","releaseDate":"2018-08-05","targetFromSourceId":"ENSG00000170927","variantFunctionalConsequenceId":"SO_0001822","variantId":"6_51748030_CTTT_C","variantRsId":"rs1554218479","cohortPhenotypes":["AR polycystic kidney disease","Autosomal recessive polycystic kidney disease","POLYCYSTIC KIDNEY AND HEPATIC DISEASE 1","POLYCYSTIC KIDNEY DISEASE, INFANTILE, TYPE I","Polycystic kidney disease, infantile type"],"diseaseFromSource":"Autosomal recessive polycystic kidney disease","diseaseFromSourceId":"C0085548","diseaseFromSourceMappedId":"MONDO_0009889","variantHgvsId":"NC_000006.12:g.51748034_51748036del"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"no assertion criteria provided","studyId":"RCV003389101","releaseDate":"2023-11-11","targetFromSourceId":"ENSG00000114374","variantFunctionalConsequenceId":"SO_0001583","variantId":"Y_12842370_G_T","variantRsId":null,"cohortPhenotypes":["AZOOSPERMIA, NONOBSTRUCTIVE, Y-LINKED","OLIGOSPERMIA, NONOBSTRUCTIVE, Y-LINKED","OLIGOZOOSPERMIA, NONOBSTRUCTIVE, Y-LINKED","SPERMATOGENIC ARREST, Y-LINKED","SPERMATOGENIC FAILURE, NONOBSTRUCTIVE, Y-LINKED","Spermatogenic failure, Y-linked, 2"],"diseaseFromSource":"Spermatogenic failure, Y-linked, 
2","diseaseFromSourceId":"C1839071","diseaseFromSourceMappedId":"MONDO_0015607","variantHgvsId":"NC_000024.10:g.12842370G>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV002083386","releaseDate":"2022-04-08","targetFromSourceId":"ENSG00000072778","variantFunctionalConsequenceId":"SO_0002169","variantId":"17_7224135_T_C","variantRsId":"rs745620433","cohortPhenotypes":["VLCAD deficiency","Very long chain acyl-CoA dehydrogenase deficiency"],"diseaseFromSource":"Very long chain acyl-CoA dehydrogenase deficiency","diseaseFromSourceId":"C3887523","diseaseFromSourceMappedId":"MONDO_0008723","variantHgvsId":"NC_000017.11:g.7224135T>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV002203969","releaseDate":"2022-04-12","targetFromSourceId":"ENSG00000125741","variantFunctionalConsequenceId":"SO_0001819","variantId":"19_45584735_C_T","variantRsId":"rs1131692018","cohortPhenotypes":["3-Methylglutaconic aciduria type 3","3-alpha methylglutaconic aciduria type III","3-methylglutaconic aciduria type III","Costeff optic atrophy syndrome","Iraqi Jewish optic atrophy plus","MGA type III","OPA3, AUTOSOMAL RECESSIVE","OPA3-Related 3-Methylglutaconic Aciduria","OPTIC ATROPHY 3, AUTOSOMAL DOMINANT","OPTIC ATROPHY 3, AUTOSOMAL RECESSIVE","Optic atrophy 3","Optic atrophy and cataract, autosomal dominant","Optic atrophy infantile with chorea and spastic paraplegia","Optic atrophy, cataract, and neurologic disorder"],"diseaseFromSource":"Optic atrophy 3","diseaseFromSourceId":"C1833809","diseaseFromSourceMappedId":"Orphanet_67036","variantHgvsId":"NC_000019.10:g.45584735C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain 
significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002676812","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000125826","variantFunctionalConsequenceId":"SO_0001583","variantId":"20_419394_C_A","variantRsId":null,"cohortPhenotypes":["POLYGLUCOSAN BODY MYOPATHY WITHOUT IMMUNODEFICIENCY","Polyglucosan body myopathy 1 with or without immunodeficiency","Polyglucosan body myopathy type 1"],"diseaseFromSource":"Polyglucosan body myopathy type 1","diseaseFromSourceId":"C4014605","diseaseFromSourceMappedId":"Orphanet_397937","variantHgvsId":"NC_000020.11:g.419394C>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002680839","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000160789","variantFunctionalConsequenceId":"SO_0001583","variantId":"1_156137732_C_T","variantRsId":null,"cohortPhenotypes":["Charcot-Marie-Tooth disease type 2","Charcot-Marie-Tooth, Type 2"],"diseaseFromSource":"Charcot-Marie-Tooth disease type 2","diseaseFromSourceId":"C0270914","diseaseFromSourceMappedId":"MONDO_0018993","variantHgvsId":"NC_000001.11:g.156137732C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["pathogenic"],"confidence":"criteria provided, single submitter","studyId":"RCV002847567","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000075891","variantFunctionalConsequenceId":"SO_0001589","variantId":"10_100750706_C_CG","variantRsId":null,"cohortPhenotypes":["CAKUT WITH OR WITHOUT OCULAR ABNORMALITIES","CONGENITAL ANOMALIES OF THE KIDNEY AND URINARY TRACT WITH OR WITHOUT OCULAR ABNORMALITIES","Coloboma of optic nerve with renal disease","Focal segmental glomerulosclerosis 7","Optic coloboma, vesicoureteral reflux, and renal anomalies","Optic nerve coloboma with renal disease","PAPILLORENAL SYNDROME WITH MILD OCULAR 
ABNORMALITIES","Papillorenal syndrome","RENAL-COLOBOMA SYNDROME WITH MACULAR ABNORMALITIES","Renal coloboma syndrome"],"diseaseFromSource":"Focal segmental glomerulosclerosis 7","diseaseFromSourceId":"C4014925","diseaseFromSourceMappedId":"MONDO_0014451","variantHgvsId":"NC_000010.11:g.100750708dup"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003621924","releaseDate":"2024-02-20","targetFromSourceId":"ENSG00000008056","variantFunctionalConsequenceId":"SO_0001819","variantId":"X_47619585_G_C","variantRsId":null,"cohortPhenotypes":["Epilepsy, X-linked 1, with variable learning disabilities and behavior disorders","Epilepsy, X-linked, with variable learning disabilities and behavior disorders","X-linked epilepsy-learning disabilities-behavior disorders syndrome"],"diseaseFromSource":"Epilepsy, X-linked 1, with variable learning disabilities and behavior disorders","diseaseFromSourceId":"C5774177","diseaseFromSourceMappedId":"Orphanet_85294","variantHgvsId":"NC_000023.11:g.47619585G>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003636240","releaseDate":"2024-02-20","targetFromSourceId":"ENSG00000143669","variantFunctionalConsequenceId":"SO_0001819","variantId":"1_235744114_A_G","variantRsId":null,"cohortPhenotypes":["Chediak-Higashi Syndrome","Chédiak-Higashi syndrome"],"diseaseFromSource":"Chédiak-Higashi syndrome","diseaseFromSourceId":"C0007965","diseaseFromSourceMappedId":"Orphanet_167","variantHgvsId":"NC_000001.11:g.235744114A>G"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely pathogenic"],"confidence":"criteria provided, single 
submitter","studyId":"RCV003735165","releaseDate":"2024-02-20","targetFromSourceId":"ENSG00000196569","variantFunctionalConsequenceId":"SO_0001575","variantId":"6_129453132_G_C","variantRsId":null,"cohortPhenotypes":["LAMA2-related muscular dystrophy","Laminin alpha 2-related dystrophy"],"diseaseFromSource":"LAMA2-related muscular dystrophy","diseaseFromSourceId":"C5679788","diseaseFromSourceMappedId":"MONDO_0100228","variantHgvsId":"NC_000006.12:g.129453132G>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV000797080","releaseDate":"2019-08-14","targetFromSourceId":"ENSG00000179348","variantFunctionalConsequenceId":"SO_0001583","variantId":"3_128485795_C_A","variantRsId":"rs1576748419","cohortPhenotypes":["COMBINED IMMUNODEFICIENCY WITH SUSCEPTIBILITY TO MYCOBACTERIAL, VIRAL, AND FUNGAL INFECTIONS","Deafness-lymphedema-leukemia syndrome","Dendritic cell, monocyte, B lymphocyte, and natural killer lymphocyte deficiency","Emberger syndrome","GATA2 DEFICIENCY","IMMUNODEFICIENCY 21","Lymphedema, primary, with myelodysplasia","MONOCYTOPENIA AND MYCOBACTERIAL INFECTION SYNDROME","MONOCYTOPENIA WITH SUSCEPTIBILITY TO MYCOBACTERIAL, FUNGAL, AND PAPILLOMAVIRUS INFECTIONS AND MYELODYSPLASIA","Monocytopenia with susceptibility to infections"],"diseaseFromSource":"Monocytopenia with susceptibility to infections","diseaseFromSourceId":"C3280030","diseaseFromSourceMappedId":"MONDO_0013607","variantHgvsId":"NC_000003.12:g.128485795C>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single 
submitter","studyId":"RCV001500249","releaseDate":"2021-06-08","targetFromSourceId":"ENSG00000159082","variantFunctionalConsequenceId":"SO_0001819","variantId":"21_32694306_C_T","variantRsId":"rs780954414","cohortPhenotypes":["Developmental and epileptic encephalopathy, 53","Early-onset Parkinson disease 20","Epileptic encephalopathy, early infantile, 53"],"diseaseFromSource":"Developmental and epileptic encephalopathy, 53","diseaseFromSourceId":"C4479313","diseaseFromSourceMappedId":"Orphanet_1934","variantHgvsId":"NC_000021.9:g.32694306C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV002392200","releaseDate":"2022-11-29","targetFromSourceId":"ENSG00000184058","variantFunctionalConsequenceId":"SO_0001819","variantId":"22_19766420_C_T","variantRsId":null,"cohortPhenotypes":["Cardiovascular phenotype"],"diseaseFromSource":"Cardiovascular phenotype","diseaseFromSourceId":"CN230736","diseaseFromSourceMappedId":"HP_0001626","variantHgvsId":"NC_000022.11:g.19766420C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV003774575","releaseDate":"2024-02-28","targetFromSourceId":"ENSG00000128591","variantFunctionalConsequenceId":"SO_0001583","variantId":"7_128841305_G_A","variantRsId":null,"cohortPhenotypes":["Cardiomyopathy, familial hypertrophic, 26","Dilated Cardiomyopathy, Dominant","Distal myopathy with posterior leg and anterior hand involvement","FILAMINOPATHY, AUTOSOMAL DOMINANT","Filaminopathy (type)","Hypertrophic cardiomyopathy 26","Myofibrillar myopathy 5","Myofibrillar myopathy, filamin C-related","Myopathy, distal, 4","WILLIAMS DISTAL MYOPATHY"],"diseaseFromSource":"Hypertrophic cardiomyopathy 
26","diseaseFromSourceId":"C4310749","diseaseFromSourceMappedId":"EFO_0000538","variantHgvsId":"NC_000007.14:g.128841305G>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV003779225","releaseDate":"2024-02-28","targetFromSourceId":"ENSG00000151929","variantFunctionalConsequenceId":"SO_0001821","variantId":"10_119670132_G_GGCA","variantRsId":null,"cohortPhenotypes":["Dilated cardiomyopathy 1HH","Myofibrillar myopathy 6","Myofibrillar myopathy, BAG3-related"],"diseaseFromSource":"Dilated cardiomyopathy 1HH","diseaseFromSourceId":"C3151293","diseaseFromSourceMappedId":"MONDO_0013479","variantHgvsId":"NC_000010.11:g.119670135_119670137dup"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV001490387","releaseDate":"2021-06-08","targetFromSourceId":"ENSG00000134853","variantFunctionalConsequenceId":"SO_0001627","variantId":"4_54287551_T_C","variantRsId":"rs2110341918","cohortPhenotypes":["Gastrointestinal Stromal Sarcoma","Gastrointestinal stroma tumor","Gastrointestinal stromal tumor","Gastrointestinal stromal tumor, somatic"],"diseaseFromSource":"Gastrointestinal stromal tumor","diseaseFromSourceId":"C0238198","diseaseFromSourceMappedId":"MONDO_0011719","variantHgvsId":"NC_000004.12:g.54287551T>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV003041098","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000007314","variantFunctionalConsequenceId":"SO_0001583","variantId":"17_63948745_A_T","variantRsId":null,"cohortPhenotypes":["Adynamia episodica hereditaria with or without myotonia","Familial hyperkalemic 
periodic paralysis","Gamstorp disease","Gamstorp episodic adynamy","Hyperkalemic periodic paralysis"],"diseaseFromSource":"Hyperkalemic periodic paralysis","diseaseFromSourceId":"C0238357","diseaseFromSourceMappedId":"MONDO_0008224","variantHgvsId":"NC_000017.11:g.63948745A>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003966094","releaseDate":"2024-03-16","targetFromSourceId":"ENSG00000164818","variantFunctionalConsequenceId":"SO_0001819","variantId":"7_756970_C_G","variantRsId":"rs571248637","cohortPhenotypes":["DNAAF5-related condition","DNAAF5-related disorder"],"diseaseFromSource":"DNAAF5-related disorder","diseaseFromSourceId":null,"diseaseFromSourceMappedId":null,"variantHgvsId":"NC_000007.14:g.756970C>G"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003921540","releaseDate":"2024-03-16","targetFromSourceId":"ENSG00000134982","variantFunctionalConsequenceId":"SO_0001623","variantId":"5_112738418_A_G","variantRsId":null,"cohortPhenotypes":["APC-related condition","APC-related disorder"],"diseaseFromSource":"APC-related disorder","diseaseFromSourceId":null,"diseaseFromSourceMappedId":null,"variantHgvsId":"NC_000005.10:g.112738418A>G"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV001340631","releaseDate":"2021-03-22","targetFromSourceId":"ENSG00000139618","variantFunctionalConsequenceId":"SO_0001583","variantId":"13_32340936_T_C","variantRsId":"rs876660049","cohortPhenotypes":["Breast and ovarian cancer","Hereditary breast and ovarian cancer","Hereditary breast and ovarian cancer syndrome","Hereditary breast 
and ovarian cancer syndrome (HBOC)","Hereditary breast ovarian cancer syndrome"],"diseaseFromSource":"Hereditary breast ovarian cancer syndrome","diseaseFromSourceId":"C0677776","diseaseFromSourceMappedId":"MONDO_0003582","variantHgvsId":"NC_000013.11:g.32340936T>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["conflicting interpretations of pathogenicity"],"confidence":"no assertion criteria provided","studyId":"RCV000056173","releaseDate":"2013-10-01","targetFromSourceId":"ENSG00000196218","variantFunctionalConsequenceId":"SO_0001583","variantId":"19_38585013_C_T","variantRsId":"rs118192153","cohortPhenotypes":["Central core disease","Central core disease of muscle","Central core myopathy","Muscle core disease","Muscular central core disease","Myopathy, central fibrillar","Shy-Magee syndrome"],"diseaseFromSource":"Central core myopathy","diseaseFromSourceId":"C0751951","diseaseFromSourceMappedId":"EFO_1000855","variantHgvsId":"NC_000019.10:g.38585013C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002249476","releaseDate":"2022-05-28","targetFromSourceId":"ENSG00000228253","variantFunctionalConsequenceId":"SO_0001631","variantId":"MT_7462_C_T","variantRsId":"rs1569484151","cohortPhenotypes":["COX deficiency","Complex 4 mitochondrial respiratory chain deficiency","Complex IV deficiency","Deficiency of mitochondrial respiratory chain complex4","Mitochondrial complex IV deficiency","Mitochondrial complex IV deficiency, nuclear type 1"],"diseaseFromSource":"Mitochondrial complex IV deficiency, nuclear type 1","diseaseFromSourceId":"C5435656","diseaseFromSourceMappedId":"MONDO_0859160","variantHgvsId":"NC_012920.1:m.7462C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely 
benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003093847","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000130294","variantFunctionalConsequenceId":"SO_0001630","variantId":"2_240767028_G_C","variantRsId":"rs748724769","cohortPhenotypes":["Hereditary sensory and autonomic neuropathy type IIC","Hereditary spastic paraplegia 30","Intellectual disability, autosomal dominant 9","Mental retardation, autosomal dominant 9","NESCAV SYNDROME","Neuropathy, hereditary sensory, type 2C","SPASTIC PARAPLEGIA 30, AUTOSOMAL DOMINANT","Spastic paraplegia 30, autosomal recessive"],"diseaseFromSource":"Neuropathy, hereditary sensory, type 2C","diseaseFromSourceId":"C3280168","diseaseFromSourceMappedId":"Orphanet_970","variantHgvsId":"NC_000002.12:g.240767028G>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002016427","releaseDate":"2022-03-28","targetFromSourceId":"ENSG00000159884","variantFunctionalConsequenceId":"SO_0001631","variantId":"9_35657840_C_G","variantRsId":"rs1287720442","cohortPhenotypes":["Anauxetic dysplasia","SPONDYLOMETAEPIPHYSEAL DYSPLASIA, ANAUXETIC TYPE"],"diseaseFromSource":"Anauxetic dysplasia","diseaseFromSourceId":"C1846796","diseaseFromSourceMappedId":"MONDO_0011773","variantHgvsId":"NC_000009.12:g.35657840C>G"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002625025","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000054983","variantFunctionalConsequenceId":"SO_0001583","variantId":"14_87976417_C_A","variantRsId":null,"cohortPhenotypes":["Galactocerebrosidase deficiency","Galactosylceramide beta-galactosidase deficiency","Globoid cell leukoencephalopathy","Krabbe leukodystrophy","Leukodystrophy, Globoid 
Cell"],"diseaseFromSource":"Galactosylceramide beta-galactosidase deficiency","diseaseFromSourceId":"C0023521","diseaseFromSourceMappedId":"MONDO_0009499","variantHgvsId":"NC_000014.9:g.87976417C>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV003516889","releaseDate":"2024-02-14","targetFromSourceId":"ENSG00000046604","variantFunctionalConsequenceId":"SO_0001583","variantId":"18_31520935_C_A","variantRsId":null,"cohortPhenotypes":["ARRHYTHMOGENIC RIGHT VENTRICULAR DYSPLASIA, FAMILIAL, 10","Arrhythmogenic Right Ventricular Dysplasia/Cardiomyopathy10","Arrhythmogenic right ventricular cardiomyopathy, type 10","Arrhythmogenic right ventricular dysplasia 10","Arrhythmogenic right ventricular dysplasia/cardiomyopathy, type 10"],"diseaseFromSource":"Arrhythmogenic right ventricular dysplasia 10","diseaseFromSourceId":"C1857777","diseaseFromSourceMappedId":"Orphanet_247","variantHgvsId":"NC_000018.10:g.31520935C>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV003583433","releaseDate":"2024-02-14","targetFromSourceId":"ENSG00000163932","variantFunctionalConsequenceId":"SO_0001583","variantId":"3_53186260_A_G","variantRsId":null,"cohortPhenotypes":["Autoimmune lymphoproliferative syndrome, type III","Autoimmune lymphoproliferative syndrome, type III caused by mutation in PRKCD"],"diseaseFromSource":"Autoimmune lymphoproliferative syndrome, type III caused by mutation in PRKCD","diseaseFromSourceId":"C3809928","diseaseFromSourceMappedId":"Orphanet_3261","variantHgvsId":"NC_000003.12:g.53186260A>G"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, 
single submitter","studyId":"RCV001419044","releaseDate":"2021-05-16","targetFromSourceId":"ENSG00000171759","variantFunctionalConsequenceId":"SO_0001819","variantId":"12_102912824_A_C","variantRsId":"rs773425620","cohortPhenotypes":["Folling disease","Oligophrenia phenylpyruvica","Phenylketonuria","Phenylketonurias"],"diseaseFromSource":"Phenylketonuria","diseaseFromSourceId":"C0031485","diseaseFromSourceMappedId":"MONDO_0009861","variantHgvsId":"NC_000012.12:g.102912824A>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV001861075","releaseDate":"2022-03-28","targetFromSourceId":"ENSG00000138622","variantFunctionalConsequenceId":"SO_0001583","variantId":"15_73324179_T_G","variantRsId":"rs1161776375","cohortPhenotypes":["Brugada syndrome 8"],"diseaseFromSource":"Brugada syndrome 8","diseaseFromSourceId":"C2751083","diseaseFromSourceMappedId":"MONDO_0013148","variantHgvsId":"NC_000015.10:g.73324179T>G"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV002138447","releaseDate":"2022-04-08","targetFromSourceId":"ENSG00000162065","variantFunctionalConsequenceId":"SO_0001627","variantId":"16_2499947_G_A","variantRsId":"rs1173044946","cohortPhenotypes":["Autosomal dominant nonsyndromic hearing loss 65","Caused by mutation in the TBC1 domain family, member 24","Deafness, autosomal dominant 65","Developmental and epileptic encephalopathy, 1","Epileptic encephalopathy, early infantile, 1","INFANTILE SPASM SYNDROME, X-LINKED 1","OHTAHARA SYNDROME, X-LINKED","Tonic spasms with clustering, arrest of psychomotor development and hypsarrhythmia on EEG","West's syndrome","X-Linked Infantile Spasm Syndrome","X-linked infantile spasms"],"diseaseFromSource":"Developmental and epileptic 
encephalopathy, 1","diseaseFromSourceId":"C3463992","diseaseFromSourceMappedId":"Orphanet_364063","variantHgvsId":"NC_000016.10:g.2499947G>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV002933690","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000165280","variantFunctionalConsequenceId":"SO_0001819","variantId":"9_35063060_A_C","variantRsId":null,"cohortPhenotypes":["Amyotrophic lateral sclerosis 14, with or without frontotemporal dementia","Frontotemporal dementia and/or amyotrophic lateral sclerosis 6","Inclusion body myopathy with Paget disease of bone and frontotemporal dementia","Inclusion body myopathy with early-onset Paget disease and frontotemporal dementia","VCP-Related Amyotrophic Lateral Sclerosis","VCP-Related Amyotrophic Lateral Sclerosis/Frontotemporal Dementia"],"diseaseFromSource":"Inclusion body myopathy with Paget disease of bone and frontotemporal dementia","diseaseFromSourceId":"C1833662","diseaseFromSourceMappedId":"Orphanet_52430","variantHgvsId":"NC_000009.12:g.35063060A>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003804888","releaseDate":"2024-02-28","targetFromSourceId":"ENSG00000178209","variantFunctionalConsequenceId":"SO_0001819","variantId":"8_143920449_G_A","variantRsId":null,"cohortPhenotypes":["Autosomal recessive limb-girdle muscular dystrophy type 2Q","EPIDERMOLYSIS BULLOSA SIMPLEX 5A, OGNA TYPE","EPIDERMOLYSIS BULLOSA SIMPLEX AND LIMB-GIRDLE MUSCULAR DYSTROPHY","Epidermolysa bullosa simplex and limb girdle muscular dystrophy","Epidermolysis bullosa simplex 5B, with muscular dystrophy","Epidermolysis bullosa simplex 5C, with pyloric atresia","Epidermolysis bullosa simplex with muscular dystrophy","Epidermolysis bullosa 
simplex with nail dystrophy","Epidermolysis bullosa simplex with pyloric atresia","Epidermolysis bullosa simplex, Ogna type","Limb-girdle muscular dystrophy, type 2Q","MUSCULAR DYSTROPHY, LIMB-GIRDLE, AUTOSOMAL RECESSIVE 17","PLEC1-Related Epidermolysis Bullosa with Pyloric Atresia","Pidermolysis bullosa simplex 5A, Ogna type"],"diseaseFromSource":"Epidermolysis bullosa simplex 5B, with muscular dystrophy","diseaseFromSourceId":"C2931072","diseaseFromSourceMappedId":"Orphanet_257","variantHgvsId":"NC_000008.11:g.143920449G>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003805362","releaseDate":"2024-02-28","targetFromSourceId":"ENSG00000101310","variantFunctionalConsequenceId":"SO_0002169","variantId":"20_18515639_C_T","variantRsId":null,"cohortPhenotypes":["CDA 2","Congenital dyserythropoietic anemia, type II","Cowden syndrome 7","Dyserythropoietic anemia, congenital type 2","HEMPAS anemia","Hereditary Erythroblastic Multinuclearity with Positive Acidified-Serum test'"],"diseaseFromSource":"Cowden syndrome 7","diseaseFromSourceId":"C4225179","diseaseFromSourceMappedId":"MONDO_0014802","variantHgvsId":"NC_000020.11:g.18515639C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely pathogenic"],"confidence":"criteria provided, single submitter","studyId":"RCV003806863","releaseDate":"2024-02-28","targetFromSourceId":"ENSG00000134569","variantFunctionalConsequenceId":"SO_0001574","variantId":"11_46883977_C_T","variantRsId":null,"cohortPhenotypes":["Cenani syndactylism","Cenani-Lenz syndactyly syndrome","Congenital myasthenic syndrome 17","SYNDACTYLY, TYPE VII","Sclerosteosis 2","Syndactyly Cenani Lenz type","Syndactyly type 7"],"diseaseFromSource":"Sclerosteosis 
2","diseaseFromSourceId":"C3280402","diseaseFromSourceMappedId":"MONDO_0013679","variantHgvsId":"NC_000011.10:g.46883977C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV003850101","releaseDate":"2024-02-28","targetFromSourceId":"ENSG00000054983","variantFunctionalConsequenceId":"SO_0002169","variantId":"14_87988221_T_C","variantRsId":null,"cohortPhenotypes":["Galactocerebrosidase deficiency","Galactosylceramide beta-galactosidase deficiency","Globoid cell leukoencephalopathy","Krabbe leukodystrophy","Leukodystrophy, Globoid Cell"],"diseaseFromSource":"Galactosylceramide beta-galactosidase deficiency","diseaseFromSourceId":"C0023521","diseaseFromSourceMappedId":"MONDO_0009499","variantHgvsId":"NC_000014.9:g.87988221T>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV001944434","releaseDate":"2022-03-28","targetFromSourceId":"ENSG00000178209","variantFunctionalConsequenceId":"SO_0001583","variantId":"8_143917391_C_T","variantRsId":"rs541897170","cohortPhenotypes":["Autosomal recessive limb-girdle muscular dystrophy type 2Q","EPIDERMOLYSIS BULLOSA SIMPLEX 5A, OGNA TYPE","EPIDERMOLYSIS BULLOSA SIMPLEX AND LIMB-GIRDLE MUSCULAR DYSTROPHY","Epidermolysa bullosa simplex and limb girdle muscular dystrophy","Epidermolysis bullosa simplex 5B, with muscular dystrophy","Epidermolysis bullosa simplex 5C, with pyloric atresia","Epidermolysis bullosa simplex with muscular dystrophy","Epidermolysis bullosa simplex with nail dystrophy","Epidermolysis bullosa simplex with pyloric atresia","Epidermolysis bullosa simplex, Ogna type","Limb-girdle muscular dystrophy, type 2Q","MUSCULAR DYSTROPHY, LIMB-GIRDLE, AUTOSOMAL RECESSIVE 17","PLEC1-Related Epidermolysis Bullosa with Pyloric 
Atresia","Pidermolysis bullosa simplex 5A, Ogna type"],"diseaseFromSource":"Autosomal recessive limb-girdle muscular dystrophy type 2Q","diseaseFromSourceId":"C3150989","diseaseFromSourceMappedId":"Orphanet_254361","variantHgvsId":"NC_000008.11:g.143917391C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely benign"],"confidence":"criteria provided, single submitter","studyId":"RCV000547369","releaseDate":"2017-12-26","targetFromSourceId":"ENSG00000008056","variantFunctionalConsequenceId":"SO_0001819","variantId":"X_47574166_C_T","variantRsId":"rs967215240","cohortPhenotypes":["Epilepsy, X-linked 1, with variable learning disabilities and behavior disorders","Epilepsy, X-linked, with variable learning disabilities and behavior disorders","X-linked epilepsy-learning disabilities-behavior disorders syndrome"],"diseaseFromSource":"Epilepsy, X-linked 1, with variable learning disabilities and behavior disorders","diseaseFromSourceId":"C5774177","diseaseFromSourceMappedId":"MONDO_0010339","variantHgvsId":"NC_000023.11:g.47574166C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["uncertain significance"],"confidence":"criteria provided, single submitter","studyId":"RCV002912729","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000173040","variantFunctionalConsequenceId":"SO_0001583","variantId":"4_5640686_T_C","variantRsId":null,"cohortPhenotypes":["Acrofacial dysostosis of Weyers","Chondroectodermal dysplasia","Curry-Hall syndrome","Ellis-van Creveld syndrome","Mesoectodermal dysplasia","WEYERS ACRODENTAL DYSOSTOSIS"],"diseaseFromSource":"Ellis-van Creveld syndrome","diseaseFromSourceId":"C0013903","diseaseFromSourceMappedId":"Orphanet_289","variantHgvsId":"NC_000004.12:g.5640686T>C"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely 
benign"],"confidence":"criteria provided, single submitter","studyId":"RCV002933106","releaseDate":"2023-02-07","targetFromSourceId":"ENSG00000187535","variantFunctionalConsequenceId":"SO_0001819","variantId":"16_1523551_C_T","variantRsId":null,"cohortPhenotypes":["Conorenal syndrome","Renal dysplasia, retinal pigmentary dystrophy, cerebellar ataxia and skeletal dysplasia","SHORT-RIB THORACIC DYSPLASIA 9 WITH OR WITHOUT POLYDACTYLY","SHORT-RIB THORACIC DYSPLASIA 9 WITHOUT POLYDACTYLY","Saldino-Mainzer syndrome"],"diseaseFromSource":"Saldino-Mainzer syndrome","diseaseFromSourceId":"C1849437","diseaseFromSourceMappedId":"Orphanet_140969","variantHgvsId":"NC_000016.10:g.1523551C>T"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["pathogenic"],"confidence":"criteria provided, single submitter","studyId":"RCV003398441","releaseDate":"2023-11-20","targetFromSourceId":"ENSG00000123191","variantFunctionalConsequenceId":"SO_0001583","variantId":"13_51958333_C_A","variantRsId":"rs28942074","cohortPhenotypes":["ATP7B-related condition","ATP7B-related disorder"],"diseaseFromSource":"ATP7B-related disorder","diseaseFromSourceId":null,"diseaseFromSourceMappedId":null,"variantHgvsId":"NC_000013.11:g.51958333C>A"} +{"alleleOrigins":["germline"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["likely pathogenic","pathogenic"],"confidence":"criteria provided, multiple submitters, no conflicts","studyId":"RCV000116005","releaseDate":"2014-05-17","targetFromSourceId":"ENSG00000183765","variantFunctionalConsequenceId":"SO_0001587","variantId":"22_28687974_G_A","variantRsId":"rs200432447","cohortPhenotypes":["Cancer predisposition","Hereditary Cancer Syndrome","Hereditary cancer-predisposing syndrome","Hereditary neoplastic syndrome","Neoplastic Syndromes, Hereditary","Tumor predisposition"],"diseaseFromSource":"Hereditary cancer-predisposing 
syndrome","diseaseFromSourceId":"C0027672","diseaseFromSourceMappedId":"MONDO_0015356","variantHgvsId":"NC_000022.11:g.28687974G>A"} +{"alleleOrigins":["de novo"],"datasourceId":"eva","datatypeId":"genetic_association","clinicalSignificances":["pathogenic"],"confidence":"criteria provided, single submitter","studyId":"RCV000855501","releaseDate":"2019-11-08","targetFromSourceId":"ENSG00000152217","variantFunctionalConsequenceId":"SO_0001583","variantId":"18_44951952_T_C","variantRsId":"rs267607038","cohortPhenotypes":["Arthrogryposis multiplex congenita","Congenital arthromyodysplasia","Congenital multiple arthrogryposis","Fetal akinesia deformation sequence 1","Fetal akinesia sequence","Fibrous ankylosis of multiple joints","Guerin-Stern syndrome","Guérin-Stern syndrome","Lethal Pena-Shokeir 1 syndrome","Myodystrophia fetalis deformans","Otto syndrome","Pena Shokeir syndrome, type 1","Pena-Shokeir syndrome type I","Rocher-Sheldon syndrome","Rossi syndrome"],"diseaseFromSource":"Fetal akinesia deformation sequence 1","diseaseFromSourceId":"C1276035","diseaseFromSourceMappedId":"Orphanet_994","variantHgvsId":"NC_000018.10:g.44951952T>C"} diff --git a/tests/gentropy/data_samples/variant_sources/pharmacogenomics-test.jsonl b/tests/gentropy/data_samples/variant_sources/pharmacogenomics-test.jsonl new file mode 100644 index 000000000..b5cbb5dca --- /dev/null +++ b/tests/gentropy/data_samples/variant_sources/pharmacogenomics-test.jsonl @@ -0,0 +1,46 @@ +{"genotypeId":"7_87531302_A_A,C","variantId":"7_87531302_A_C","genotypeAnnotationText":"Patients with the AC genotype may have an increased risk of biopsy-proven acute rejection at 12 month post-transplant when treated with cyclosporine and mycophenolate mofetil as compared to patients with the CC genotype. 
Other genetic and clinical factors may also influence a patient's response to mycophenolate mofetil.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"cyclosporine"}],"evidenceLevel":"3","genotype":"AC","literature":["18444945"],"pgxCategory":"efficacy","studyId":"982040619","targetFromSourceId":"ENSG00000085563","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs2032582","phenotypeText":"increased risk of biopsy-proven acute rejection at 12 month post-transplant"} +{"genotypeId":"3_41237949_A_G,G","variantId":"3_41237949_A_G","genotypeAnnotationText":"Patients with the GG genotype and multiple myeloma may have a decreased response to cyclophosphamide, dexamethasone, and thalidomide as compared to patients with the AA genotypes. However, they may also be at decreased risk for neutropenia when treated with lenalidomide. Other genetic and clinical factors may also influence a patient's response to cyclophosphamide, dexamethasone, and thalidomide, and risk of neutropenia when treated with lenalidomide.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"cyclophosphamide"}],"evidenceLevel":"3","genotype":"GG","literature":["26521987","26521987"],"pgxCategory":"toxicity","studyId":"1447953328","targetFromSourceId":"ENSG00000168036","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs4135385","phenotypeText":null} +{"genotypeId":"18_63312127_C_T,T","variantId":"18_63312127_C_T","genotypeAnnotationText":"Patients with the TT genotype and ovarian cancer may have an increased risk of neurotoxicity when treated with carboplatin in combination with either docetaxel or paclitaxel, as compared to patients with the CC or CT genotype. 
Other genetic and clinical factors may also influence risk of neurotoxicity.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"docetaxel"}],"evidenceLevel":"3","genotype":"TT","literature":["23963862"],"pgxCategory":"toxicity","studyId":"1183631554","targetFromSourceId":"ENSG00000171791","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs2849380","phenotypeText":"increased risk of neurotoxicity"} +{"genotypeId":"20_34927985_A_T,T","variantId":"20_34927985_A_T","genotypeAnnotationText":"No patients with the TT genotype were available for analysis, but patients with the AT genotype and non-small-cell lung cancer may have shorter overall survival times when treated with platinum agents in combination with gemcitabine or taxanes, as compared to patients with the AA genotype. Other genetic and clinical factors may also influence overall survival times in non-small-cell lung cancer patients.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"carboplatin"}],"evidenceLevel":"3","genotype":"TT","literature":["21636554"],"pgxCategory":"efficacy","studyId":"1447960379","targetFromSourceId":"ENSG00000100983","variantFunctionalConsequenceId":"SO_0001632","variantRsId":"rs17309872","phenotypeText":"shorter overall survival times"} +{"genotypeId":"9_84948455_T_C,T","variantId":"9_84948455_T_C","genotypeAnnotationText":"Patients with the CT genotype and heroin addiction may require a higher dose of methadone when undergoing methadone maintenance treatment as compared to patients with the TT genotype. 
Other genetic and clinical factors may also influence methadone dose required for effective treatment.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"methadone"}],"evidenceLevel":"3","genotype":"CT","literature":["23651024"],"pgxCategory":"dosage","studyId":"982032396","targetFromSourceId":"ENSG00000148053","variantFunctionalConsequenceId":"SO_0001630","variantRsId":"rs2289658","phenotypeText":"require a higher dose of methadone"} +{"genotypeId":"21_36135203_G_A,A","variantId":"21_36135203_G_A","genotypeAnnotationText":"Patients with the AA genotype and breast cancer who are treated with doxorubicin: 1) may have decreased metabolism of doxorubicin 2) may have greater tumor reduction 3) may have increased severity of neutropenia as compared to patients with the GG genotype. Other genetic and clinical factors may also influence a patient's response to doxorubicin treatment and risk of toxicity.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"doxorubicin"}],"evidenceLevel":"3","genotype":"AA","literature":["18551042","18551042","18551042","18551042"],"pgxCategory":"toxicity","studyId":"652882846","targetFromSourceId":"ENSG00000159231","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs8133052","phenotypeText":"greater tumor reduction"} +{"genotypeId":"14_47546779_A_A,C","variantId":"14_47546779_A_C","genotypeAnnotationText":"Patients with the AC genotype and major depression who are treated with fluvoxamine, milnacipran or paroxetine may have an increased risk of sexual dysfunction as compared to patients with the CC genotype or may have a decreased, but not absent, risk of sexual dysfunction as compared to patients with the AA genotype. 
Other genetic and clinical factors may also affect patients' response to fluvoxamine, milnacipran or paroxetine.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"paroxetine"}],"evidenceLevel":"3","genotype":"AC","literature":["22445761"],"pgxCategory":"toxicity","studyId":"1444700688","targetFromSourceId":"ENSG00000139915","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs1160351","phenotypeText":"increased risk of sexual dysfunction"} +{"genotypeId":"15_78590583_G_A,A","variantId":"15_78590583_G_A","genotypeAnnotationText":"Patients with the AA genotype who are in chronic pain and receive opioid medications for treatment may be at increased risk for addiction as compared to patients with the GG genotype. Other genetic and clinical factors may also influence risk of opiate addiction.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"Opium alkaloids and derivatives"}],"evidenceLevel":"3","genotype":"AA","literature":["20725741"],"pgxCategory":"toxicity","studyId":"1448101149","targetFromSourceId":"ENSG00000169684","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs16969968","phenotypeText":"increased risk for addiction"} +{"genotypeId":"11_113475530_G_G,GG","variantId":"11_113475530_G_GG","genotypeAnnotationText":"Patients with the G/del genotype and Schizophrenia who are treated with antipsychotics 1) may have decreased response 2) may have increased time until response, compared to patients with the GG genotype. Please note that there is contradictory evidence from studies that report no association with these alleles and response to antipsychotics. 
Other genetic and clinical factors may also influence a patient's response to antipsychotics.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"antipsychotics"}],"evidenceLevel":"3","genotype":"G/del","literature":["18926547","16513877","15830237","9858029","9858029","9918131","17105675","15694263","11505224","20194480","28673279"],"pgxCategory":"efficacy","studyId":"655388510","targetFromSourceId":"ENSG00000149295","variantFunctionalConsequenceId":"SO_0001631","variantRsId":"rs1799732","phenotypeText":"increased time until response"} +{"genotypeId":"15_78601997_G_A,A","variantId":"15_78601997_G_A","genotypeAnnotationText":"Patients with the AA genotype may be less likely to remain abstinent from smoking when treated with placebo as compared to patients with the GG genotype. Other genetic and clinical factors may also affect a patient's success at smoking cessation.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"Drugs used in nicotine dependence"}],"evidenceLevel":"3","genotype":"AA","literature":["23249876"],"pgxCategory":"other","studyId":"1450929137","targetFromSourceId":"ENSG00000080644","variantFunctionalConsequenceId":"SO_0001819","variantRsId":"rs1051730","phenotypeText":"less likely to remain abstinent from smoking"} +{"genotypeId":"1_46405089_C_A,C","variantId":"1_46405089_C_A","genotypeAnnotationText":"Patients with the AC genotype and Psychotic Disorders who are treated with aripiprazole, clozapine, haloperidol, olanzapine, quetiapine or risperidone may have an increased likelihood of weight gain of more than 7% of baseline body weight as compared to patients with the CC genotype. However, this is contradicted in one study with risperidone. 
Other genetic and clinical factors may also influence a patient's risk for treatment-induced weight gain.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"haloperidol"}],"evidenceLevel":"3","genotype":"AC","literature":["20631561","23799528"],"pgxCategory":"toxicity","studyId":"1043880750","targetFromSourceId":"ENSG00000117480","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs324420","phenotypeText":"increased likelihood of weight gain of more than 7% of baseline body weight"} +{"genotypeId":"4_88174909_G_A,G","variantId":"4_88174909_G_A","genotypeAnnotationText":"Patients with the AG genotype may have increased tumor response rate and increased risk of grade 3-4 nonhematological toxicity when treated with fluorouracil, irinotecan and leucovorin as compared to patients with the GG genotype or may have decreased tumor response rate and decreased risk of grade 3-4 nonhematological toxicity when treated with fluorouracil, irinotecan and leucovorin as compared to patients with the AA genotype. Other genetic and clinical factors may also influence a patient's response to fluorouracil, irinotecan and leucovorin.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"leucovorin"}],"evidenceLevel":"3","genotype":"AG","literature":["24018773","24018773"],"pgxCategory":"efficacy","studyId":"1444700860","targetFromSourceId":"ENSG00000118777","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs7699188","phenotypeText":"increased tumor response rate"} +{"genotypeId":"X_154536002_C_T,T","variantId":"X_154536002_C_T","genotypeAnnotationText":"Patients with the TT genotype with Malaria who are treated with artesunate, chlorproguanil and dapsone may have an increased risk of hemolysis and severe/unsafe hemoglobin decreases as compared to patients with the CC genotype. 
Other genetic and clinical factors may also influence a patient's response to artesunate, chlorproguanil and dapsone.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"artesunate"}],"evidenceLevel":"4","genotype":"TT","literature":["19112496","19690618","19690618"],"pgxCategory":"toxicity","studyId":"981352141","targetFromSourceId":"ENSG00000160211","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1050828","phenotypeText":"increased risk of hemolysis and severe/unsafe hemoglobin decreases"} +{"genotypeId":"17_18328782_G_A,A","variantId":"17_18328782_G_A","genotypeAnnotationText":"Pediatric patients with the AA genotype and with Precursor Cell Lymphoblastic Leukemia-Lymphoma who are treated with methotrexate may have increased catalytic activity of TYMS as compared to pediatric patients with the AG and GG genotype. Patients with the AA genotype and with Precursor Cell Lymphoblastic Leukemia-Lymphoma who are treated with methotrexate may have increased likelihood of Toxic liver disease as compared to patients with the AG genotype. However, this association is contradicted in other studies. Other genetic and clinical factors may also influence a patient's response to methotrexate.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"methotrexate"}],"evidenceLevel":"3","genotype":"AA","literature":["22838948","18368069","15797993"],"pgxCategory":"toxicity","studyId":"981238370","targetFromSourceId":"ENSG00000176974","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1979277","phenotypeText":"increased catalytic activity of TYMS"} +{"genotypeId":"4_88131171_G_T,T","variantId":"4_88131171_G_T","genotypeAnnotationText":"Patients with the TT genotype and HIV infection who are treated with efavirenz may have an increased risk of abnormal dreams as compared to patients with the GG genotype. 
Other genetic and clinical factors may also influence a patient's risk of efavirenz-induced side effects.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"efavirenz"}],"evidenceLevel":"3","genotype":"TT","literature":["23859571"],"pgxCategory":"toxicity","studyId":"1183614716","targetFromSourceId":"ENSG00000118777","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs2231142","phenotypeText":"increased risk of abnormal dreams"} +{"genotypeId":"19_45351661_T_G,G","variantId":"19_45351661_T_G","genotypeAnnotationText":"Patients with the GG genotype and Colorectal Neoplasms who are treated with fluorouracil and leucovorin or fluorouracil, leucovorin and oxaliplatin may have 1) an increased risk of Drug Toxicity 2) an increased risk of early relapse and 3) decreased progression free survival as compared to patients with the TT genotype. Other genetic and clinical factors may also influence a patient's response to fluorouracil, leucovorin and oxaliplatin.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"fluorouracil"}],"evidenceLevel":"3","genotype":"GG","literature":["20385995","18267032","20078613"],"pgxCategory":"efficacy","studyId":"981237959","targetFromSourceId":"ENSG00000104884","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs13181","phenotypeText":"increased risk of Drug Toxicity"} +{"genotypeId":"6_35639794_T_C,C","variantId":"6_35639794_T_C","genotypeAnnotationText":"Patients with the CC genotype may 1) have decreased response to antidepressants 2) have decreased, but not absent, risk for suicide ideation with paroxetine, venlafaxine, clomipramine, lithium, liothyronine or nefazodone as compared to patients with the CT or TT genotype. However, contradictory findings regarding an association of the opposite allele or no association with response have been reported. 
Other genetic and clinical factors may also influence a patient's response to antidepressants.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"antidepressants"}],"evidenceLevel":"3","genotype":"CC","literature":["20709156","17467808","23733030","23733030","18597649","21449676","21449676","19676097","15565110"],"pgxCategory":"efficacy","studyId":"655384568","targetFromSourceId":"ENSG00000096060","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs1360780","phenotypeText":"decreased response to antidepressants"} +{"genotypeId":"5_112865397_A_A,G","variantId":"5_112865397_A_G","genotypeAnnotationText":"Patients with the AG genotype and major depressive disorder may be less likely to respond when treated with citalopram, fluoxetine, paroxetine or sertraline as compared to patients with the GG genotype, or more likely to respond when treated with citalopram, fluoxetine, paroxetine or sertraline as compared to patients with the AA genotype. Other genetic and clinical factors may also influence response to citalopram, fluoxetine, paroxetine or sertraline.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"citalopram"}],"evidenceLevel":"3","genotype":"AG","literature":["22795047"],"pgxCategory":"efficacy","studyId":"1183590959","targetFromSourceId":"ENSG00000153037","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs495794","phenotypeText":"less likely to respond"} +{"genotypeId":"22_19963748_G_A,G","variantId":"22_19963748_G_A","genotypeAnnotationText":"Patients with the AG genotype and major Depressive Disorder may have an increased response to fluvoxamine treatment as compared to patients with the GG genotype. 
Other genetic and clinical factors may also influence a patient's response to fluvoxamine.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"fluvoxamine"}],"evidenceLevel":"3","genotype":"AG","literature":["20619611"],"pgxCategory":"efficacy","studyId":"1043880039","targetFromSourceId":"ENSG00000093010","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs4680","phenotypeText":"increased response"} +{"genotypeId":"10_94761900_C_T,T","variantId":"10_94761900_C_T","genotypeAnnotationText":"Patients with the TT genotype and breast cancer may have a decreased risk for leukopenia when treated with cyclophosphamide, doxorubicin and fluorouracil (FAC) as compared to patients with CC genotype. Other genetic and clinical factors may also influence risk for leukopenia in patients taking FAC chemotherapy.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"cyclophosphamide"}],"evidenceLevel":"3","genotype":"TT","literature":["29507678"],"pgxCategory":"toxicity","studyId":"1449718265","targetFromSourceId":"ENSG00000165841","variantFunctionalConsequenceId":"SO_0001631","variantRsId":"rs12248560","phenotypeText":"decreased risk for leukopenia"} +{"genotypeId":"3_114171968_C_T,T","variantId":"3_114171968_C_T","genotypeAnnotationText":"Patients with the TT genotype and Schizophrenia who are treated with clozapine may have a better response to treatment as compared to patients with the CC or CT genotype. Please note; this association was not found in a meta-analysis. 
Other genetic and clinical factors may also influence a patient's response to clozapine treatment.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"clozapine"}],"evidenceLevel":"3","genotype":"TT","literature":["21332319","20029384"],"pgxCategory":"efficacy","studyId":"981203578","targetFromSourceId":"ENSG00000151577","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs6280","phenotypeText":"better response to treatment"} +{"genotypeId":"19_45409478_C_A,G","variantId":"19_45409478_C_A","genotypeAnnotationText":"Patients with Mesothelioma and the AC genotype may have worse overall and progression-free survival when treated with cisplatin and gemcitabine as compared to patients with the CC genotype. Other clinical and genetic factors may also influence response to cisplatin and gemcitabine in patients with mesothelioma.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"cisplatin"}],"evidenceLevel":"3","genotype":"AG","literature":["28422153"],"pgxCategory":"efficacy","studyId":"1449004745","targetFromSourceId":"ENSG00000012061","variantFunctionalConsequenceId":"SO_0001624","variantRsId":"rs3212986","phenotypeText":"worse overall and progression-free survival"} +{"genotypeId":"2_233671807_A_A,AT","variantId":"2_233671807_A_AT","genotypeAnnotationText":"Patients with the rs3832043 T/del genotype and non-small cell lung cancer may have decreased glucuronidation of SN-38 as compared to patients with the TT genotype, or increased glucuronidation of SN-38 as compared to patients with the del/del genotype. SN-38 is the active metabolite of irinotecan, and is glucuronidated by UGT1A9 into an inactive form (SN-38G). This annotation only covers the pharmacokinetic relationship between rs3832043 and SN-38 and does not include evidence about clinical outcomes. 
Other genetic and clinical factors may also influence SN-38 metabolism.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"SN-38"}],"evidenceLevel":"3","genotype":"T/del","literature":["24897286","18221820","16636344"],"pgxCategory":"metabolism/pk","studyId":"1183615199","targetFromSourceId":"ENSG00000242515","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs3832043","phenotypeText":"decreased glucuronidation"} +{"genotypeId":"18_673016_C_T,T","variantId":"18_673016_C_T","genotypeAnnotationText":"Patients with the TT genotype and cancer who are treated with Capecitabine may have an increased risk of of nausea and vomiting as compared to patients with the CC or CT genotypes and a decreased likelihood of asthenia as compared to the CC genotype. Other clinical and genetic factors may also influence nausea and vomiting in patients with cancer who are treated with Capecitabine.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"capecitabine"}],"evidenceLevel":"3","genotype":"TT","literature":["28347776","28347776"],"pgxCategory":"toxicity","studyId":"1448616922","targetFromSourceId":"ENSG00000132199","variantFunctionalConsequenceId":"SO_0001624","variantRsId":"rs699517","phenotypeText":"decreased likelihood of asthenia"} +{"genotypeId":"17_5379957_A_G,G","variantId":"17_5379957_A_G","genotypeAnnotationText":"Patients with bipolar, depressive, psychotic, or schizoaffective disorders and the GG genotype who are administered amisulpride, aripiprazole, clozapine, olanzapine, quetiapine, paliperidone, risperidone, lithium, valproate or/and mirtazapine may have smaller elevations of fasting glucose concentrations as compared to patients with the AA genotype. 
Other clinical and genetic factors may also influence fasting glucose concentrations in patients administered these medications.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"valproic acid"}],"evidenceLevel":"3","genotype":"GG","literature":["28694205"],"pgxCategory":"other","studyId":"1449005382","targetFromSourceId":"ENSG00000108559","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs1000940","phenotypeText":"smaller elevations of fasting glucose concentrations"} +{"genotypeId":"20_54684529_G_A,G","variantId":"20_54684529_G_A","genotypeAnnotationText":"Pediatric patients with acute lymphoblastic leukemia (ALL) and the AG genotype may have an increased risk of developing osteonecrosis when treated with cyclophosphamide, cytarabine, daunorubicin, dexamethasone, doxorubicin, methotrexate, pegaspargase, prednisone, thioguanine and vincristine as compared to pediatric ALL patients with the GG genotypes and a decreased risk of osteonecrosis as compared to pediatric patients with the AA genotype. Other clinical and genetic factors may also influence the risk of developing osteonecrosis in pediatric ALL patients.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"pegaspargase"}],"evidenceLevel":"3","genotype":"AG","literature":["26590194"],"pgxCategory":"toxicity","studyId":"1448099093","targetFromSourceId":null,"variantFunctionalConsequenceId":null,"variantRsId":"rs117532069","phenotypeText":"increased risk of developing osteonecrosis"} +{"genotypeId":"8_18390208_T_A,A","variantId":"8_18390208_T_A","genotypeAnnotationText":"Patients with the AA genotype and tuberculosis (TB) may have an increased risk for anti-TB drug-induced hepatitis as compared to patients with the TT genotype. Cells with the A allele have been shown to result in decreased transcription of the NAT2 gene as compared to those with the T allele. 
Other genetic and clinical factors may also influence risk of hepatitis in patients taking anti-TB drugs.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"rifampin"}],"evidenceLevel":"3","genotype":"AA","literature":["19891553","19891553","19891553"],"pgxCategory":"metabolism/pk","studyId":"1447964123","targetFromSourceId":"ENSG00000156006","variantFunctionalConsequenceId":"SO_0001631","variantRsId":"rs4646244","phenotypeText":"increased risk for anti-TB drug-induced hepatitis"} +{"genotypeId":"21_45537880_T_C,T","variantId":"21_45537880_T_C","genotypeAnnotationText":"Patients with the rs1051266 CT genotype and rheumatoid arthritis may have decreased response when treated with methotrexate as compared to patients with the TT genotype. However, conflicting evidence has been reported. Other genetic and clinical factors may also influence methotrexate response.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"methotrexate"}],"evidenceLevel":"2A","genotype":"CT","literature":["19827168","15677700","22450926","15457444","18322994","17325736","17325736","31099054","26616421","27992285"],"pgxCategory":"efficacy","studyId":"1451245360","targetFromSourceId":"ENSG00000173638","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1051266","phenotypeText":"decreased response"} +{"genotypeId":"9_9687487_C_T,T","variantId":"9_9687487_C_T","genotypeAnnotationText":"Patients with the TT genotype may have higher risk for resistant hypertension in whites and Hispanics patients treated with verapamil and trandolapril as compared to patients with genotype CC. 
Other genetic and clinical factors may also influence the response to verapamil.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"trandolapril"}],"evidenceLevel":"3","genotype":"TT","literature":["26425837"],"pgxCategory":"efficacy","studyId":"1446902927","targetFromSourceId":"ENSG00000153707","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs4742610","phenotypeText":"higher risk for resistant hypertension"} +{"genotypeId":"12_21178615_T_C,C","variantId":"12_21178615_T_C","genotypeAnnotationText":"Patients with precursor cell lymphoblastic leukemia-lymphoma and the rs4149056 CC genotype may have a decreased response to methotrexate as compared to patients with the CT and TT genotypes. However, conflicting evidence has been reported. Other clinical and genetic factors may also influence response to methotrexate.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"methotrexate"}],"evidenceLevel":"3","genotype":"CC","literature":["24386571","28525903"],"pgxCategory":"efficacy","studyId":"1449004841","targetFromSourceId":"ENSG00000134538","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs4149056","phenotypeText":"decreased response to methotrexate"} +{"genotypeId":"X_114584047_C_T","variantId":"X_114584047_C_T","genotypeAnnotationText":"Patients with one X-chromosome, the T genotype and psychiatric disorders who are treated with olanzapine may have a decreased risk of weight gain as compared to patients with the C genotype. This gene is on the X chromosome and males have only one allele. However, some studies find no association with weight gain. 
Other genetic and clinical factors may also influence a patient's risk for weight gain with antipsychotic treatment.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"olanzapine"}],"evidenceLevel":"3","genotype":"T","literature":["17702092","22967772","21121776","19636338","15666332","19434072","25152019","18718676","17016522","20504252","19193342"],"pgxCategory":"toxicity","studyId":"1451256440","targetFromSourceId":"ENSG00000147246","variantFunctionalConsequenceId":"SO_0001631","variantRsId":"rs3813929","phenotypeText":"decreased risk of weight gain"} +{"genotypeId":"5_148826877_G_A,G","variantId":"5_148826877_G_A","genotypeAnnotationText":"Patients with the AG genotype and hypertension may have a greater decrease in diastolic blood pressure when treated with benazepril as compared to patients with the AA genotype. No significant results have been seen for systolic blood pressure. Additionally, the same study reported no significant differences in systolic or diastolic blood pressure between genotypes in a different cohort. Other genetic and clinical factors may also influence change in diastolic or systolic blood pressure.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"benazepril"}],"evidenceLevel":"4","genotype":"AG","literature":["15554460","15554460","15554460"],"pgxCategory":"efficacy","studyId":"1183614850","targetFromSourceId":"ENSG00000169252","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1042713","phenotypeText":"greater decrease in diastolic blood pressure"} +{"genotypeId":"16_28606193_C_C,T","variantId":"16_28606193_C_T","genotypeAnnotationText":"Patients with the CT genotype may have a decreased response to acetaminophen (paracetamol) as compared to patients with the CC genotype. 
Other genetic and clinical factors may also affect a patient's response to acetaminophen.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"acetaminophen"}],"evidenceLevel":"3","genotype":"CT","literature":["30908574"],"pgxCategory":"efficacy","studyId":"1450374089","targetFromSourceId":"ENSG00000196502","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1042028","phenotypeText":"decreased response to acetaminophen (paracetamol)"} +{"genotypeId":"13_46834899_G_A,A","variantId":"13_46834899_G_A","genotypeAnnotationText":"Patients with the AA genotype and major depressive disorder who are treated with antidepressants and other treatments may have a reduced response and reduced likelihood of remission as compared to patients with the AG or GG genotype. Other genetic and clinical factors may also influence a patient's response to treatment for major depressive disorder and likelihood of remission.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"antidepressants"}],"evidenceLevel":"3","genotype":"AA","literature":["11311507","18253134","18253134","20075642"],"pgxCategory":"efficacy","studyId":"1183618924","targetFromSourceId":"ENSG00000102468","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs6314","phenotypeText":"reduced response and reduced likelihood of remission"} +{"genotypeId":"16_55810697_G_G,T","variantId":"16_55810697_G_T","genotypeAnnotationText":"Patients with the GT genotype and Atrial Fibrillation who are treated with dabigatran may have 1) a decreased adjusted trough concentrations of dabigatran, 2) a decreased, but not absent, risk for bleeding when treated with dabigatran as compared to patients with the TT genotype. 
Other genetic and clinical factors may also influence a patient's risk for bleeding.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"dabigatran"}],"evidenceLevel":"3","genotype":"GT","literature":["23467860","23467860","27261537"],"pgxCategory":"toxicity","studyId":"1183490952","targetFromSourceId":"ENSG00000198848","variantFunctionalConsequenceId":"SO_0001627","variantRsId":"rs2244613","phenotypeText":"decreased, but not absent, risk for bleeding"} +{"genotypeId":"22_19963748_G_A,A","variantId":"22_19963748_G_A","genotypeAnnotationText":"Patients with the AA genotype with substance withdrawal syndrome may have a decreased likelihood of headache when discontinuing the use of analgesics (such as opioids, NSAIDs, triptans, ergot) as compared to patients with the AG and GG genotypes. Other clinical and genetic factors may also influence likelihood of headache in patients with withdrawal syndrome who discontinue the use of analgesics.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"sumatriptan"}],"evidenceLevel":"3","genotype":"AA","literature":["25096645"],"pgxCategory":"other","studyId":"1184987573","targetFromSourceId":"ENSG00000093010","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs4680","phenotypeText":"decreased likelihood of headache"} +{"genotypeId":"10_99804058_G_A,A","variantId":"10_99804058_G_A","genotypeAnnotationText":"Patients with the AA genotype and gastrointestinal stromal tumors may have increased progression-free survival times when treated with imatinib as compared to patients with the GG genotype. 
Other genetic and clinical factors may also influence progression-free survival tumes in patients receiving imatinib.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"imatinib"}],"evidenceLevel":"3","genotype":"AA","literature":["30237583"],"pgxCategory":"efficacy","studyId":"1450373604","targetFromSourceId":"ENSG00000023839","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs2273697","phenotypeText":"increased progression-free survival times"} +{"genotypeId":"6_154039662_A_G,G","variantId":"6_154039662_A_G","genotypeAnnotationText":"Patients with the rs1799971 GG genotype may have increased alfentanil dose requirements as compared to patients with the AA genotype. This drug-variant pair has been assigned a “no recommendation” by CPIC, as it was determined to be not clinically actionable. Other genetic or clinical factors may also affect a alfentanil dose requirements.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"alfentanil"}],"evidenceLevel":"3","genotype":"GG","literature":["19605407"],"pgxCategory":"dosage","studyId":"1450826899","targetFromSourceId":"ENSG00000112038","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1799971","phenotypeText":"increased alfentanil dose requirements"} +{"genotypeId":"14_103699416_G_A,A","variantId":"14_103699416_G_A","genotypeAnnotationText":"Patients with the AA genotype and non-small cell lung cancer may have an improved response when treated with platinum compounds as compared to patients with the GG genotype, although this is contradicted in one study. 
Other clinical or genetic factors may also influence a patient's response to platinum compounds in people with non-small cell lung cancer.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"Platinum compounds"}],"evidenceLevel":"3","genotype":"AA","literature":["23940523","23940523","27248474"],"pgxCategory":"efficacy","studyId":"1043872993","targetFromSourceId":"ENSG00000126215","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs861539","phenotypeText":"improved response"} +{"genotypeId":"11_67585218_A_A,G","variantId":"11_67585218_A_G","genotypeAnnotationText":"Patients with the AG genotype and Ovarian Neoplasms who are treated with cisplatin and cyclophosphamide may have a decreased likelihood of progression free survival as compared to patients with the AA genotype. However, this association was contradicted in other studies. Other genetic and clinical factors may also influence a patient's response to cisplatin and cyclophosphamide treatment.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"cyclophosphamide"}],"evidenceLevel":"3","genotype":"AG","literature":["22188361","19786980"],"pgxCategory":"efficacy","studyId":"981237950","targetFromSourceId":"ENSG00000084207","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1695","phenotypeText":"decreased likelihood of progression free survival"} +{"genotypeId":"7_87509329_A_A,G","variantId":"7_87509329_A_G","genotypeAnnotationText":"Patients with the AG genotype and schizophrenia who responded to treatment with antipsychotics may require a decreased dose of antipsychotics as compared to patients with the GG genotype, or an increased dose as compared to patients with the AA genotype. 
Other genetic and clinical factors may also influence dose of antipsychotics.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"antipsychotics"}],"evidenceLevel":"3","genotype":"AG","literature":["22909202"],"pgxCategory":"dosage","studyId":"1447983818","targetFromSourceId":"ENSG00000085563","variantFunctionalConsequenceId":"SO_0001819","variantRsId":"rs1045642","phenotypeText":"require a decreased dose of antipsychotics"} +{"genotypeId":"12_21178615_T_C,C","variantId":"12_21178615_T_C","genotypeAnnotationText":"Patients with the rs4149056 CC genotype may have increased concentrations of pitavastatin when treated with pitavastatin as compared to patients with TT genotype. Other genetic and clinical factors may also influence the metabolism of pitavastatin. This annotation only covers the pharmacokinetic relationship between rs4149056 and pitavastatin and does not include evidence about clinical outcomes.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"pitavastatin"}],"evidenceLevel":"1A","genotype":"CC","literature":["23556337","17460607"],"pgxCategory":"metabolism/pk","studyId":"1451678210","targetFromSourceId":"ENSG00000134538","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs4149056","phenotypeText":"increased concentrations of pitavastatin"} +{"genotypeId":"8_18400860_G_A,A","variantId":"8_18400860_G_A","genotypeAnnotationText":"Patients with the AA genotype may have decreased but not absent risk of toxicity with docetaxel and thalidomide as compared to patients with the AG or GG genotypes. 
Other genetic and clinical factors may also influence treatment response.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"thalidomide"}],"evidenceLevel":"3","genotype":"AA","literature":["20038957"],"pgxCategory":"toxicity","studyId":"655386176","targetFromSourceId":"ENSG00000156006","variantFunctionalConsequenceId":"SO_0001583","variantRsId":"rs1799931","phenotypeText":"decreased risk of toxicity"} +{"genotypeId":"13_46895805_G_A,A","variantId":"13_46895805_G_A","genotypeAnnotationText":"Patients with the rs6313 AA genotype and major depressive disorder may be more likely to develop sexual dysfunction and less likely to develop heart palpitations and when treated with citalopram as compared to patients with the AG or GG genotype. The current evidence base suggests that there is no association between the genotype and gastrointestinal toxicity. Other genetic and clinical factors may also influence likelihood of developing sexual dysfunction when treated with citalopram.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"citalopram"}],"evidenceLevel":"3","genotype":"AA","literature":["31792367","23158458","23158458","30221791"],"pgxCategory":"toxicity","studyId":"1451407329","targetFromSourceId":"ENSG00000102468","variantFunctionalConsequenceId":"SO_0001819","variantRsId":"rs6313","phenotypeText":"less likely to develop heart palpitations"} +{"genotypeId":"2_233772999_G_C,C","variantId":"2_233772999_G_C","genotypeAnnotationText":"Patients with the CC genotype and HIV may have a decreased risk of nephrolithiasis when treated with atazanavir and ritonavir as compared to patients with the CG and GG genotypes. 
Other genetic and clinical factors may also affect risk of nephrolithiasis in patients with HIV who are taking atazanavir and ritonavir.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"ritonavir"}],"evidenceLevel":"3","genotype":"CC","literature":["25151207"],"pgxCategory":"toxicity","studyId":"1185000369","targetFromSourceId":"ENSG00000244474","variantFunctionalConsequenceId":"SO_0001624","variantRsId":"rs8330","phenotypeText":"decreased risk of nephrolithiasis"} +{"genotypeId":"1_161215085_A_A,G","variantId":"1_161215085_A_G","genotypeAnnotationText":"Patients with the AG genotype and asthma may have a decreased risk for aspirin sensitivity but patients with chronic urticaria may have an increased risk for aspirin sensitivity as compared to patients with the AA genotype. Other genetic and clinical factors may also influence a patient's risk for aspirin sensitivity.","datasourceId":"pharmgkb","datasourceVersion":"2024-07-05","datatypeId":"clinical_annotation","drugs":[{"drugFromSource":"aspirin"}],"evidenceLevel":"3","genotype":"AG","literature":["18534082","18595682"],"pgxCategory":"toxicity","studyId":"1043858680","targetFromSourceId":"ENSG00000158869","variantFunctionalConsequenceId":"SO_0001631","variantRsId":"rs11587213","phenotypeText":"decreased risk for aspirin sensitivity"} diff --git a/tests/gentropy/data_samples/variant_sources/uniprot-test-sort.jsonl b/tests/gentropy/data_samples/variant_sources/uniprot-test-sort.jsonl new file mode 100644 index 000000000..c408b4fe4 --- /dev/null +++ b/tests/gentropy/data_samples/variant_sources/uniprot-test-sort.jsonl @@ -0,0 +1,9 @@ +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Pontocerebellar hypoplasia, hypotonia, and respiratory insufficiency syndrome, neonatal 
lethal","diseaseFromSourceId":"OMIM:618810","diseaseFromSourceMappedId":"MONDO_0032931","literature":"['31727539']","targetFromSourceId":"Q9NVI7","targetModulation":"up_or_down","variantRsId":"rs1570345942","variantFunctionalConsequenceId":"SO_0001583","variantId":"1_1525242_T_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Charcot-Marie-Tooth disease, demyelinating, 1B","diseaseFromSourceId":"OMIM:118200","diseaseFromSourceMappedId":"MONDO_0007307","literature":"['8797476', '7688964', '10737979', '11437164', '12221176', '14711881']","targetFromSourceId":"P25189","targetModulation":"up_or_down","variantRsId":"rs121913589","variantFunctionalConsequenceId":"SO_0001583","variantId":"1_161306863_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Achromatopsia 2","diseaseFromSourceId":"OMIM:216900","diseaseFromSourceMappedId":null,"literature":"['9662398', '11536077', '18521937']","targetFromSourceId":"Q16281","targetModulation":"up_or_down","variantRsId":"rs104893614","variantFunctionalConsequenceId":"SO_0001583","variantId":"2_98396018_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Brugada syndrome 1","diseaseFromSourceId":"OMIM:601144","diseaseFromSourceMappedId":"MONDO_0011001","literature":"['20129283']","targetFromSourceId":"Q14524","targetModulation":"up_or_down","variantRsId":"rs199473172","variantFunctionalConsequenceId":"SO_0001583","variantId":"3_38585800_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Ataxia telangiectasia","diseaseFromSourceId":"OMIM:208900","diseaseFromSourceMappedId":"Orphanet_100","literature":"['10817650', '10873394', '9288106', 
'9443866']","targetFromSourceId":"Q13315","targetModulation":"up_or_down","variantRsId":"rs587779872","variantFunctionalConsequenceId":"SO_0001583","variantId":"11_108345818_C_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Autoimmune polyendocrine syndrome 1, with or without reversible metaphyseal dysplasia","diseaseFromSourceId":"OMIM:240300","diseaseFromSourceMappedId":"Orphanet_3453","literature":"['11524733', '11836330', '15712268', '14974083', '11524731']","targetFromSourceId":"O43918","targetModulation":"up_or_down","variantRsId":"rs179363880","variantFunctionalConsequenceId":"SO_0001583","variantId":"21_44286656_T_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Lowe oculocerebrorenal syndrome","diseaseFromSourceId":"OMIM:309000","diseaseFromSourceMappedId":"MONDO_0010645","literature":"['10923037']","targetFromSourceId":"Q01968","targetModulation":"up_or_down","variantRsId":"rs137853854","variantFunctionalConsequenceId":"SO_0001583","variantId":"X_129562612_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"46,XY sex reversal 1","diseaseFromSourceId":"OMIM:400044","diseaseFromSourceMappedId":"MONDO_0020712","literature":"['2247149', '1570829']","targetFromSourceId":"Q05066","targetModulation":"up_or_down","variantRsId":"rs104894957","variantFunctionalConsequenceId":"SO_0001583","variantId":"Y_2787426_C_G"} +{"confidence":"medium","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Colorectal cancer","diseaseFromSourceId":"OMIM:114500","diseaseFromSourceMappedId":"MONDO_0005575","literature":"['16407113', '19218458']","targetFromSourceId":"P00395","targetModulation":"up_or_down","variantRsId":"rs281865417","variantFunctionalConsequenceId":"SO_0001583","variantId":"MT_6277_G_A"} diff --git 
a/tests/gentropy/data_samples/variant_sources/uniprot-test.jsonl b/tests/gentropy/data_samples/variant_sources/uniprot-test.jsonl new file mode 100644 index 000000000..f3a637b1b --- /dev/null +++ b/tests/gentropy/data_samples/variant_sources/uniprot-test.jsonl @@ -0,0 +1,50 @@ +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Brugada syndrome 1","diseaseFromSourceId":"OMIM:601144","diseaseFromSourceMappedId":"MONDO_0011001","literature":"['20129283']","targetFromSourceId":"Q14524","targetModulation":"up_or_down","variantRsId":"rs199473172","variantFunctionalConsequenceId":"SO_0001583","variantId":"3_38585800_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Lowe oculocerebrorenal syndrome","diseaseFromSourceId":"OMIM:309000","diseaseFromSourceMappedId":"MONDO_0010645","literature":"['10923037']","targetFromSourceId":"Q01968","targetModulation":"up_or_down","variantRsId":"rs137853854","variantFunctionalConsequenceId":"SO_0001583","variantId":"X_129562612_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Pontocerebellar hypoplasia, hypotonia, and respiratory insufficiency syndrome, neonatal lethal","diseaseFromSourceId":"OMIM:618810","diseaseFromSourceMappedId":"MONDO_0032931","literature":"['31727539']","targetFromSourceId":"Q9NVI7","targetModulation":"up_or_down","variantRsId":"rs1570345942","variantFunctionalConsequenceId":"SO_0001583","variantId":"1_1525242_T_G"} +{"confidence":"medium","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Colorectal cancer","diseaseFromSourceId":"OMIM:114500","diseaseFromSourceMappedId":"MONDO_0005575","literature":"['16407113', 
'19218458']","targetFromSourceId":"P00395","targetModulation":"up_or_down","variantRsId":"rs281865417","variantFunctionalConsequenceId":"SO_0001583","variantId":"MT_6277_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Ataxia telangiectasia","diseaseFromSourceId":"OMIM:208900","diseaseFromSourceMappedId":"Orphanet_100","literature":"['10817650', '10873394', '9288106', '9443866']","targetFromSourceId":"Q13315","targetModulation":"up_or_down","variantRsId":"rs587779872","variantFunctionalConsequenceId":"SO_0001583","variantId":"11_108345818_C_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Long QT syndrome 3","diseaseFromSourceId":"OMIM:603830","diseaseFromSourceMappedId":"MONDO_0011377","literature":"['10911008']","targetFromSourceId":"Q14524","targetModulation":"up_or_down","variantRsId":"rs137854605","variantFunctionalConsequenceId":"SO_0001583","variantId":"3_38581337_GA_AT"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Alzheimer disease 3","diseaseFromSourceId":"OMIM:607822","diseaseFromSourceMappedId":"MONDO_0011913","literature":"['11524469', '9384602', '12552037']","targetFromSourceId":"P49768","targetModulation":"up_or_down","variantRsId":"rs63750450","variantFunctionalConsequenceId":"SO_0001583","variantId":"14_73173571_A_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Telangiectasia, hereditary hemorrhagic, 2","diseaseFromSourceId":"OMIM:600376","diseaseFromSourceMappedId":null,"literature":"['8640225']","targetFromSourceId":"P37023","targetModulation":"up_or_down","variantRsId":"rs28936399","variantFunctionalConsequenceId":"SO_0001583","variantId":"12_51916114_T_G"} 
+{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Cystathionine beta-synthase deficiency","diseaseFromSourceId":"OMIM:236200","diseaseFromSourceMappedId":"Orphanet_394","literature":"['16205833']","targetFromSourceId":"P35520","targetModulation":"up_or_down","variantRsId":"rs141502207","variantFunctionalConsequenceId":"SO_0001583","variantId":"21_43063045_C_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Autoimmune polyendocrine syndrome 1, with or without reversible metaphyseal dysplasia","diseaseFromSourceId":"OMIM:240300","diseaseFromSourceMappedId":"Orphanet_3453","literature":"['11524733', '11836330', '15712268', '14974083', '11524731']","targetFromSourceId":"O43918","targetModulation":"up_or_down","variantRsId":"rs179363880","variantFunctionalConsequenceId":"SO_0001583","variantId":"21_44286656_T_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Retinitis pigmentosa 1","diseaseFromSourceId":"OMIM:180100","diseaseFromSourceMappedId":null,"literature":"['15933747']","targetFromSourceId":"P56715","targetModulation":"up_or_down","variantRsId":"rs200135800","variantFunctionalConsequenceId":"SO_0001583","variantId":"8_54626833_A_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Meier-Gorlin syndrome 4","diseaseFromSourceId":"OMIM:613804","diseaseFromSourceMappedId":"Orphanet_2554","literature":"['21358632']","targetFromSourceId":"Q9H211","targetModulation":"up_or_down","variantRsId":"rs200672589","variantFunctionalConsequenceId":"SO_0001583","variantId":"16_88807362_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Breast cancer","diseaseFromSourceId":"OMIM:114480","diseaseFromSourceMappedId":"EFO_0003869","literature":"['15026808', 
'15635067']","targetFromSourceId":"P51587","targetModulation":"up_or_down","variantRsId":"rs28897754","variantFunctionalConsequenceId":"SO_0001583","variantId":"13_32379412_G_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Ceroid lipofuscinosis, neuronal, 6","diseaseFromSourceId":"OMIM:601780","diseaseFromSourceMappedId":"Orphanet_228363","literature":"['21990111']","targetFromSourceId":"Q9NWW5","targetModulation":"up_or_down","variantRsId":"rs150363441","variantFunctionalConsequenceId":"SO_0001583","variantId":"15_68208301_C_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"46,XY sex reversal 1","diseaseFromSourceId":"OMIM:400044","diseaseFromSourceMappedId":"MONDO_0020712","literature":"['2247149', '1570829']","targetFromSourceId":"Q05066","targetModulation":"up_or_down","variantRsId":"rs104894957","variantFunctionalConsequenceId":"SO_0001583","variantId":"Y_2787426_C_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Houge-Janssens syndrome 3","diseaseFromSourceId":"OMIM:618354","diseaseFromSourceMappedId":"MONDO_0032697","literature":"['30595372']","targetFromSourceId":"P67775","targetModulation":"up_or_down","variantRsId":"rs1580636665","variantFunctionalConsequenceId":"SO_0001583","variantId":"5_134200405_T_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Lymphatic malformation 9","diseaseFromSourceId":"OMIM:619319","diseaseFromSourceMappedId":null,"literature":"['31215153']","targetFromSourceId":"Q9NYQ6","targetModulation":"up_or_down","variantRsId":"rs369237672","variantFunctionalConsequenceId":"SO_0001583","variantId":"22_46397727_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Tyrosinemia 
1","diseaseFromSourceId":"OMIM:276700","diseaseFromSourceMappedId":"Orphanet_882","literature":"['7757089']","targetFromSourceId":"P16930","targetModulation":"up_or_down","variantRsId":"rs121965077","variantFunctionalConsequenceId":"SO_0001583","variantId":"15_80181120_A_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Miyoshi muscular dystrophy 1","diseaseFromSourceId":"OMIM:254130","diseaseFromSourceMappedId":"MONDO_0024545","literature":"['16010686', '18853459']","targetFromSourceId":"O75923","targetModulation":"up_or_down","variantRsId":"rs1258728780","variantFunctionalConsequenceId":"SO_0001583","variantId":"2_71517029_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Adenine phosphoribosyltransferase deficiency","diseaseFromSourceId":"OMIM:614723","diseaseFromSourceMappedId":"MONDO_0013869","literature":"['1746557']","targetFromSourceId":"P07741","targetModulation":"up_or_down","variantRsId":"rs104894506","variantFunctionalConsequenceId":"SO_0001583","variantId":"16_88810550_T_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Developmental and epileptic encephalopathy 7","diseaseFromSourceId":"OMIM:613720","diseaseFromSourceMappedId":null,"literature":"['25818041']","targetFromSourceId":"O43526","targetModulation":"up_or_down","variantRsId":"rs796052665","variantFunctionalConsequenceId":"SO_0001059","variantId":"20_63413478_GC_TT"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Hirschsprung disease 1","diseaseFromSourceId":"OMIM:142623","diseaseFromSourceMappedId":null,"literature":"['8114939', '10618407']","targetFromSourceId":"P07949","targetModulation":"up_or_down","variantRsId":"rs76764689","variantFunctionalConsequenceId":"SO_0001583","variantId":"10_43100480_C_G"} 
+{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Hirschsprung disease 1","diseaseFromSourceId":"OMIM:142623","diseaseFromSourceMappedId":null,"literature":"['22174939']","targetFromSourceId":"P07949","targetModulation":"up_or_down","variantRsId":"rs746970700","variantFunctionalConsequenceId":"SO_0001583","variantId":"10_43109201_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Macular dystrophy, vitelliform, 2","diseaseFromSourceId":"OMIM:153700","diseaseFromSourceMappedId":"MONDO_0007931","literature":"['12324875']","targetFromSourceId":"O76090","targetModulation":"up_or_down","variantRsId":"rs281865262","variantFunctionalConsequenceId":"SO_0001583","variantId":"11_61959534_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Hyperornithinemia-hyperammonemia-homocitrullinuria syndrome","diseaseFromSourceId":"OMIM:238970","diseaseFromSourceMappedId":"MONDO_0009393","literature":"['19242930']","targetFromSourceId":"Q9Y619","targetModulation":"up_or_down","variantRsId":"rs121908533","variantFunctionalConsequenceId":"SO_0001583","variantId":"13_40799111_T_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Bosma arhinia microphthalmia syndrome","diseaseFromSourceId":"OMIM:603457","diseaseFromSourceMappedId":"Orphanet_2250","literature":"['28067909']","targetFromSourceId":"A6NHR9","targetModulation":"up_or_down","variantRsId":"rs1135402741","variantFunctionalConsequenceId":"SO_0001583","variantId":"18_2688480_C_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Left ventricular non-compaction 
1","diseaseFromSourceId":"OMIM:604169","diseaseFromSourceMappedId":null,"literature":"['11238270']","targetFromSourceId":"Q9Y4J8","targetModulation":"up_or_down","variantRsId":"rs104894654","variantFunctionalConsequenceId":"SO_0001583","variantId":"18_34794250_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Mismatch repair cancer syndrome 4","diseaseFromSourceId":"OMIM:619101","diseaseFromSourceMappedId":null,"literature":"['18602922', '24027009', '27435373']","targetFromSourceId":"P54278","targetModulation":"up_or_down","variantRsId":"rs587779342","variantFunctionalConsequenceId":"SO_0001583","variantId":"7_5999199_T_G"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Frontotemporal dementia and\/or amyotrophic lateral sclerosis 2","diseaseFromSourceId":"OMIM:615911","diseaseFromSourceMappedId":null,"literature":"['24934289']","targetFromSourceId":"Q8WYQ3","targetModulation":"up_or_down","variantRsId":"rs587777574","variantFunctionalConsequenceId":"SO_0001583","variantId":"22_23767459_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Congenital bilateral absence of the vas deferens","diseaseFromSourceId":"OMIM:277180","diseaseFromSourceMappedId":"MONDO_0010178","literature":"['17329263']","targetFromSourceId":"P13569","targetModulation":"up_or_down","variantRsId":"rs115545701","variantFunctionalConsequenceId":"SO_0001583","variantId":"7_117509089_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Deafness, autosomal dominant, 6","diseaseFromSourceId":"OMIM:600965","diseaseFromSourceMappedId":null,"literature":"['11709537']","targetFromSourceId":"O76024","targetModulation":"up_or_down","variantRsId":"rs104893883","variantFunctionalConsequenceId":"SO_0001583","variantId":"4_6302281_T_C"} 
+{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Osteoporosis","diseaseFromSourceId":"OMIM:166710","diseaseFromSourceMappedId":"EFO_0003882","literature":"['23499309']","targetFromSourceId":"P04628","targetModulation":"up_or_down","variantRsId":"rs387907359","variantFunctionalConsequenceId":"SO_0001583","variantId":"12_48981230_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"MASA syndrome","diseaseFromSourceId":"OMIM:303350","diseaseFromSourceMappedId":"MONDO_0010559","literature":"['7920660', '8556302', '7920659', '22973895', '24155914']","targetFromSourceId":"P32004","targetModulation":"up_or_down","variantRsId":"rs28933683","variantFunctionalConsequenceId":"SO_0001583","variantId":"X_153870854_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Hypercholesterolemia, familial, 1","diseaseFromSourceId":"OMIM:143890","diseaseFromSourceMappedId":"MONDO_0007750","literature":"['24529145']","targetFromSourceId":"P01130","targetModulation":"up_or_down","variantRsId":"rs121908043","variantFunctionalConsequenceId":"SO_0001583","variantId":"19_11113307_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Developmental and epileptic encephalopathy 14","diseaseFromSourceId":"OMIM:614959","diseaseFromSourceMappedId":"MONDO_0013989","literature":"['26993267', '24029078']","targetFromSourceId":"Q5JUK3","targetModulation":"up_or_down","variantRsId":"rs587777264","variantFunctionalConsequenceId":"SO_0001583","variantId":"9_135759686_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Adrenal hyperplasia 
3","diseaseFromSourceId":"OMIM:201910","diseaseFromSourceMappedId":"MONDO_0008728","literature":"['10364682']","targetFromSourceId":"P08686","targetModulation":"up_or_down","variantRsId":"rs72552751","variantFunctionalConsequenceId":"SO_0001583","variantId":"6_32039444_G_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"46,XY sex reversal 1","diseaseFromSourceId":"OMIM:400044","diseaseFromSourceMappedId":"MONDO_0020712","literature":"['12107262']","targetFromSourceId":"Q05066","targetModulation":"up_or_down","variantRsId":"rs104894973","variantFunctionalConsequenceId":"SO_0001583","variantId":"Y_2787224_T_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Galactosemia 1","diseaseFromSourceId":"OMIM:230400","diseaseFromSourceMappedId":"MONDO_0009258","literature":"['10408771']","targetFromSourceId":"P07902","targetModulation":"up_or_down","variantRsId":"rs111033741","variantFunctionalConsequenceId":"SO_0001583","variantId":"9_34648419_T_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Hypercholesterolemia, familial, 1","diseaseFromSourceId":"OMIM:143890","diseaseFromSourceMappedId":"EFO_0004911","literature":"['9104431']","targetFromSourceId":"P01130","targetModulation":"up_or_down","variantRsId":"rs121908033","variantFunctionalConsequenceId":"SO_0001583","variantId":"19_11105429_G_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Spastic paraplegia 79B, autosomal recessive","diseaseFromSourceId":"OMIM:615491","diseaseFromSourceMappedId":"Orphanet_352654","literature":"['28007905']","targetFromSourceId":"P09936","targetModulation":"up_or_down","variantRsId":"rs1057519600","variantFunctionalConsequenceId":"SO_0001583","variantId":"4_41268048_C_A"} 
+{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Breast cancer","diseaseFromSourceId":"OMIM:114480","diseaseFromSourceMappedId":"EFO_0003869","literature":"['17924331', '21473589', '23867111', '14746861', '25472942']","targetFromSourceId":"P38398","targetModulation":"up_or_down","variantRsId":"rs55770810","variantFunctionalConsequenceId":"SO_0001583","variantId":"17_43063931_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Achromatopsia 2","diseaseFromSourceId":"OMIM:216900","diseaseFromSourceMappedId":null,"literature":"['9662398', '11536077', '18521937']","targetFromSourceId":"Q16281","targetModulation":"up_or_down","variantRsId":"rs104893614","variantFunctionalConsequenceId":"SO_0001583","variantId":"2_98396018_G_A"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Aicardi-Goutieres syndrome 5","diseaseFromSourceId":"OMIM:612952","diseaseFromSourceMappedId":"Orphanet_51","literature":"['24183309', '28229507', '19525956']","targetFromSourceId":"Q9Y3Z3","targetModulation":"up_or_down","variantRsId":"rs515726140","variantFunctionalConsequenceId":"SO_0001583","variantId":"20_36912462_T_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Rhizomelic chondrodysplasia punctata 1","diseaseFromSourceId":"OMIM:215100","diseaseFromSourceMappedId":"MONDO_0008972","literature":"['9090381']","targetFromSourceId":"O00628","targetModulation":"up_or_down","variantRsId":"rs121909151","variantFunctionalConsequenceId":"SO_0001583","variantId":"6_136869909_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Lysinuric protein intolerance","diseaseFromSourceId":"OMIM:222700","diseaseFromSourceMappedId":"MONDO_0009109","literature":"['17764084', '10631139', 
'15776427']","targetFromSourceId":"Q9UM01","targetModulation":"up_or_down","variantRsId":"rs386833799","variantFunctionalConsequenceId":"SO_0001583","variantId":"14_22774441_G_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Neurofibromatosis 1","diseaseFromSourceId":"OMIM:162200","diseaseFromSourceMappedId":"MONDO_0018975","literature":"['9003501', '15060124']","targetFromSourceId":"P21359","targetModulation":"up_or_down","variantRsId":"rs199474743","variantFunctionalConsequenceId":"SO_0001583","variantId":"17_31260403_A_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Charcot-Marie-Tooth disease, demyelinating, 1B","diseaseFromSourceId":"OMIM:118200","diseaseFromSourceMappedId":"MONDO_0007307","literature":"['8797476', '7688964', '10737979', '11437164', '12221176', '14711881']","targetFromSourceId":"P25189","targetModulation":"up_or_down","variantRsId":"rs121913589","variantFunctionalConsequenceId":"SO_0001583","variantId":"1_161306863_C_T"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Atrial septal defect 7, with or without atrioventricular conduction defects","diseaseFromSourceId":"OMIM:108900","diseaseFromSourceMappedId":"MONDO_0007173","literature":"['15810002', '14607454']","targetFromSourceId":"P52952","targetModulation":"up_or_down","variantRsId":"rs387906774","variantFunctionalConsequenceId":"SO_0001583","variantId":"5_173233164_G_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Mitochondrial infantile bilateral striatal necrosis","diseaseFromSourceId":"OMIM:500003","diseaseFromSourceMappedId":null,"literature":"['7668837', '9270604', 
'9501263']","targetFromSourceId":"P00846","targetModulation":"up_or_down","variantRsId":"rs199476135","variantFunctionalConsequenceId":"SO_0001583","variantId":"MT_9176_T_C"} +{"confidence":"high","datasourceId":"uniprot_variants","datatypeId":"genetic_association","diseaseFromSource":"Charcot-Marie-Tooth disease, axonal, 2K","diseaseFromSourceId":"OMIM:607831","diseaseFromSourceMappedId":"MONDO_0011916","literature":"['22206013']","targetFromSourceId":"Q8TB36","targetModulation":"up_or_down","variantRsId":"rs375431837","variantFunctionalConsequenceId":"SO_0001583","variantId":"8_74364135_G_A"} diff --git a/tests/gentropy/step/test_convert_to_vcf_step.py b/tests/gentropy/step/test_convert_to_vcf_step.py new file mode 100644 index 000000000..945f0fce6 --- /dev/null +++ b/tests/gentropy/step/test_convert_to_vcf_step.py @@ -0,0 +1,154 @@ +"""Test convert to vcf step.""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import pandas as pd +import pytest + +from gentropy.common.session import Session +from gentropy.variant_index import ConvertToVcfStep + +if TYPE_CHECKING: + from typing import Any, Literal + + +@pytest.mark.step_test +class TestConvertToVcfStep: + """Test ConvertToVcfStep. + + Test if the step correctly read multiple variant sources and extracts + non duplicated variants and collects to sorted vcf partitions. 
+ """ + + @pytest.mark.parametrize( + ["sources", "partition_size", "expected_partition_number"], + [ + pytest.param( + [ + { + "path": "tests/gentropy/data_samples/variant_sources/uniprot-test.jsonl", + "format": "json", + "n_variants": 50, # 2 variants per chromosome + }, + { + "path": "tests/gentropy/data_samples/variant_sources/eva-test.jsonl", + "format": "json", + "n_variants": 50, # 2 variants per chromosome + }, + { + "path": "tests/gentropy/data_samples/variant_sources/pharmacogenomics-test.jsonl", + "format": "json", + "n_variants": 44, # missing Y and MT, input contains two duplicated variants 22_19963748_G_A and 12_21178615_T_C and + }, + { + "path": "tests/gentropy/data_samples/variant_sources/credible-sets", + "format": "parquet", + "n_variants": 42, # after loci explosion + }, + ], + 10, + 19, # 186 variants / 10 size ~ 19 partitions + id="Multiple OT datasets", + ), + pytest.param( + [ + { + "path": "tests/gentropy/data_samples/variant_sources/credible-sets-extended", + "format": "parquet", + "n_variants": 1187, # after deduplication of locus object + } + ], + 2000, + 1, # 1199 variants / 2000 size ~ 1 partition + id="More variants than spark default partition size", + ), + ], + ) + def test_step( + self, + session: Session, + tmp_path: Path, + sources: list[dict[Literal["path", "format", "n_variants"], Any]], + partition_size: int, + expected_partition_number: int, + ) -> None: + """Test step. + + Expect that step outputs asserted number of partitions, where each partition + contains expected number of variants. 
+ """ + source_paths = [s["path"] for s in sources] + source_formats = [s["format"] for s in sources] + output_path = str(tmp_path / "variants") + ConvertToVcfStep( + session, source_paths, source_formats, output_path, partition_size + ) + + variants_df = session.spark.read.csv(output_path, sep="\t", header=True) + # 40 variants (10 variants from each source) + expected_variant_count = sum(c["n_variants"] for c in sources) + assert ( + variants_df.count() == expected_variant_count + ), "Found incorrect number of variants" + partitions = [ + str(p) for p in Path(output_path).iterdir() if str(p).endswith("csv") + ] + assert ( + len(partitions) == expected_partition_number + ), "Found incorrect number of partitions" + + def test_sorting( + self, + session: Session, + tmp_path: Path, + ) -> None: + """Test sorting in partitions. + + Test if variants within single partition are sorted correctly. + The partition should be naturally sorted by #CHROM and POS fields. + """ + source_path = ( + "tests/gentropy/data_samples/variant_sources/uniprot-test-sort.jsonl" + ) + output_path = str(tmp_path / "variants") + ConvertToVcfStep(session, [source_path], ["json"], output_path, 10) + + partitions = [ + str(p) for p in Path(output_path).iterdir() if str(p).endswith("csv") + ] + assert len(partitions) == 1, "Must be one partition to test variant sorting" + df = pd.read_csv( + partitions[0], + usecols=[0, 1], # just read #CHROM and POS + sep="\t", + ) + assert df.equals( + # values comes from input file tests/gentropy/data_samples/variant_sources/uniprot-test-sorting.jsonl + # NOTE: Natural ordering in CHROM (str) and POS (int) + pd.DataFrame( + [ + ("1", 1525242), + ("1", 161306863), + ("11", 108345818), + ("2", 98396018), + ("21", 44286656), + ("3", 38585800), + ("MT", 6277), + ("X", 129562612), + ("Y", 2787426), + ], + columns=["#CHROM", "POS"], + ) + ), "Variant sorting does not match expectations." 
+ + def test_raises_assertion_imbalanced_arg_ratios(self, session: Session) -> None: + """Test imbalanced argument ratio exception. + + Test if passing uneven number of sources to paths, not 1:1 ratio should result in assertion + """ + with pytest.raises(AssertionError) as e: + ConvertToVcfStep(session, ["dummy_path"], ["json", "json"], "output", 10) + assert e.value[0] == "Must provide format for each source path." From e0304fc49f6c5e25dfbf76ed02b8a04b1048022a Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Mon, 4 Nov 2024 15:14:33 +0000 Subject: [PATCH 150/188] feat: deconvolute studies upon ingestion of GWAS Catalog datasets (#887) * feat: adding logic to deconvolute studies with the same id * fix: import type * fix: import again * fix: flagging condition * feat: adding flag to gwas catalog studies to indicate no summary statistics * feat: adding flags for gwas catalog hop hit ingestion * feat: remove not curated flag if no sumstats available * fix: removing test for duplicated study locus * fix: changing flag name to clarify meaning * fix: flagging condition --- src/gentropy/dataset/study_index.py | 157 ++++++++++++++++++ src/gentropy/dataset/study_locus.py | 92 +++++----- .../datasource/gwas_catalog/study_index.py | 16 ++ src/gentropy/gwas_catalog_top_hits.py | 9 +- src/gentropy/study_locus_validation.py | 6 +- src/gentropy/study_validation.py | 4 +- tests/gentropy/dataset/test_study_locus.py | 37 +---- 7 files changed, 237 insertions(+), 84 deletions(-) diff --git a/src/gentropy/dataset/study_index.py b/src/gentropy/dataset/study_index.py index 92bdc61a4..da310f6f1 100644 --- a/src/gentropy/dataset/study_index.py +++ b/src/gentropy/dataset/study_index.py @@ -10,6 +10,8 @@ from typing import TYPE_CHECKING from pyspark.sql import functions as f +from pyspark.sql.types import ArrayType, StringType, StructType +from pyspark.sql.window import Window from gentropy.assets import data from gentropy.common.schemas import parse_spark_schema @@ -590,3 +592,158 @@ 
def annotate_sumstats_qc( _df=df, _schema=StudyIndex.get_schema(), ) + + def deconvolute_studies(self: StudyIndex) -> StudyIndex: + """Deconvolute the study index dataset. + + When ingesting the study index dataset, the same studyId might be ingested from more than one source. + In such cases, the data needs to be merged and the quality control flags need to be combined. + + Returns: + StudyIndex: Deconvoluted study index dataset. + """ + # Windowing by study ID assume random order, but this is OK, because we are not selecting rows by a specific order. + study_id_window = Window.partitionBy("studyId").orderBy(f.rand()) + + # For certain aggregation, the full window is needed to be considered: + full_study_id_window = study_id_window.orderBy("studyId").rangeBetween( + Window.unboundedPreceding, Window.unboundedFollowing + ) + + # Temporary columns to drop at the end: + columns_to_drop = ["keepTopHit", "mostGranular", "rank"] + + return StudyIndex( + _df=( + self.df + # Initialising quality controls column, if not present: + .withColumn( + "qualityControls", + f.when( + f.col("qualityControls").isNull(), + f.array().cast(ArrayType(StringType())), + ).otherwise(f.col("qualityControls")), + ) + # Keeping top hit studies unless the same study is available from a summmary statistics source: + # This value will be set for all rows for the same `studyId`: + .withColumn( + "keepTopHit", + f.when( + f.array_contains( + f.collect_set(f.col("hasSumstats")).over( + full_study_id_window + ), + True, + ), + f.lit(False), + ).otherwise(True), + ) + # For studies without summary statistics, we remove the "Not curated by Open Targets" flag: + .withColumn( + "qualityControls", + f.when( + ~f.col("hasSumstats"), + f.array_remove( + f.col("qualityControls"), + StudyQualityCheck.NO_OT_CURATION.value, + ), + ).otherwise(f.col("qualityControls")), + ) + # If top hits are not kept, we remove the "sumstats not available" flag from all QC lists: + .withColumn( + "qualityControls", + f.when( 
+ ~f.col("keepTopHit"), + f.array_remove( + f.col("qualityControls"), + StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value, + ), + ).otherwise(f.col("qualityControls")), + ) + # Then propagate quality checks for all sources of the same study: + .withColumn( + "qualityControls", + f.array_distinct( + f.flatten( + f.collect_set("qualityControls").over(full_study_id_window) + ) + ), + ) + # Propagating sumstatQCValues -> map, cannot be flatten: + .withColumn( + "sumstatQCValues", + f.first("sumstatQCValues", ignorenulls=True).over( + full_study_id_window + ), + ) + # Propagating analysisFlags: + .withColumn( + "analysisFlags", + f.flatten( + f.collect_list("analysisFlags").over(full_study_id_window) + ), + ) + # Propagating hasSumstatsFlag - if no flag, leave null: + .withColumn( + "hasSumstats", + f.when( + # There's a true: + f.array_contains( + f.collect_set("hasSumstats").over(full_study_id_window), + True, + ), + f.lit(True), + ).when( + # There's a false: + f.array_contains( + f.collect_set("hasSumstats").over(full_study_id_window), + False, + ), + f.lit(False), + ), + ) + # Propagating disease: when different sets of diseases available for the same study, + # we pick the shortest list, becasuse we assume, that is the most accurate disease assignment: + .withColumn( + "mostGranular", + f.size(f.col("traitFromSourceMappedIds")) + == f.min(f.size(f.col("traitFromSourceMappedIds"))).over( + full_study_id_window + ), + ) + # Remove less granular disease mappings: + .withColumn( + "traitFromSourceMappedIds", + f.when(f.col("mostGranular"), f.col("traitFromSourceMappedIds")), + ) + # Propagate mapped disease: + .withColumn( + "traitFromSourceMappedIds", + f.last(f.col("traitFromSourceMappedIds"), True).over( + full_study_id_window + ), + ) + # Repeating these steps for the `traitFromSource` column: + .withColumn( + "traitFromSource", + f.when(f.col("mostGranular"), f.col("traitFromSource")), + ) + # Propagate disease: + .withColumn( + "traitFromSource", + 
f.last(f.col("traitFromSource"), True).over(full_study_id_window), + ) + # Distinct study types are joined together into a string. So, if there's ambiguite, the study will be flagged when the study type is validated: + .withColumn( + "studyType", + f.concat_ws( + ",", f.collect_set("studyType").over(full_study_id_window) + ), + ) + # At this point, all studies in one window is expected to be identical. Let's just pick one: + .withColumn("rank", f.row_number().over(study_id_window)) + .filter(f.col("rank") == 1) + .drop(*columns_to_drop) + ), + _schema=StudyIndex.get_schema(), + ) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index e685d828f..468ab0efc 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -21,6 +21,7 @@ from gentropy.common.utils import get_logsum from gentropy.config import WindowBasedClumpingStepConfig from gentropy.dataset.dataset import Dataset +from gentropy.dataset.study_index import StudyQualityCheck from gentropy.dataset.study_locus_overlap import StudyLocusOverlap from gentropy.dataset.variant_index import VariantIndex from gentropy.method.clump import LDclumping @@ -74,7 +75,7 @@ class StudyLocusQualityCheck(Enum): WINDOW_CLUMPED (str): Explained by a more significant variant in the same window NO_POPULATION (str): Study does not have population annotation to resolve LD NOT_QUALIFYING_LD_BLOCK (str): LD block does not contain variants at the required R^2 threshold - FAILED_STUDY (str): Flagging study loci if the study has failed QC + FLAGGED_STUDY (str): Study has quality control flag(s) MISSING_STUDY (str): Flagging study loci if the study is not found in the study index as a reference DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique INVALID_VARIANT_IDENTIFIER (str): Flagging study loci where identifier of any tagging variant was not found in the variant index @@ -85,6 +86,7 @@ class StudyLocusQualityCheck(Enum): ABNORMAL_PIPS (str): 
Flagging study loci with a sum of PIPs that are not in [0.99,1] OUT_OF_SAMPLE_LD (str): Study locus finemapped without in-sample LD reference INVALID_CHROMOSOME (str): Chromosome not in 1:22, X, Y, XY or MT + TOP_HIT_AND_SUMMARY_STATS (str): Curated top hit is flagged because summary statistics are available for study """ SUBSIGNIFICANT_FLAG = "Subsignificant p-value" @@ -101,7 +103,7 @@ class StudyLocusQualityCheck(Enum): NOT_QUALIFYING_LD_BLOCK = ( "LD block does not contain variants at the required R^2 threshold" ) - FAILED_STUDY = "Study has failed quality controls" + FLAGGED_STUDY = "Study has quality control flag(s)" MISSING_STUDY = "Study not found in the study index" DUPLICATED_STUDYLOCUS_ID = "Non-unique study locus identifier" INVALID_VARIANT_IDENTIFIER = ( @@ -114,8 +116,13 @@ class StudyLocusQualityCheck(Enum): TOP_HIT = "Study locus from curated top hit" EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set" OUT_OF_SAMPLE_LD = "Study locus finemapped without in-sample LD reference" - ABNORMAL_PIPS = "Study locus with a sum of PIPs that not in the expected range [0.99,1]" + ABNORMAL_PIPS = ( + "Study locus with a sum of PIPs that not in the expected range [0.99,1]" + ) INVALID_CHROMOSOME = "Chromosome not in 1:22, X, Y, XY or MT" + TOP_HIT_AND_SUMMARY_STATS = ( + "Curated top hit is flagged because summary statistics are available for study" + ) class CredibleInterval(Enum): @@ -143,7 +150,8 @@ def validate_study(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: """Flagging study loci if the corresponding study has issues. There are two different potential flags: - - failed study: flagging locus if the corresponding study has failed a quality check. + - flagged study: flagging locus if the study has quality control flags. + - study with summary statistics for top hit: flagging locus if the study has available summary statistics. - missing study: flagging locus if the study was not found in the reference study index. 
Args: @@ -159,6 +167,7 @@ def validate_study(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: else f.lit(None).cast(StringType()) ) + # The study Id of the study index needs to be kept, because we would not know which study was in the index after the left join: study_flags = study_index.df.select( f.col("studyId").alias("study_studyId"), qc_select_expression.alias("study_qualityControls"), @@ -169,13 +178,30 @@ def validate_study(self: StudyLocus, study_index: StudyIndex) -> StudyLocus: self.df.join( study_flags, f.col("studyId") == f.col("study_studyId"), "left" ) - # Flagging loci with failed studies: + # Flagging loci with flagged studies - without propagating the actual flags: .withColumn( "qualityControls", StudyLocus.update_quality_flag( f.col("qualityControls"), f.size(f.col("study_qualityControls")) > 0, - StudyLocusQualityCheck.FAILED_STUDY, + StudyLocusQualityCheck.FLAGGED_STUDY, + ), + ) + # Flagging top-hits, where the study has available summary statistics: + .withColumn( + "qualityControls", + StudyLocus.update_quality_flag( + f.col("qualityControls"), + # Condition is true, if the study has summary statistics available and the locus is a top hit: + f.array_contains( + f.col("qualityControls"), + StudyLocusQualityCheck.TOP_HIT.value, + ) + & ~f.array_contains( + f.col("study_qualityControls"), + StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value, + ), + StudyLocusQualityCheck.TOP_HIT_AND_SUMMARY_STATS, ), ) # Flagging loci where no studies were found: @@ -396,7 +422,7 @@ def _qc_subsignificant_associations( def qc_abnormal_pips( self: StudyLocus, sum_pips_lower_threshold: float = 0.99, - sum_pips_upper_threshold: float = 1.0001, # Set slightly above 1 to account for floating point errors + sum_pips_upper_threshold: float = 1.0001, # Set slightly above 1 to account for floating point errors ) -> StudyLocus: """Filter study-locus by sum of posterior inclusion probabilities to ensure that the sum of PIPs is within a given range. 
@@ -414,32 +440,36 @@ def qc_abnormal_pips( else f.lit(None).cast(ArrayType(StringType())) ) - flag = (self.df.withColumn( + flag = self.df.withColumn( "sumPosteriorProbability", f.aggregate( f.col("locus"), f.lit(0.0), - lambda acc, x: acc + x["posteriorProbability"] - )).withColumn( - "pipOutOfRange", - f.when( - (f.col("sumPosteriorProbability") < sum_pips_lower_threshold) | - (f.col("sumPosteriorProbability") > sum_pips_upper_threshold), - True - ).otherwise(False))) + lambda acc, x: acc + x["posteriorProbability"], + ), + ).withColumn( + "pipOutOfRange", + f.when( + (f.col("sumPosteriorProbability") < sum_pips_lower_threshold) + | (f.col("sumPosteriorProbability") > sum_pips_upper_threshold), + True, + ).otherwise(False), + ) return StudyLocus( - _df=(flag + _df=( + flag # Flagging loci with failed studies: .withColumn( "qualityControls", self.update_quality_flag( qc_select_expression, f.col("pipOutOfRange"), - StudyLocusQualityCheck.ABNORMAL_PIPS + StudyLocusQualityCheck.ABNORMAL_PIPS, ), - ).drop("sumPosteriorProbability", "pipOutOfRange")), - _schema=self.get_schema() + ).drop("sumPosteriorProbability", "pipOutOfRange") + ), + _schema=self.get_schema(), ) @staticmethod @@ -641,28 +671,6 @@ def get_QC_mappings(cls: type[StudyLocus]) -> dict[str, str]: """ return {member.name: member.value for member in StudyLocusQualityCheck} - def filter_by_study_type(self: StudyLocus, study_type: str) -> StudyLocus: - """Creates a new StudyLocus dataset filtered by study type. - - Args: - study_type (str): Study type to filter for. Can be one of `gwas`, `eqtl`, `pqtl`, `eqtl`. - - Returns: - StudyLocus: Filtered study-locus dataset. - - Raises: - ValueError: If study type is not supported. - """ - if study_type not in ["gwas", "eqtl", "pqtl", "sqtl"]: - raise ValueError( - f"Study type {study_type} not supported. Supported types are: gwas, eqtl, pqtl, sqtl." 
- ) - new_df = self.df.filter(f.col("studyType") == study_type).drop("studyType") - return StudyLocus( - _df=new_df, - _schema=self._schema, - ) - def filter_credible_set( self: StudyLocus, credible_interval: CredibleInterval, diff --git a/src/gentropy/datasource/gwas_catalog/study_index.py b/src/gentropy/datasource/gwas_catalog/study_index.py index 421f53d0f..c01d6d263 100644 --- a/src/gentropy/datasource/gwas_catalog/study_index.py +++ b/src/gentropy/datasource/gwas_catalog/study_index.py @@ -647,6 +647,22 @@ def apply_inclusion_list( _schema=StudyIndexGWASCatalog.get_schema(), ) + def add_no_sumstats_flag(self: StudyIndexGWASCatalog) -> StudyIndexGWASCatalog: + """Add a flag to the study index if no summary statistics are available. + + Returns: + StudyIndexGWASCatalog: Updated study index. + """ + self.df = self.df.withColumn( + "qualityControls", + StudyIndex.update_quality_flag( + f.col("qualityControls"), + ~f.col("hasSumstats"), + StudyQualityCheck.SUMSTATS_NOT_AVAILABLE, + ), + ) + return self + @staticmethod def _parse_gwas_catalog_study_id(sumstats_path_column: str) -> Column: """Extract GWAS Catalog study accession from the summary statistics path. 
diff --git a/src/gentropy/gwas_catalog_top_hits.py b/src/gentropy/gwas_catalog_top_hits.py index 95722c768..2295900e8 100644 --- a/src/gentropy/gwas_catalog_top_hits.py +++ b/src/gentropy/gwas_catalog_top_hits.py @@ -60,7 +60,14 @@ def __init__( ), ) # Load - study_index.df.write.mode(session.write_mode).parquet(catalog_studies_out) + ( + study_index + # Flag all studies without sumstats + .add_no_sumstats_flag() + # Save dataset: + .df.write.mode(session.write_mode) + .parquet(catalog_studies_out) + ) ( study_locus.window_based_clumping(distance) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 1c8ae161c..1d1d128b6 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -46,13 +46,13 @@ def __init__( .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics .qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE # Flagging credible sets with PIP > 1 or PIP < 0.99 - .qc_abnormal_pips(sum_pips_lower_threshold=0.99,sum_pips_upper_threshold=1.0001) + .qc_abnormal_pips( + sum_pips_lower_threshold=0.99, sum_pips_upper_threshold=1.0001 + ) # Annotates credible intervals and filter to only keep 99% credible sets .filter_credible_set(credible_interval=CredibleInterval.IS99) # Annotate credible set confidence: .assign_confidence() - # Flagging credible sets that are duplicated: - .validate_unique_study_locus_id() ).persist() # we will need this for 2 types of outputs study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( diff --git a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index a9bebe25e..08f601f1e 100644 --- a/src/gentropy/study_validation.py +++ b/src/gentropy/study_validation.py @@ -62,8 +62,8 @@ def __init__( # Running validation: study_index_with_qc = ( - study_index.validate_unique_study_id() # Flagging duplicated study ids - .validate_study_type() # Flagging 
non-supported study types. + study_index.deconvolute_studies() # Deconvolute studies where the same study is ingested from multiple sources + .validate_study_type() # Flagging non-supported study types .validate_target(target_index) # Flagging QTL studies with invalid targets .validate_disease(disease_index) # Flagging invalid EFOs .validate_biosample( diff --git a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 7f15a11a6..1d34479e1 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -151,41 +151,6 @@ def test_find_overlaps(mock_study_locus: StudyLocus) -> None: assert isinstance(mock_study_locus.find_overlaps(), StudyLocusOverlap) -@pytest.mark.parametrize( - "study_type, expected_sl_count", [("gwas", 1), ("eqtl", 1), ("pqtl", 0)] -) -def test_filter_by_study_type( - spark: SparkSession, study_type: str, expected_sl_count: int -) -> None: - """Test filter by study type.""" - # Input data - sl = StudyLocus( - _df=spark.createDataFrame( - [ - { - # from gwas - "studyLocusId": "1", - "variantId": "lead1", - "studyId": "study1", - "studyType": "gwas", - }, - { - # from eqtl - "studyLocusId": "2", - "variantId": "lead2", - "studyId": "study2", - "studyType": "eqtl", - }, - ], - StudyLocus.get_schema(), - ), - _schema=StudyLocus.get_schema(), - ) - - observed = sl.filter_by_study_type(study_type) - assert observed.df.count() == expected_sl_count - - def test_annotate_locus_statistics( mock_study_locus: StudyLocus, mock_summary_statistics: SummaryStatistics ) -> None: @@ -797,7 +762,7 @@ def test_study_validation_correctness(self: TestStudyLocusValidation) -> None: self.study_locus.validate_study(self.study_index) .df.filter( f.array_contains( - f.col("qualityControls"), StudyLocusQualityCheck.FAILED_STUDY.value + f.col("qualityControls"), StudyLocusQualityCheck.FLAGGED_STUDY.value ) ) .count() From 3639b23829eb1d167d28726d6262df2ddebc18e3 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 4 Nov 2024 16:12:19 +0000 Subject: [PATCH 151/188] fix(`credibleSetConfidence`): inner join between study locus and variant index to avoid null genes (#890) --- src/gentropy/dataset/l2g_features/other.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/l2g_features/other.py b/src/gentropy/dataset/l2g_features/other.py index 4c28c2a0c..2fc32592b 100644 --- a/src/gentropy/dataset/l2g_features/other.py +++ b/src/gentropy/dataset/l2g_features/other.py @@ -291,10 +291,10 @@ def compute( ), ), on="variantId", - how="left", + how="inner", ) # Annotate credible set confidence - .join(full_credible_set, ["variantId", "studyId"], "left") + .join(full_credible_set, ["variantId", "studyId"]) .select("studyLocusId", "geneId", cls.feature_name) ), id_vars=("studyLocusId", "geneId"), From 04b1e222c51bc24387116f2b7535e40931a06458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Tue, 5 Nov 2024 11:24:12 +0000 Subject: [PATCH 152/188] feat(feature_matrix): impute values for gene attribute cols (#895) * feat(feature_matrix): impute values for gene attribute cols + semantic test * fix: change window * chore: fill na in the feature matrix generation step --- src/gentropy/dataset/l2g_feature_matrix.py | 22 ++++++- src/gentropy/dataset/l2g_gold_standard.py | 2 +- src/gentropy/dataset/study_locus.py | 2 +- src/gentropy/l2g.py | 46 ++++++++------- .../dataset/test_l2g_feature_matrix.py | 57 +++++++++++++++++++ 5 files changed, 102 insertions(+), 27 deletions(-) diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index bb4942312..9caa4b58a 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -5,6 +5,8 @@ from functools import reduce from typing import TYPE_CHECKING, 
Type +import pyspark.sql.functions as f +from pyspark.sql import Window from typing_extensions import Self from gentropy.common.spark_helpers import convert_from_long_to_wide @@ -128,18 +130,32 @@ def calculate_feature_missingness_rate( } def fill_na( - self: L2GFeatureMatrix, value: float = 0.0, subset: list[str] | None = None + self: L2GFeatureMatrix, na_value: float = 0.0, subset: list[str] | None = None ) -> L2GFeatureMatrix: """Fill missing values in a column with a given value. + For features that correspond to gene attributes, missing values are imputed using the mean of the column. + Args: - value (float): Value to replace missing values with. Defaults to 0.0. + na_value (float): Value to replace missing values with. Defaults to 0.0. subset (list[str] | None): Subset of columns to consider. Defaults to None. Returns: L2GFeatureMatrix: L2G feature matrix dataset """ - self._df = self._df.fillna(value, subset=subset) + cols_to_impute = ["proteinGeneCount500kb", "geneCount500kb", "isProteinCoding"] + for col in cols_to_impute: + if col not in self._df.columns: + continue + else: + self._df = self._df.withColumn( + col, + f.when( + f.col(col).isNull(), + f.mean(f.col(col)).over(Window.partitionBy("studyLocusId")), + ).otherwise(f.col(col)), + ) + self._df = self._df.fillna(na_value, subset=subset) return self def select_features( diff --git a/src/gentropy/dataset/l2g_gold_standard.py b/src/gentropy/dataset/l2g_gold_standard.py index f1df3a700..ec99f7141 100644 --- a/src/gentropy/dataset/l2g_gold_standard.py +++ b/src/gentropy/dataset/l2g_gold_standard.py @@ -135,7 +135,7 @@ def build_feature_matrix( .drop("studyId", "variantId") .distinct(), with_gold_standard=True, - ) + ).fill_na() def filter_unique_associations( self: L2GGoldStandard, diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 468ab0efc..908c093b6 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -806,7 +806,7 @@ 
def build_feature_matrix( self, features_list, features_input_loader, - ) + ).fill_na() def annotate_credible_sets(self: StudyLocus) -> StudyLocus: """Annotate study-locus dataset with credible set flags. diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 1004fa0fb..79be2385a 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -274,7 +274,6 @@ def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: gold_standards.build_feature_matrix( self.feature_matrix, self.credible_set ) - .fill_na() .select_features(self.features_list) .persist() ) @@ -322,6 +321,7 @@ def __init__( .json(evidence_output_path) ) + class LocusToGeneAssociationsStep: """Locus to gene associations step.""" @@ -343,39 +343,41 @@ def __init__( indirect_associations_output_path (str): Path to the indirect associations output dataset """ # Read in the disease index - disease_index = ( - session.spark.read.parquet(disease_index_path) - .select( - f.col("id").alias("diseaseId"), - f.explode("ancestors").alias("ancestorDiseaseId") - ) + disease_index = session.spark.read.parquet(disease_index_path).select( + f.col("id").alias("diseaseId"), + f.explode("ancestors").alias("ancestorDiseaseId"), ) # Read in the L2G evidence - disease_target_evidence = ( - session.spark.read.json(evidence_input_path) - .select( - f.col("targetFromSourceId").alias("targetId"), - f.col("diseaseFromSourceMappedId").alias("diseaseId"), - f.col("resourceScore") - ) + disease_target_evidence = session.spark.read.json(evidence_input_path).select( + f.col("targetFromSourceId").alias("targetId"), + f.col("diseaseFromSourceMappedId").alias("diseaseId"), + f.col("resourceScore"), ) # Generate direct assocations and save file ( - disease_target_evidence - .groupBy("targetId", "diseaseId") + disease_target_evidence.groupBy("targetId", "diseaseId") .agg(f.collect_set("resourceScore").alias("scores")) - .select("targetId", "diseaseId", calculate_harmonic_sum(f.col("scores")).alias("harmonicSum")) - 
.write.mode(session.write_mode).parquet(direct_associations_output_path) + .select( + "targetId", + "diseaseId", + calculate_harmonic_sum(f.col("scores")).alias("harmonicSum"), + ) + .write.mode(session.write_mode) + .parquet(direct_associations_output_path) ) # Generate indirect assocations and save file ( - disease_target_evidence - .join(disease_index, on="diseaseId", how="inner") + disease_target_evidence.join(disease_index, on="diseaseId", how="inner") .groupBy("targetId", "ancestorDiseaseId") .agg(f.collect_set("resourceScore").alias("scores")) - .select("targetId", "ancestorDiseaseId", calculate_harmonic_sum(f.col("scores")).alias("harmonicSum")) - .write.mode(session.write_mode).parquet(indirect_associations_output_path) + .select( + "targetId", + "ancestorDiseaseId", + calculate_harmonic_sum(f.col("scores")).alias("harmonicSum"), + ) + .write.mode(session.write_mode) + .parquet(indirect_associations_output_path) ) diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index 76661e170..f821daaac 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING +import pyspark.sql.functions as f import pytest from pyspark.sql.types import ( ArrayType, @@ -184,3 +185,59 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: ), _schema=GeneIndex.get_schema(), ) + + +def test_fill_na(spark: SparkSession) -> None: + """Tests L2GFeatureMatrix.fill_na, particularly the imputation logic.""" + sample_fm = L2GFeatureMatrix( + _df=spark.createDataFrame( + [ + { + "studyLocusId": "1", + "geneId": "gene1", + "proteinGeneCount500kb": 3.0, + "geneCount500kb": 8.0, + "isProteinCoding": 1.0, + "anotherFeature": None, + }, + { + "studyLocusId": "1", + "geneId": "gene2", + "proteinGeneCount500kb": 4.0, + "geneCount500kb": 10.0, + "isProteinCoding": 1.0, + "anotherFeature": None, + }, + { + 
"studyLocusId": "1", + "geneId": "gene3", + "proteinGeneCount500kb": None, + "geneCount500kb": None, + "isProteinCoding": None, + "anotherFeature": None, + }, + ], + schema="studyLocusId STRING, geneId STRING, proteinGeneCount500kb DOUBLE, geneCount500kb DOUBLE, isProteinCoding DOUBLE, anotherFeature DOUBLE", + ), + ) + observed_df = sample_fm.fill_na()._df.filter(f.col("geneId") == "gene3") + expected_df_missing_row = spark.createDataFrame( + [ + { + "studyLocusId": "1", + "geneId": "gene3", + "proteinGeneCount500kb": 3.5, + "geneCount500kb": 9.0, + "isProteinCoding": 1.0, + "anotherFeature": 0.0, + }, + ], + ).select( + "studyLocusId", + "geneId", + "proteinGeneCount500kb", + "geneCount500kb", + "isProteinCoding", + "anotherFeature", + ) + assert observed_df.collect() == expected_df_missing_row.collect() From 94abc792f0b515d605d8b4d8b9908c5a8c2b6789 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 5 Nov 2024 13:23:25 +0000 Subject: [PATCH 153/188] feat: adding l2g features to prediction table (#899) * feat: adding l2g features to prediction table * fix: renaming method for better name * fix: remove show statement * fix: dropping locusToGeneFeatures if already exist * feat: dropping features with null values from the map --- .../assets/schemas/l2g_predictions.json | 11 ++++ src/gentropy/dataset/l2g_prediction.py | 51 +++++++++++++++++++ src/gentropy/l2g.py | 6 +-- 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/gentropy/assets/schemas/l2g_predictions.json b/src/gentropy/assets/schemas/l2g_predictions.json index 238ff4087..57247a49a 100644 --- a/src/gentropy/assets/schemas/l2g_predictions.json +++ b/src/gentropy/assets/schemas/l2g_predictions.json @@ -18,6 +18,17 @@ "type": "double", "nullable": false, "metadata": {} + }, + { + "metadata": {}, + "name": "locusToGeneFeatures", + "nullable": true, + "type": { + "keyType": "string", + "type": "map", + "valueContainsNull": true, + "valueType": "float" + } } ] } diff --git 
a/src/gentropy/dataset/l2g_prediction.py b/src/gentropy/dataset/l2g_prediction.py index 169f5a846..64ce964c7 100644 --- a/src/gentropy/dataset/l2g_prediction.py +++ b/src/gentropy/dataset/l2g_prediction.py @@ -126,3 +126,54 @@ def to_disease_target_evidence( "studyLocusId", ) ) + + def add_locus_to_gene_features( + self: L2GPrediction, feature_matrix: L2GFeatureMatrix + ) -> L2GPrediction: + """Add features to the L2G predictions. + + Args: + feature_matrix (L2GFeatureMatrix): Feature matrix dataset + + Returns: + L2GPrediction: L2G predictions with additional features + """ + # Testing if `locusToGeneFeatures` column already exists: + if "locusToGeneFeatures" in self.df.columns: + self.df = self.df.drop("locusToGeneFeatures") + + # Columns identifying a studyLocus/gene pair + prediction_id_columns = ["studyLocusId", "geneId"] + + # L2G matrix columns to build the map: + columns_to_map = [ + column + for column in feature_matrix._df.columns + if column not in prediction_id_columns + ] + + # Aggregating all features into a single map column: + aggregated_features = ( + feature_matrix._df.withColumn( + "locusToGeneFeatures", + f.create_map( + *sum( + [ + (f.lit(colname), f.col(colname)) + for colname in columns_to_map + ], + (), + ) + ), + ) + # from the freshly created map, we filter out the null values + .withColumn( + "locusToGeneFeatures", + f.expr("map_filter(locusToGeneFeatures, (k, v) -> v is not null)"), + ) + .drop(*columns_to_map) + ) + return L2GPrediction( + _df=self.df.join(aggregated_features, on=prediction_id_columns, how="left"), + _schema=self.get_schema(), + ) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 79be2385a..cc1b5a5d1 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -190,9 +190,9 @@ def run_predict(self) -> None: hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), download_from_hub=self.download_from_hub, ) - predictions.df.write.mode(self.session.write_mode).parquet( - self.predictions_path - ) 
+ predictions.add_locus_to_gene_features(self.feature_matrix).df.write.mode( + self.session.write_mode + ).parquet(self.predictions_path) self.session.logger.info("L2G predictions saved successfully.") def run_train(self) -> None: From 4d8e7c42e8e94473495d12d50efb7d7942afbe73 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Tue, 5 Nov 2024 13:39:24 +0000 Subject: [PATCH 154/188] fix: ensure the #CHROM is not quoted (#896) Co-authored-by: Szymon Szyszkowski Co-authored-by: Daniel Suveges --- src/gentropy/variant_index.py | 20 +++++++-- .../gentropy/step/test_convert_to_vcf_step.py | 41 ++++++++++--------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index 4843553e8..9eac684b2 100644 --- a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -64,7 +64,13 @@ def __init__( class ConvertToVcfStep: - """Convert dataset with variant annotation to VCF step.""" + """Convert dataset with variant annotation to VCF step. + + This step converts in-house data source formats to VCF like format. + + NOTE! Due to the csv DataSourceWriter limitations we can not save the column name + `#CHROM` as in vcf file. The column is replaced with `CHROM`. + """ def __init__( self, @@ -112,8 +118,14 @@ def __init__( .sortWithinPartitions(f.col("#CHROM").asc(), f.col("POS").asc()) # Due to the large number of partitions ensure we do not lose the partitions before saving them .persist() + # FIXME the #CHROM column is saved as "#CHROM" by pyspark which fails under VEP, + # The native solution would be to implement the datasource with proper writer + # see https://docs.databricks.com/en/pyspark/datasources.html. + # Proposed solution will require adding # at the start of the first line of + # vcf before processing it in orchestration. 
+ .withColumnRenamed("#CHROM", "CHROM") ) # Write - partitioned_variants.write.mode(session.write_mode).csv( - output_path, sep="\t", header=True - ) + partitioned_variants.write.mode(session.write_mode).option("sep", "\t").option( + "quote", "" + ).option("quoteAll", False).option("header", True).csv(output_path) diff --git a/tests/gentropy/step/test_convert_to_vcf_step.py b/tests/gentropy/step/test_convert_to_vcf_step.py index 945f0fce6..cc4ec800d 100644 --- a/tests/gentropy/step/test_convert_to_vcf_step.py +++ b/tests/gentropy/step/test_convert_to_vcf_step.py @@ -115,7 +115,6 @@ def test_sorting( ) output_path = str(tmp_path / "variants") ConvertToVcfStep(session, [source_path], ["json"], output_path, 10) - partitions = [ str(p) for p in Path(output_path).iterdir() if str(p).endswith("csv") ] @@ -125,24 +124,28 @@ def test_sorting( usecols=[0, 1], # just read #CHROM and POS sep="\t", ) - assert df.equals( - # values comes from input file tests/gentropy/data_samples/variant_sources/uniprot-test-sorting.jsonl - # NOTE: Natural ordering in CHROM (str) and POS (int) - pd.DataFrame( - [ - ("1", 1525242), - ("1", 161306863), - ("11", 108345818), - ("2", 98396018), - ("21", 44286656), - ("3", 38585800), - ("MT", 6277), - ("X", 129562612), - ("Y", 2787426), - ], - columns=["#CHROM", "POS"], - ) - ), "Variant sorting does not match expectations." + # values comes from input file tests/gentropy/data_samples/variant_sources/uniprot-test-sorting.jsonl + # NOTE: Natural ordering in CHROM (str) and POS (int) + with open(partitions[0]) as fp: + assert fp.readline().startswith("CHROM\tPOS") + + expected_df = pd.DataFrame( + [ + ("1", 1525242), + ("1", 161306863), + ("11", 108345818), + ("2", 98396018), + ("21", 44286656), + ("3", 38585800), + ("MT", 6277), + ("X", 129562612), + ("Y", 2787426), + ], + columns=["CHROM", "POS"], + ) + + assert list(df.columns) == list(expected_df.columns) + assert df.equals(expected_df), "Variant sorting does not match expectations." 
def test_raises_assertion_imbalanced_arg_ratios(self, session: Session) -> None: """Test imbalanced argument ratio exception. From 2af1074def5d6e02838c2188179270ec72b2cee2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Tue, 5 Nov 2024 14:18:02 +0000 Subject: [PATCH 155/188] feat(feature_matrix): extract features for gwas associations only (#901) --- src/gentropy/l2g.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index cc1b5a5d1..b585166c1 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -84,7 +84,9 @@ def __init__( gene_index=gene_index, ) - fm = credible_set.build_feature_matrix(features_list, features_input_loader) + fm = credible_set.filter(f.col("studyType") == "gwas").build_feature_matrix( + features_list, features_input_loader + ) fm._df.write.mode(session.write_mode).parquet(feature_matrix_path) From 6ec0d45baf48eeefa04c79632645c893f128b150 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 5 Nov 2024 15:30:56 +0000 Subject: [PATCH 156/188] fix: do not impute `isProteinCoding` (#902) * fix: fix col names for imputation * fix: fix v1 * fix: test --- src/gentropy/dataset/l2g_feature_matrix.py | 5 ++++- tests/gentropy/dataset/test_l2g_feature_matrix.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index 9caa4b58a..f59e1e725 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -143,7 +143,10 @@ def fill_na( Returns: L2GFeatureMatrix: L2G feature matrix dataset """ - cols_to_impute = ["proteinGeneCount500kb", "geneCount500kb", "isProteinCoding"] + cols_to_impute = [ + "proteinGeneCount500kb", + "geneCount500kb", + ] for col in cols_to_impute: if col not in self._df.columns: continue diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py 
b/tests/gentropy/dataset/test_l2g_feature_matrix.py index f821daaac..4fe338254 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -228,7 +228,7 @@ def test_fill_na(spark: SparkSession) -> None: "geneId": "gene3", "proteinGeneCount500kb": 3.5, "geneCount500kb": 9.0, - "isProteinCoding": 1.0, + "isProteinCoding": 0.0, "anotherFeature": 0.0, }, ], From c305dbef6ccd9adc5f17ae723b4f6ffb10c8f6e3 Mon Sep 17 00:00:00 2001 From: Tobi Alegbe Date: Wed, 6 Nov 2024 11:34:00 +0000 Subject: [PATCH 157/188] fix: reclassify eqtl catalogue sc datasets (#894) * fix: tweak sc vs bulk eqtl catalogue logic * fix: update tests * fix: correct coloc calling of method * fix: address PR comments * fix: change string to col * fix: change eqtl catalogue path to specific commit * chore: fix method description --- src/gentropy/dataset/colocalisation.py | 4 +- .../datasource/eqtl_catalogue/finemapping.py | 4 +- .../datasource/eqtl_catalogue/study_index.py | 40 ++++++++----------- 3 files changed, 19 insertions(+), 29 deletions(-) diff --git a/src/gentropy/dataset/colocalisation.py b/src/gentropy/dataset/colocalisation.py index db0040652..e384dd796 100644 --- a/src/gentropy/dataset/colocalisation.py +++ b/src/gentropy/dataset/colocalisation.py @@ -61,11 +61,11 @@ def extract_maximum_coloc_probability_per_region_and_gene( from gentropy.colocalisation import ColocalisationStep valid_qtls = list( - set(EqtlCatalogueStudyIndex.method_to_study_type_mapping.values()) + set(EqtlCatalogueStudyIndex.method_to_qtl_type_mapping.values()) ) + [ f"sc{qtl}" for qtl in set( - EqtlCatalogueStudyIndex.method_to_study_type_mapping.values() + EqtlCatalogueStudyIndex.method_to_qtl_type_mapping.values() ) ] diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index ea46359df..ea4264fdd 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ 
b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -179,9 +179,7 @@ def parse_susie_results( f.col("molecular_trait_id"), ).alias("studyId"), f.col("tissue_id").alias("biosampleFromSourceId"), - EqtlCatalogueStudyIndex._identify_study_type( - f.col("quant_method"), f.col("tissue_id") - ).alias("studyType"), + EqtlCatalogueStudyIndex._identify_study_type().alias("studyType"), f.col("study_label").alias("projectId"), f.concat_ws( "/", diff --git a/src/gentropy/datasource/eqtl_catalogue/study_index.py b/src/gentropy/datasource/eqtl_catalogue/study_index.py index d284eb781..b1bcfb17d 100644 --- a/src/gentropy/datasource/eqtl_catalogue/study_index.py +++ b/src/gentropy/datasource/eqtl_catalogue/study_index.py @@ -42,10 +42,11 @@ class EqtlCatalogueStudyIndex: StructField("sample_size", IntegerType(), True), StructField("quant_method", StringType(), True), StructField("pmid", StringType(), True), + StructField("study_type", StringType(), True), ] ) - raw_studies_metadata_path = "https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/092e01a9601feb404f1c88f86311b43b907a88f6/data_tables/dataset_metadata_upcoming.tsv" - method_to_study_type_mapping = { + raw_studies_metadata_path = "https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/fe3c4b4ed911b3a184271a6aadcd8c8769a66aba/data_tables/dataset_metadata.tsv" + method_to_qtl_type_mapping = { "ge": "eqtl", "exon": "eqtl", "tx": "eqtl", @@ -58,38 +59,29 @@ class EqtlCatalogueStudyIndex: @classmethod def _identify_study_type( cls: type[EqtlCatalogueStudyIndex], - quantification_method_col: Column, - biosample_col: Column, ) -> Column: - """Identify the study type based on the method to quantify the trait and the biosample where the trait was measured. - - The quantification method identifies the type of molecular QTLs that were found. - The biosample identifies the biosample where the trait was measured, distinguishing between bulk and single cell. 
- - Args: - quantification_method_col (Column): column with the label of the method to quantify the trait. Available methods are [here](https://www.ebi.ac.uk/eqtl/Methods/) - biosample_col (Column): column with the label of the biosample where the trait was measured. + """Identify the qtl type based on the quantification method and eqtl catalogue study type. Returns: Column: The study type. Examples: - >>> df = spark.createDataFrame([("ge", "CL_1"), ("leafcutter", "UBERON_2"), ("tx", "EFO_3")], ["quant_method", "tissue_id"]) - >>> df.withColumn("study_type", EqtlCatalogueStudyIndex._identify_study_type(f.col("quant_method"), f.col("tissue_id"))).show() - +------------+---------+----------+ - |quant_method|tissue_id|study_type| - +------------+---------+----------+ - | ge| CL_1| sceqtl| - | leafcutter| UBERON_2| sqtl| - | tx| EFO_3| eqtl| - +------------+---------+----------+ + >>> df = spark.createDataFrame([("ge", "bulk"), ("leafcutter", "bulk"), ("tx", "single-cell")], ["quant_method", "study_type"]) + >>> df.withColumn("studyType", EqtlCatalogueStudyIndex._identify_study_type()).show() + +------------+-----------+---------+ + |quant_method| study_type|studyType| + +------------+-----------+---------+ + | ge| bulk| eqtl| + | leafcutter| bulk| sqtl| + | tx|single-cell| sceqtl| + +------------+-----------+---------+ """ qtl_type_mapping = f.create_map( - *[f.lit(x) for x in chain(*cls.method_to_study_type_mapping.items())] - )[quantification_method_col] + *[f.lit(x) for x in chain(*cls.method_to_qtl_type_mapping.items())] + )[f.col("quant_method")] return f.when( - biosample_col.startswith("CL"), f.concat(f.lit("sc"), qtl_type_mapping) + f.col("study_type") == "single-cell", f.concat(f.lit("sc"), qtl_type_mapping) ).otherwise(qtl_type_mapping) @classmethod From ebde0da0f247db43f0bd1d3c9de0e590a618229c Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Wed, 6 Nov 2024 17:14:36 +0000 Subject: [PATCH 158/188] feat: improve partitioning of credible sets (#900) --- 
src/gentropy/study_locus_validation.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 1d1d128b6..d5dd900de 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -55,10 +55,14 @@ def __init__( .assign_confidence() ).persist() # we will need this for 2 types of outputs - study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( - session.write_mode - ).parquet(invalid_study_locus_path) + # Valid study locus partitioned to simplify the finding of overlaps + study_locus_with_qc.valid_rows( + invalid_qc_reasons, invalid=True + ).df.repartitionByRange("chromosome", "position").sortWithinPartitions( + "chromosome", "position" + ).write.mode(session.write_mode).parquet(invalid_study_locus_path) + # Infalid study locus study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.mode( session.write_mode ).parquet(valid_study_locus_path) From 0d3c01bcb0456c72d76d41f5108d498c0c29c6d2 Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:15:57 +0000 Subject: [PATCH 159/188] fix: using the 99% PIP cs column, (#904) * feat: changing to 99 credible sets * fix: change summary schema * fix: adding purity metrics * fix: updating test data samples * fix: updating test data samples * Update finemapping.py --- .../datasource/finngen/finemapping.py | 20 ++++++++++----- .../finngen_credset_summary_sample.tsv | 24 +++++++++--------- .../finngen_credset_summary_sample.tsv.bgz | Bin 1338 -> 1350 bytes 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 36ab97e80..723d918bf 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -105,8 +105,10 @@ class 
FinnGenFinemapping: [ StructField("trait", StringType(), True), StructField("region", StringType(), True), - StructField("cs", StringType(), True), + StructField("cs_number", StringType(), True), StructField("cs_log10bf", DoubleType(), True), + StructField("cs_avg_r2", DoubleType(), True), + StructField("cs_min_r2", DoubleType(), True), ] ) @@ -182,8 +184,10 @@ class FinnGenFinemapping: summary_hail_schema: hl.tstruct = hl.tstruct( trait=hl.tstr, region=hl.tstr, - cs=hl.tstr, + cs_number=hl.tstr, cs_log10bf=hl.tfloat64, + cs_avg_r2=hl.tfloat64, + cs_min_r2=hl.tfloat64, ) @staticmethod @@ -241,7 +245,7 @@ def from_finngen_susie_finemapping( The finngen_susie_finemapping_cs_summary_files are files that Contains credible set summaries from SuSiE fine-mapping for all genome-wide significant regions with following schema: - trait: phenotype - region: region for which the fine-mapping was run. - - cs: running number for independent credible sets in a region + - cs_number: running number for independent credible sets in a region, assigned to 99% PIP - cs_log10bf: Log10 bayes factor of comparing the solution of this model (cs independent credible sets) to cs -1 credible sets - cs_avg_r2: Average correlation R2 between variants in the credible set - cs_min_r2: minimum r2 between variants in the credible set @@ -294,7 +298,7 @@ def from_finngen_susie_finemapping( # Drop rows which don't have proper position. snps_df.filter(f.col("position").cast(t.IntegerType()).isNotNull()) # Drop non credible set SNPs: - .filter(f.col("cs").cast(t.IntegerType()) > 0) + .filter(f.col("cs_99").cast(t.IntegerType()) > 0) .select( # Add study idenfitier. f.concat_ws("_", f.lit(finngen_release_prefix), f.col("trait")) @@ -303,7 +307,7 @@ def from_finngen_susie_finemapping( f.col("region"), # Add variant information. 
f.regexp_replace(f.col("v"), ":", "_").alias("variantId"), - f.col("cs").cast("integer").alias("credibleSetIndex"), + f.col("cs_99").cast("integer").alias("credibleSetIndex"), f.regexp_replace(f.col("chromosome"), "^chr", "") .cast(t.StringType()) .alias("chromosome"), @@ -433,8 +437,10 @@ def from_finngen_susie_finemapping( cs_summary_df.select( f.col("region"), f.col("trait"), - f.col("cs").cast("integer").alias("credibleSetIndex"), + f.col("cs_number").cast("integer").alias("credibleSetIndex"), f.col("cs_log10bf").cast("double").alias("credibleSetlog10BF"), + f.col("cs_avg_r2").cast("double").alias("purityMeanR2"), + f.col("cs_min_r2").cast("double").alias("purityMinR2"), ) .filter( (f.col("credibleSetlog10BF") > credset_lbf_threshold) @@ -471,6 +477,8 @@ def from_finngen_susie_finemapping( "credibleSetIndex", "finemappingMethod", "credibleSetlog10BF", + "purityMeanR2", + "purityMinR2", ) processed_finngen_finemapping_df = ( diff --git a/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv index 6ba610807..ca973d8fc 100644 --- a/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv +++ b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv @@ -1,12 +1,12 @@ -trait region cs cs_log10bf cs_avg_r2 cs_min_r2 low_purity cs_size good_cs cs_id v rsid p beta sd prob cs_specific_prob most_severe gene_most_severe -H7_HORDEOLUM chr1:156514826-159514826 1 1.46467818637 1.0 1.0 False 1 True chr1:156514826-159514826_1 1:158014826:C:T chr1_158014826_C_T 2.54378e-08 1.88207 0.446902952132688 0.971307293706789 0.971297199099758 intron_variant KIRREL1 -G6_MIGRAINE_NO_AURA chr6:11403725-14403725 1 1.36783669104 0.760565917219 0.140997997009 True 19 False chr6:11403725-14403725_1 6:12903725:A:G chr6_12903725_A_G 3.61468e-08 -0.0981548 0.0320646607273439 0.90799209537494 0.907981878613055 intron_variant PHACTR1 -G6_MIGRAINE_NO_AURA chr12:55647065-58647065 1 1.66017507931 0.804011708889 
0.804011708889 False 2 True chr12:55647065-58647065_1 12:57147065:C:G chr12_57147065_C_G 2.29261e-08 0.105129 0.0353895280800414 0.898218191602507 0.898218191602507 intron_variant LRP1 -G6_MIGRAINE_NO_AURA chr12:2906208-5906208 1 1.87518878137 0.762329739726 0.382297363204 False 11 True chr12:2906208-5906208_1 12:4406208:T:C chr12_4406208_T_C 3.52631e-08 -0.097881 0.0413367587403912 0.244577820826502 0.244317125809489 downstream_gene_variant AC008012.1 -G6_MIGRAINE_NO_AURA chr6:95113283-98113283 1 3.00789560938 0.917456939478 0.788796212164 False 43 True chr6:95113283-98113283_1 6:96613283:C:T chr6_96613283_C_T 5.56352e-09 0.110915 0.0357600937089016 0.122046568781673 0.122046568781673 intron_variant FHL5 -K11_APHTA_RECUR chr2:203986459-206986459 1 1.53062868484 1.0 1.0 False 1 True chr2:203986459-206986459_1 2:205486459:A:G chr2_205486459_A_G 2.5015e-08 0.225777 0.0591109424642432 0.956659971956856 0.956659971956856 intron_variant PARD3B -DM_NEPHROPATHY_EXMORE chr2:224821033-227821033 1 5.02824362893 0.984196974601 0.661904280625 False 50 True chr2:224821033-227821033_1 2:226321033:T:C chr2_226321033_T_C 2.42008e-10 -0.144142 0.0211352709908576 0.0225587152970605 0.0225587152970605 intergenic_variant -DM_NEPHROPATHY_EXMORE chr11:660994-3660994 1 2.7116635835 0.870906412027 0.809751619044 False 3 True chr11:660994-3660994_1 11:2160994:A:T chr11_2160994_A_T 3.15043e-09 0.168647 0.08489428697954 0.47925285536432 0.479123015291369 splice_region_variant INS-IGF2 -DM_NEPHROPATHY_EXMORE chr12:2775678-5775678 1 2.25532985077 1.0 1.0 False 1 True chr12:2775678-5775678_1 12:4275678:T:G chr12_4275678_T_G 3.72503e-09 -0.433585 0.0763706359152554 0.996634114263842 0.996632992137397 intron_variant CCND2 -AB1_EBV chr6:1412516-4412516 1 4.4609149402 0.678175582701 0.468032488641 False 4 True chr6:1412516-4412516_1 6:2912516:CCA:C chr6_2912516_CCA_C 4.90908e-11 0.196413 0.0943211787286286 0.367291808726716 0.367106415849495 upstream_gene_variant AL133351.2 -AB1_EBV 
chr20:16016584-19016584 1 1.27192551601 1.0 1.0 False 1 True chr20:16016584-19016584_1 20:17516584:C:T chr20_17516584_C_T 3.72152e-08 2.00466 0.504004034147034 0.963905759488349 0.963892713745976 intron_variant BFSP1 +trait region cs cs_log10bf cs_avg_r2 cs_min_r2 low_purity cs_size good_cs cs_id v rsid p beta sd prob cs_specific_prob most_severe gene_most_severe cs_number +H7_HORDEOLUM chr1:156514826-159514826 1 1.46467818637 1.0 1.0 False 1 True chr1:156514826-159514826_1 1:158014826:C:T chr1_158014826_C_T 2.54378e-08 1.88207 0.446902952132688 0.971307293706789 0.971297199099758 intron_variant KIRREL1 1 +G6_MIGRAINE_NO_AURA chr6:11403725-14403725 1 1.36783669104 0.760565917219 0.140997997009 True 19 False chr6:11403725-14403725_1 6:12903725:A:G chr6_12903725_A_G 3.61468e-08 -0.0981548 0.0320646607273439 0.90799209537494 0.907981878613055 intron_variant PHACTR1 1 +G6_MIGRAINE_NO_AURA chr12:55647065-58647065 1 1.66017507931 0.804011708889 0.804011708889 False 2 True chr12:55647065-58647065_1 12:57147065:C:G chr12_57147065_C_G 2.29261e-08 0.105129 0.0353895280800414 0.898218191602507 0.898218191602507 intron_variant LRP1 1 +G6_MIGRAINE_NO_AURA chr12:2906208-5906208 1 1.87518878137 0.762329739726 0.382297363204 False 11 True chr12:2906208-5906208_1 12:4406208:T:C chr12_4406208_T_C 3.52631e-08 -0.097881 0.0413367587403912 0.244577820826502 0.244317125809489 downstream_gene_variant AC008012.1 1 +G6_MIGRAINE_NO_AURA chr6:95113283-98113283 1 3.00789560938 0.917456939478 0.788796212164 False 43 True chr6:95113283-98113283_1 6:96613283:C:T chr6_96613283_C_T 5.56352e-09 0.110915 0.0357600937089016 0.122046568781673 0.122046568781673 intron_variant FHL5 1 +K11_APHTA_RECUR chr2:203986459-206986459 1 1.53062868484 1.0 1.0 False 1 True chr2:203986459-206986459_1 2:205486459:A:G chr2_205486459_A_G 2.5015e-08 0.225777 0.0591109424642432 0.956659971956856 0.956659971956856 intron_variant PARD3B 1 +DM_NEPHROPATHY_EXMORE chr2:224821033-227821033 1 5.02824362893 0.984196974601 
0.661904280625 False 50 True chr2:224821033-227821033_1 2:226321033:T:C chr2_226321033_T_C 2.42008e-10 -0.144142 0.0211352709908576 0.0225587152970605 0.0225587152970605 intergenic_variant 1 +DM_NEPHROPATHY_EXMORE chr11:660994-3660994 1 2.7116635835 0.870906412027 0.809751619044 False 3 True chr11:660994-3660994_1 11:2160994:A:T chr11_2160994_A_T 3.15043e-09 0.168647 0.08489428697954 0.47925285536432 0.479123015291369 splice_region_variant INS-IGF2 1 +DM_NEPHROPATHY_EXMORE chr12:2775678-5775678 1 2.25532985077 1.0 1.0 False 1 True chr12:2775678-5775678_1 12:4275678:T:G chr12_4275678_T_G 3.72503e-09 -0.433585 0.0763706359152554 0.996634114263842 0.996632992137397 intron_variant CCND2 1 +AB1_EBV chr6:1412516-4412516 1 4.4609149402 0.678175582701 0.468032488641 False 4 True chr6:1412516-4412516_1 6:2912516:CCA:C chr6_2912516_CCA_C 4.90908e-11 0.196413 0.0943211787286286 0.367291808726716 0.367106415849495 upstream_gene_variant AL133351.2 1 +AB1_EBV chr20:16016584-19016584 1 1.27192551601 1.0 1.0 False 1 True chr20:16016584-19016584_1 20:17516584:C:T chr20_17516584_C_T 3.72152e-08 2.00466 0.504004034147034 0.963905759488349 0.963892713745976 intron_variant BFSP1 1 diff --git a/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv.bgz b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv.bgz index 148f0f22fa564b24d7c9319c85a5fa24e6cd27f1..68e6965506938a056fb620caae22fc610628d285 100644 GIT binary patch delta 1338 zcmV-A1;zTh3dRb5ABzYC000000RIL6LPG)oDFuC2O>Y}F5Ir;hMT?o?eC|zA9NVa4 z8>&*YC&IBeu7E@`Bqc%n>kD!vCsh<5gq?X>k~@!Y9=Y4xEq7@%JTBL(biYma+wy7s zi16WK4e!1_mJQYD^KwC0xh`=^52<-bFET(2L0$~bcQkiMqPcKMK=(uZMp zm$nb-X|w*&-hCSGmmin+r2(JU+g;fXUx&?*9*5OXegy8fW%cFrhhg*T!pg-?Wi?0FYzaGPzqfavwnM_S*nAm&Il5q~xI@eOS-)tn zmS?EU$|6yJC!EcP33$TJoxl=w%o;!;gqhSmK^KM@j3OJTd5j2!VFW~&cv`M@oAs)E zz1uABR=f1u<$Qj2g$ZAsYk7BhKA&D*pOx#IGJQXv)-TZ>ks%w&1bGZv!CB3-Y9Mfe z&J;u?&{1BkJFj6EHvr!1JXM~as8)Be1Fy;lxMnU?dEJ4M!!g9&sHj>5HQ 
z$OOu&U}Twds~RkVAS4?H?*T_QSIi)COvCNPbhenkKm$@QqHLft5g&ubgDRj2SY%Al zIWQm^xHt0umyY$*cR8GxvZ{f4F>F~s8;L+F2dK=-d7_Sj6dM!JAqY}k4YFhpAqNLw ztYv$DIH6;p0!h=%F>*x2)%^Bl7AQaod?Mqsao{HCZjQOGU`m<7au{iXE<4psGXWo3 z0)HgoXvUEO=9ct}ezs>|kCa83rQ8W=#_v)v_ewu7=By@e3^|Yzbi^!XPT-^hdnBVF zi9@hQdRTv3ZFigD?sI8YaG+v318_jq;~hjT~$> zHFpaoB-8^VoSzvvF=gwHP;JSWzKs5vpK&}4~0jp@; zFtvOFInu=2iz`X^>NmtPy}ejW%lvHieqO(UdV(CTEHNR_F=~>K3`(xf&cF1-lcNiN zriu%<>|g~{o}uOi)Cr*M8-Pg6s`5ah7S6=V#JO&Eh^j<2Kv5SxfgC%*bp9s4PWbB0 zyK;SYdojPcoh~l^DrbMbyP2QuKbzPIA?FE^jZwlR9gq`eC6CH#IAdrSD^zW&VgQqa zlI*Dv_>qgBCmp#U%`H3h0+nZ|H3M~jOaMMi5Nb0ZGjc5*2r&y8M1YHFgGj^;g~C8V zPC?7nZrD5ytL6POCoi#q*sDSWPxAOon5Z*E)hwRt5^fNnj0Dsi2*Ly#8wdG+SN~7b zRXf0*uqFL;e>`H@BV}3^DR(Hq`JjERW?D;g_F!@i3#o3%79`{%Sx0rS1yY88T0CZr zw0-)tydTPV9eFPQ^7{A5<@sBBkp}9?Ow@cLNG<6X&xgPs zDT{U*8VQUwPoU$R#oKsBDgXcg delta 1326 zcmV+}1=0G(3c3n^ABzYC000000RIL6LPG)o9R+<>O>f&s3_VZ&iv^OyPu*NMO@d96 zz;ze9r$L&iSp%&d*lvs6U%!}290uw&m0M z0pY{P8s2_=C>yHL=jEzKpVr^XrN&`Nxx4W_(z7CrqJq)X%`~aMsTRFd;zdpUbeE&Ax{jw4|T*XYIanK+3e7rQ9ihvW^T)po4G}t}R0*P*w#a%bZ)! zU=ai%**JI)IJ&uF29f6^+?-Emi}{NrAoU{31}YQrF=#BP0-AtD#sr-M1EPU@BmSp! 
z?4G_$;Yi7<1nR}GW&LcV0jV6IGAn0^Ito&3JV1vaNcCxuC3^@tH~?d9+QSJQ0~JV` zUY;X=&uF-u-~5gP3Qz){$arlGxCy$OW3Kg0DN|SuBTdj{rZT?Xb}$p^-q)x?b<2U3ELn8nNqoK#?sWHcmk2=++#>u;;= zZZq6|E^YJ=L`-J@4oKZ^<{|_!ljlkJ7$i)8xdSi{QHb285sgJ72U|_e-9iZo^}q+` zCq9mpY;zH+Ejd_&mIKn9K|0YaG)$n49zvjudN2j3M&JRc^*g@cY3m#ovFjZW*Wd{qO@&q-1 z51>u}WiNdqF{`QriP|<3D--9s(IKi5)%HYP^a%3Y0jBfU`PIqmx8>^e=6rsAGhLkj zT~7acdp$qhzcjHELe3K+8>56tIv^*`N*KGt1a%~)c2r&y8M1YHFgGj^;g~C8VjzP=SZrD5wtL5E+ zk(a4J>{TIxCwaUkOw<{oY8KD6z8eH6BLOwrfiS^F#XH^pkwxpl#&qFMG zq)f{qnIGiK*~@%$E=aIkDr!zLm9t+I-b;U zarJI;arTCOLjv_=CTc#B@!D7*kxbzN_M5MOUzYBDk6O|%o_v3gltnxKj0DDxC(v=u z;%zHU>sg;AP)J(S5vrLHnKXM-MMEfpFjsSaF8NTBAUtu|Y_2>SGW+XwK@OVv_ zxMPJt4sKRjKXy#S$(qnv9Wt?hyD}cE?FVOjR<8LPMT`5{Y&vRO%N{DT*|f|OcLC8f zUY~%WjCI5yI1{3|kyo>+2aOs9I=GP Date: Thu, 7 Nov 2024 14:21:27 +0000 Subject: [PATCH 160/188] chore: add `hf_model_commit_message` to `LocusToGeneStep` (#905) --- src/gentropy/config.py | 3 +++ src/gentropy/l2g.py | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 82ead9ed0..b931bb686 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -274,6 +274,7 @@ class LocusToGeneConfig(StepConfig): ) wandb_run_name: str | None = None hf_hub_repo_id: str | None = "opentargets/locus_to_gene" + hf_model_commit_message: str | None = "chore: update model" download_from_hub: bool = True _target_: str = "gentropy.l2g.LocusToGeneStep" @@ -633,6 +634,7 @@ class LocusToGeneEvidenceStepConfig(StepConfig): locus_to_gene_threshold: float = 0.05 _target_: str = "gentropy.l2g.LocusToGeneEvidenceStep" + @dataclass class LocusToGeneAssociationsStepConfig(StepConfig): """Configuration of the locus to gene association step.""" @@ -643,6 +645,7 @@ class LocusToGeneAssociationsStepConfig(StepConfig): indirect_associations_output_path: str = MISSING _target_: str = "gentropy.l2g.LocusToGeneAssociationsStep" + @dataclass class StudyLocusValidationStepConfig(StepConfig): 
"""Configuration of the study index validation step. diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index b585166c1..30d2a9837 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -110,6 +110,7 @@ def __init__( gene_interactions_path: str | None = None, predictions_path: str | None = None, hf_hub_repo_id: str | None, + hf_model_commit_message: str | None = "chore: update model", ) -> None: """Initialise the step and run the logic based on mode. @@ -128,6 +129,7 @@ def __init__( gene_interactions_path (str | None): Path to the gene interactions dataset predictions_path (str | None): Path to the L2G predictions output dataset hf_hub_repo_id (str | None): Hugging Face Hub repository ID. If provided, the model will be uploaded to Hugging Face. + hf_model_commit_message (str | None): Commit message when we upload the model to the Hugging Face Hub Raises: ValueError: If run_mode is not 'train' or 'predict' @@ -146,6 +148,7 @@ def __init__( self.wandb_run_name = wandb_run_name self.hf_hub_repo_id = hf_hub_repo_id self.download_from_hub = download_from_hub + self.hf_model_commit_message = hf_model_commit_message # Load common inputs self.credible_set = StudyLocus.from_parquet( @@ -219,7 +222,7 @@ def run_train(self) -> None: ).train(self.wandb_run_name) if trained_model.training_data and trained_model.model and self.model_path: trained_model.save(self.model_path) - if self.hf_hub_repo_id: + if self.hf_hub_repo_id and self.hf_model_commit_message: hf_hub_token = access_gcp_secret( "hfhub-key", "open-targets-genetics-dev" ) @@ -231,7 +234,7 @@ def run_train(self) -> None: "goldStandardSet", "geneId" ).toPandas(), repo_id=self.hf_hub_repo_id, - commit_message="chore: update model", + commit_message=self.hf_model_commit_message, ) def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: From b5b71f0a288163845e26d9ba9c085120c3a9b6ca Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 8 Nov 2024 12:41:54 +0000 Subject: [PATCH 161/188] refactor: 
finemapping method enum (#897) Co-authored-by: Yakov --- docs/python_api/datasets/study_locus.md | 4 ++ src/gentropy/colocalisation.py | 6 ++- src/gentropy/dataset/study_locus.py | 53 ++++++++++++++++--- .../datasource/eqtl_catalogue/finemapping.py | 4 +- .../datasource/finngen/finemapping.py | 4 +- src/gentropy/method/pics.py | 12 +++-- src/gentropy/susie_finemapper.py | 8 ++- 7 files changed, 72 insertions(+), 19 deletions(-) diff --git a/docs/python_api/datasets/study_locus.md b/docs/python_api/datasets/study_locus.md index 6896db167..700e39944 100644 --- a/docs/python_api/datasets/study_locus.md +++ b/docs/python_api/datasets/study_locus.md @@ -6,6 +6,10 @@ title: Study Locus --- +::: gentropy.dataset.study_locus.FinemappingMethod + +--- + ::: gentropy.dataset.study_locus.StudyLocusQualityCheck --- diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index a45a9a6a1..9682a8ed9 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -8,7 +8,7 @@ from pyspark.sql.functions import col from gentropy.common.session import Session -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus from gentropy.method.colocalisation import Coloc, ColocalisationMethodInterface @@ -56,7 +56,9 @@ def __init__( ) if colocalisation_method == Coloc.METHOD_NAME.lower(): credible_set = credible_set.filter( - col("finemappingMethod").isin("SuSie", "SuSiE-inf") + col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value + ) ) # Transform diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 908c093b6..1a2aa3697 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -139,6 +139,20 @@ class CredibleInterval(Enum): IS99 = "is99CredibleSet" +class FinemappingMethod(Enum): + """Finemapping method enum. 
+ + Attributes: + PICS (str): PICS + SUSIE (str): SuSiE method + SUSIE_INF (str): SuSiE-inf method implemented in `gentropy` + """ + + PICS = "pics" + SUSIE = "SuSie" + SUSIE_INF = "SuSiE-inf" + + @dataclass class StudyLocus(Dataset): """Study-Locus dataset. @@ -1056,7 +1070,7 @@ def qc_redundant_top_hits_from_PICS(self: StudyLocus) -> StudyLocus: StudyLocus: Updated study locus with redundant top hits flagged. """ studies_with_pics_sumstats = ( - self.df.filter(f.col("finemappingMethod") == "pics") + self.df.filter(f.col("finemappingMethod") == FinemappingMethod.PICS.value) # Returns True if the study contains any PICS associations from summary statistics .withColumn( "hasPicsSumstats", @@ -1095,7 +1109,11 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: """ # unique study-regions covered by SuSie credible sets susie_study_regions = ( - self.filter(f.col("finemappingMethod") == "SuSiE-inf") + self.filter( + f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value + ) + ) .df.select( "studyId", "chromosome", @@ -1108,7 +1126,11 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: # non SuSiE credible sets (studyLocusId) overlapping in any variant with SuSiE locus redundant_study_locus = ( - self.filter(f.col("finemappingMethod") != "SuSiE-inf") + self.filter( + ~f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, FinemappingMethod.SUSIE_INF.value + ) + ) .df.withColumn("l", f.explode("locus")) .select( "studyLocusId", @@ -1141,7 +1163,12 @@ def qc_explained_by_SuSiE(self: StudyLocus) -> StudyLocus: # credible set in SuSiE overlapping region f.col("inSuSiE") # credible set not based on SuSiE - & (f.col("finemappingMethod") != "SuSiE-inf"), + & ( + ~f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, + FinemappingMethod.SUSIE_INF.value, + ) + ), StudyLocusQualityCheck.EXPLAINED_BY_SUSIE, ), ) @@ -1268,7 +1295,12 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: df = 
self.df.withColumn( "confidence", f.when( - (f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"])) + ( + f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, + FinemappingMethod.SUSIE_INF.value, + ) + ) & ( ~f.array_contains( f.col("qualityControls"), @@ -1278,7 +1310,12 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.FINEMAPPED_IN_SAMPLE_LD.value, ) .when( - (f.col("finemappingMethod").isin(["SuSiE-inf", "SuSie"])) + ( + f.col("finemappingMethod").isin( + FinemappingMethod.SUSIE.value, + FinemappingMethod.SUSIE_INF.value, + ) + ) & ( f.array_contains( f.col("qualityControls"), @@ -1288,7 +1325,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.FINEMAPPED_OUT_OF_SAMPLE_LD.value, ) .when( - (f.col("finemappingMethod") == "pics") + (f.col("finemappingMethod") == FinemappingMethod.PICS.value) & ( ~f.array_contains( f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value @@ -1297,7 +1334,7 @@ def assign_confidence(self: StudyLocus) -> StudyLocus: CredibleSetConfidenceClasses.PICSED_SUMMARY_STATS.value, ) .when( - (f.col("finemappingMethod") == "pics") + (f.col("finemappingMethod") == FinemappingMethod.PICS.value) & ( f.array_contains( f.col("qualityControls"), StudyLocusQualityCheck.TOP_HIT.value diff --git a/src/gentropy/datasource/eqtl_catalogue/finemapping.py b/src/gentropy/datasource/eqtl_catalogue/finemapping.py index ea4264fdd..0db240350 100644 --- a/src/gentropy/datasource/eqtl_catalogue/finemapping.py +++ b/src/gentropy/datasource/eqtl_catalogue/finemapping.py @@ -17,7 +17,7 @@ from gentropy.common.session import Session from gentropy.common.utils import parse_pvalue -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex if TYPE_CHECKING: @@ -166,7 +166,7 @@ def parse_susie_results( 
f.col("se").alias("standardError"), f.col("credibleSetIndex"), f.col("logBF"), - f.lit("SuSie").alias("finemappingMethod"), + f.lit(FinemappingMethod.SUSIE.value).alias("finemappingMethod"), # Study metadata f.col("molecular_trait_id").alias("traitFromSource"), f.col("gene_id").alias("geneId"), diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index 723d918bf..e0f39689d 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -13,7 +13,7 @@ from gentropy.common.spark_helpers import get_top_ranked_in_window from gentropy.common.utils import parse_pvalue -from gentropy.dataset.study_locus import StudyLocus +from gentropy.dataset.study_locus import FinemappingMethod, StudyLocus @dataclass @@ -319,7 +319,7 @@ def from_finngen_susie_finemapping( # Add standard error, and allele frequency information. f.col("se").cast("double").alias("standardError"), f.col("maf").cast("float").alias("effectAlleleFrequencyFromSource"), - f.lit("SuSie").cast("string").alias("finemappingMethod"), + f.lit(FinemappingMethod.SUSIE.value).alias("finemappingMethod"), *[ f.col(f"alpha{i}").cast(t.DoubleType()).alias(f"alpha_{i}") for i in range(1, 11) diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 918850527..96d0902c3 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -8,7 +8,11 @@ import pyspark.sql.types as t from scipy.stats import norm -from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck +from gentropy.dataset.study_locus import ( + FinemappingMethod, + StudyLocus, + StudyLocusQualityCheck, +) if TYPE_CHECKING: from pyspark.sql import Row @@ -213,9 +217,11 @@ def finemap( """ # Finemapping method is an optional column: finemapping_method_expression = ( - f.lit("pics") + f.lit(FinemappingMethod.PICS.value) if "finemappingMethod" not in associations.df.columns - else 
f.coalesce(f.col("finemappingMethod"), f.lit("pics")) + else f.coalesce( + f.col("finemappingMethod"), f.lit(FinemappingMethod.PICS.value) + ) ) # Flagging expression for loci that do not qualify for PICS: diff --git a/src/gentropy/susie_finemapper.py b/src/gentropy/susie_finemapper.py index 03a8730ef..94ad918a5 100644 --- a/src/gentropy/susie_finemapper.py +++ b/src/gentropy/susie_finemapper.py @@ -26,7 +26,11 @@ order_array_of_structs_by_field, ) from gentropy.dataset.study_index import StudyIndex -from gentropy.dataset.study_locus import StudyLocus, StudyLocusQualityCheck +from gentropy.dataset.study_locus import ( + FinemappingMethod, + StudyLocus, + StudyLocusQualityCheck, +) from gentropy.method.carma import CARMA from gentropy.method.ld import LDAnnotator from gentropy.method.ld_matrix_interface import LDMatrixInterface @@ -290,7 +294,7 @@ def susie_inf_to_studylocus( # noqa: C901 "region": f.lit(region), "credibleSetIndex": f.lit(counter), "credibleSetlog10BF": f.lit(cs_lbf_value * 0.4342944819), - "finemappingMethod": f.lit("SuSiE-inf"), + "finemappingMethod": f.lit(FinemappingMethod.SUSIE_INF.value), } ) .withColumn( From 0e7e815b5f7c8b1b63a711f819ce8c345858600c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 11 Nov 2024 09:41:58 +0000 Subject: [PATCH 162/188] chore(l2g): parametrise score threshold when writing predictions (#907) --- src/gentropy/config.py | 1 + src/gentropy/l2g.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index b931bb686..6befe472e 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -221,6 +221,7 @@ class LocusToGeneConfig(StepConfig): credible_set_path: str = MISSING feature_matrix_path: str = MISSING predictions_path: str | None = None + l2g_threshold: float | None = 0.05 variant_index_path: str | None = None model_path: str | None = None 
gold_standard_curation_path: str | None = None diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 30d2a9837..07cd14336 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -109,6 +109,7 @@ def __init__( variant_index_path: str | None = None, gene_interactions_path: str | None = None, predictions_path: str | None = None, + l2g_threshold: float | None, hf_hub_repo_id: str | None, hf_model_commit_message: str | None = "chore: update model", ) -> None: @@ -128,6 +129,7 @@ def __init__( variant_index_path (str | None): Path to the variant index gene_interactions_path (str | None): Path to the gene interactions dataset predictions_path (str | None): Path to the L2G predictions output dataset + l2g_threshold (float | None): An optional threshold for the L2G score to filter predictions. A threshold of 0.05 is recommended. hf_hub_repo_id (str | None): Hugging Face Hub repository ID. If provided, the model will be uploaded to Hugging Face. hf_model_commit_message (str | None): Commit message when we upload the model to the Hugging Face Hub @@ -149,6 +151,7 @@ def __init__( self.hf_hub_repo_id = hf_hub_repo_id self.download_from_hub = download_from_hub self.hf_model_commit_message = hf_model_commit_message + self.l2g_threshold = l2g_threshold or 0.0 # Load common inputs self.credible_set = StudyLocus.from_parquet( @@ -195,7 +198,9 @@ def run_predict(self) -> None: hf_token=access_gcp_secret("hfhub-key", "open-targets-genetics-dev"), download_from_hub=self.download_from_hub, ) - predictions.add_locus_to_gene_features(self.feature_matrix).df.write.mode( + predictions.filter( + f.col("score") >= self.l2g_threshold + ).add_locus_to_gene_features(self.feature_matrix).df.write.mode( self.session.write_mode ).parquet(self.predictions_path) self.session.logger.info("L2G predictions saved successfully.") From bb609cb20671c868500a10ca3f71856bbc679a8a Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Mon, 11 Nov 2024 16:21:13 +0000 Subject: [PATCH 163/188] chore: 
validate chromosome (#906) --- src/gentropy/study_locus_validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index d5dd900de..4c0c8c4c5 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -41,6 +41,7 @@ def __init__( StudyLocus.from_parquet(session, list(study_locus_path)) # Add flag for MHC region .qc_MHC_region() + .validate_chromosome_label() # Flagging credible sets with unsupported chromosomes .validate_study(study_index) # Flagging studies not in study index .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics From 10b4be0308698fedeb38da597f272526db81622f Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:01:32 +0000 Subject: [PATCH 164/188] feat: extract pos and chromosome from variantid (#909) Co-authored-by: Szymon Szyszkowski --- src/gentropy/common/utils.py | 56 ++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/gentropy/common/utils.py b/src/gentropy/common/utils.py index ca6e8e7f2..91b002fe2 100644 --- a/src/gentropy/common/utils.py +++ b/src/gentropy/common/utils.py @@ -315,3 +315,59 @@ def copy_to_gcs(source_path: str, destination_blob: str) -> None: bucket = client.bucket(bucket_name=urlparse(destination_blob).hostname) blob = bucket.blob(blob_name=urlparse(destination_blob).path.lstrip("/")) blob.upload_from_filename(source_path) + + +def extract_chromosome(variant_id: Column) -> Column: + """Extract chromosome from variant ID. + + This function extracts the chromosome from a variant ID. The variantId is expected to be in the format `chromosome_position_ref_alt`. + The function does not convert the GENCODE to Ensembl chromosome notation. 
+ See https://genome.ucsc.edu/FAQ/FAQgenes.html#:~:text=maps%20only%20once.-,The%20differences,-Some%20of%20our + + Args: + variant_id (Column): Variant ID + + Returns: + Column: Chromosome + + Examples: + >>> d = [("chr1_12345_A_T",),("15_KI270850v1_alt_48777_C_T",),] + >>> df = spark.createDataFrame(d).toDF("variantId") + >>> df.withColumn("chromosome", extract_chromosome(f.col("variantId"))).show(truncate=False) + +---------------------------+-----------------+ + |variantId |chromosome | + +---------------------------+-----------------+ + |chr1_12345_A_T |chr1 | + |15_KI270850v1_alt_48777_C_T|15_KI270850v1_alt| + +---------------------------+-----------------+ + + + """ + return f.regexp_extract(variant_id, r"^(.*)_\d+_.*$", 1) + + +def extract_position(variant_id: Column) -> Column: + """Extract position from variant ID. + + This function extracts the position from a variant ID. The variantId is expected to be in the format `chromosome_position_ref_alt`. + + Args: + variant_id (Column): Variant ID + + Returns: + Column: Position + + Examples: + >>> d = [("chr1_12345_A_T",),("15_KI270850v1_alt_48777_C_T",),] + >>> df = spark.createDataFrame(d).toDF("variantId") + >>> df.withColumn("position", extract_position(f.col("variantId"))).show(truncate=False) + +---------------------------+--------+ + |variantId |position| + +---------------------------+--------+ + |chr1_12345_A_T |12345 | + |15_KI270850v1_alt_48777_C_T|48777 | + +---------------------------+--------+ + + + """ + return f.regexp_extract(variant_id, r"^.*_(\d+)_.*$", 1) From e5b3c9ed1bba769ea61c89a6175eb2d45ceee4f1 Mon Sep 17 00:00:00 2001 From: Vivien Ho <56025826+vivienho@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:12:37 +0000 Subject: [PATCH 165/188] feat: changes to PICS credible sets (OUT_OF_SAMPLE_LD QC flag and capital PICS) (#910) * feat: add OUT_OF_SAMPLE_LD QC flag to PICS credible sets * feat: change pics finemapping method to PICS * test: change pics to PICS in test data * fix: flag 
studies without sumstats without relying on hasSumstats column * fix: flag studies without sumstats without using update_quality_flag function --- src/gentropy/dataset/study_locus.py | 2 +- .../datasource/gwas_catalog/study_index.py | 6 +-- src/gentropy/method/pics.py | 9 ++++ tests/gentropy/dataset/test_study_locus.py | 42 +++++++++---------- 4 files changed, 32 insertions(+), 27 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 1a2aa3697..e6fd06c12 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -148,7 +148,7 @@ class FinemappingMethod(Enum): SUSIE_INF (str): SuSiE-inf method implemented in `gentropy` """ - PICS = "pics" + PICS = "PICS" SUSIE = "SuSie" SUSIE_INF = "SuSiE-inf" diff --git a/src/gentropy/datasource/gwas_catalog/study_index.py b/src/gentropy/datasource/gwas_catalog/study_index.py index c01d6d263..8630d31f6 100644 --- a/src/gentropy/datasource/gwas_catalog/study_index.py +++ b/src/gentropy/datasource/gwas_catalog/study_index.py @@ -655,11 +655,7 @@ def add_no_sumstats_flag(self: StudyIndexGWASCatalog) -> StudyIndexGWASCatalog: """ self.df = self.df.withColumn( "qualityControls", - StudyIndex.update_quality_flag( - f.col("qualityControls"), - ~f.col("hasSumstats"), - StudyQualityCheck.SUMSTATS_NOT_AVAILABLE, - ), + f.array(f.lit(StudyQualityCheck.SUMSTATS_NOT_AVAILABLE.value)) ) return self diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 96d0902c3..60e28b9a1 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -280,6 +280,15 @@ def finemap( StudyLocusQualityCheck.NOT_QUALIFYING_LD_BLOCK, ), ) + # Flagging all PICS loci with OUT_OF_SAMPLE_LD flag: + .withColumn( + "qualityControls", + StudyLocus.update_quality_flag( + f.col("qualityControls"), + f.lit(True), + StudyLocusQualityCheck.OUT_OF_SAMPLE_LD, + ), + ) .withColumn( "finemappingMethod", finemapping_method_expression, diff --git 
a/tests/gentropy/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py index 1d34479e1..19b833124 100644 --- a/tests/gentropy/dataset/test_study_locus.py +++ b/tests/gentropy/dataset/test_study_locus.py @@ -629,7 +629,7 @@ class TestStudyLocusValidation: STUDY_LOCUS_DATA = [ # Won't be flagged: - ("1", "v1", "s1", 1.0, -8, [], "pics"), + ("1", "v1", "s1", 1.0, -8, [], "PICS"), # Already flagged, needs to be tested if the flag reamins unique: ( "2", @@ -638,7 +638,7 @@ class TestStudyLocusValidation: 5.0, -4, [StudyLocusQualityCheck.SUBSIGNIFICANT_FLAG.value], - "pics", + "PICS", ), # To be flagged: ("3", "v3", "s3", 1.0, -4, [], "SuSiE-inf"), @@ -869,18 +869,18 @@ class TestStudyLocusRedundancyFlagging: """Collection of tests related to flagging redundant credible sets.""" STUDY_LOCUS_DATA = [ - ("1", "v1", "s1", "pics", []), - ("2", "v2", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("3", "v3", "s1", "pics", []), - ("3", "v3", "s1", "pics", []), - ("1", "v1", "s1", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("1", "v1", "s2", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s1", "PICS", []), + ("2", "v2", "s1", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("3", "v3", "s1", "PICS", []), + ("3", "v3", "s1", "PICS", []), + ("1", "v1", "s1", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s2", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s2", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), ("1", "v1", "s3", "SuSie", []), - ("1", "v1", "s3", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), - ("1", "v1", "s4", "pics", []), + ("1", "v1", "s3", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s4", "PICS", []), ("1", "v1", "s4", "SuSie", []), - ("1", "v1", "s4", "pics", [StudyLocusQualityCheck.TOP_HIT.value]), + ("1", "v1", "s4", "PICS", [StudyLocusQualityCheck.TOP_HIT.value]), ] 
STUDY_LOCUS_SCHEMA = t.StructType( @@ -946,7 +946,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v1", "s1", "X", - "pics", + "PICS", 1, 3, [ @@ -962,7 +962,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v2", "s1", "X", - "pics", + "PICS", 4, 5, [ @@ -977,7 +977,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v3", "s1", "X", - "pics", + "PICS", 6, 7, [ @@ -1004,7 +1004,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v5", "s1", "X", - "pics", + "PICS", 5, 5, [ @@ -1018,7 +1018,7 @@ class TestStudyLocusSuSiERedundancyFlagging: "v6", "s2", "X", - "pics", + "PICS", 3, 5, [ @@ -1141,11 +1141,11 @@ class TestStudyLocusDuplicationFlagging: STUDY_LOCUS_DATA = [ # Non-duplicated: - ("1", "v1", "s1", "pics"), + ("1", "v1", "s1", "PICS"), # Triplicate: - ("3", "v3", "s1", "pics"), - ("3", "v3", "s1", "pics"), - ("3", "v3", "s1", "pics"), + ("3", "v3", "s1", "PICS"), + ("3", "v3", "s1", "PICS"), + ("3", "v3", "s1", "PICS"), ] STUDY_LOCUS_SCHEMA = t.StructType( From 253fe31dd1b1d5861c70f864bd4d9ae6813dad98 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:12:28 +0100 Subject: [PATCH 166/188] feat(gold_standard): arbitrary gold standards (#912) * feat(gold_standard): filter by protein coding genes * feat: arbitrary gold standards * feat: read model from gcs * feat: read model from gcs * feat: get untrusted types from blob * revert: changes to gene_index * fix: correct list of missing and unexpected fields * chore: addressing comments * fix: selective check on the schema issues --------- Co-authored-by: Szymon Szyszkowski --- src/gentropy/l2g.py | 227 ++++++++++++++++++----------- src/gentropy/method/l2g/model.py | 23 ++- src/gentropy/method/l2g/trainer.py | 10 +- 3 files changed, 165 insertions(+), 95 deletions(-) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 07cd14336..dc95bb670 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -2,12 +2,14 @@ from __future__ 
import annotations +import logging from typing import Any import pyspark.sql.functions as f from sklearn.ensemble import GradientBoostingClassifier from wandb import login as wandb_login +from gentropy.common.schemas import compare_struct_schemas from gentropy.common.session import Session from gentropy.common.spark_helpers import calculate_harmonic_sum from gentropy.common.utils import access_gcp_secret @@ -152,6 +154,9 @@ def __init__( self.download_from_hub = download_from_hub self.hf_model_commit_message = hf_model_commit_message self.l2g_threshold = l2g_threshold or 0.0 + self.gold_standard_curation_path = gold_standard_curation_path + self.gene_interactions_path = gene_interactions_path + self.variant_index_path = variant_index_path # Load common inputs self.credible_set = StudyLocus.from_parquet( @@ -160,27 +165,105 @@ def __init__( self.feature_matrix = L2GFeatureMatrix( _df=session.load_data(feature_matrix_path), features_list=self.features_list ) - self.variant_index = ( - VariantIndex.from_parquet(session, variant_index_path) - if variant_index_path - else None - ) if run_mode == "predict": self.run_predict() elif run_mode == "train": - self.gs_curation = ( - self.session.spark.read.json(gold_standard_curation_path) - if gold_standard_curation_path - else None - ) - self.interactions = ( - self.session.spark.read.parquet(gene_interactions_path) - if gene_interactions_path - else None - ) + self.gold_standard = self.prepare_gold_standard() self.run_train() + def prepare_gold_standard(self) -> L2GGoldStandard: + """Prepare the gold standard for training. + + Returns: + L2GGoldStandard: training dataset. + + Raises: + ValueError: When gold standard path, is not provided, or when + parsing OTG gold standard but missing interactions and variant index paths. + TypeError: When gold standard is not OTG gold standard nor L2GGoldStandard. 
+ + """ + if self.gold_standard_curation_path is None: + raise ValueError("Gold Standard is required for model training.") + # Read the gold standard either from json or parquet, default to parquet if can not infer the format from extension. + ext = self.gold_standard_curation_path.split(".")[-1] + ext = "parquet" if ext not in ["parquet", "json"] else ext + gold_standard = self.session.load_data(self.gold_standard_curation_path, ext) + schema_issues = compare_struct_schemas( + gold_standard.schema, L2GGoldStandard.get_schema() + ) + # Parse the gold standard depending on the input schema + match schema_issues: + case {**extra} if not extra: + # Schema is the same as L2GGoldStandard - load the GS + # NOTE: match to empty dict will be non-selective + # see https://stackoverflow.com/questions/75389166/how-to-match-an-empty-dictionary + logging.info("Successfully parsed gold standard.") + return L2GGoldStandard( + _df=gold_standard, + _schema=L2GGoldStandard.get_schema(), + ) + case { + "missing_mandatory_columns": [ + "studyLocusId", + "variantId", + "studyId", + "geneId", + "goldStandardSet", + ], + "unexpected_columns": [ + "association_info", + "gold_standard_info", + "metadata", + "sentinel_variant", + "trait_info", + ], + }: + # There are schema mismatches, this would mean that we have + logging.info("Detected OTG Gold Standard. 
Attempting to parse it.") + otg_curation = gold_standard + if self.gene_interactions_path is None: + raise ValueError("Interactions are required for parsing curation.") + if self.variant_index_path is None: + raise ValueError("Variant Index are required for parsing curation.") + + interactions = self.session.load_data( + self.gene_interactions_path, "parquet" + ) + variant_index = VariantIndex.from_parquet( + self.session, self.variant_index_path + ) + study_locus_overlap = StudyLocus( + _df=self.credible_set.df.join( + otg_curation.select( + f.concat_ws( + "_", + f.col("sentinel_variant.locus_GRCh38.chromosome"), + f.col("sentinel_variant.locus_GRCh38.position"), + f.col("sentinel_variant.alleles.reference"), + f.col("sentinel_variant.alleles.alternative"), + ).alias("variantId"), + f.col("association_info.otg_id").alias("studyId"), + ), + on=[ + "studyId", + "variantId", + ], + how="inner", + ), + _schema=StudyLocus.get_schema(), + ).find_overlaps() + + return L2GGoldStandard.from_otg_curation( + gold_standard_curation=otg_curation, + variant_index=variant_index, + study_locus_overlap=study_locus_overlap, + interactions=interactions, + ) + case _: + raise TypeError("Incorrect gold standard dataset provided.") + def run_predict(self) -> None: """Run the prediction step. 
@@ -207,87 +290,55 @@ def run_predict(self) -> None: def run_train(self) -> None: """Run the training step.""" - if ( - self.gs_curation - and self.interactions - and self.wandb_run_name - and self.model_path - ): - wandb_key = access_gcp_secret("wandb-key", "open-targets-genetics-dev") - - # Instantiate classifier and train model - l2g_model = LocusToGeneModel( - model=GradientBoostingClassifier(random_state=42), - hyperparameters=self.hyperparameters, - ) - wandb_login(key=wandb_key) - trained_model = LocusToGeneTrainer( - model=l2g_model, - feature_matrix=self._annotate_gold_standards_w_feature_matrix(), - ).train(self.wandb_run_name) - if trained_model.training_data and trained_model.model and self.model_path: - trained_model.save(self.model_path) - if self.hf_hub_repo_id and self.hf_model_commit_message: - hf_hub_token = access_gcp_secret( - "hfhub-key", "open-targets-genetics-dev" - ) - trained_model.export_to_hugging_face_hub( - # we upload the model in the filesystem - self.model_path.split("/")[-1], - hf_hub_token, - data=trained_model.training_data._df.drop( - "goldStandardSet", "geneId" - ).toPandas(), - repo_id=self.hf_hub_repo_id, - commit_message=self.hf_model_commit_message, - ) + # Initialize access to weights and biases + wandb_key = access_gcp_secret("wandb-key", "open-targets-genetics-dev") + wandb_login(key=wandb_key) + + # Instantiate classifier and train model + l2g_model = LocusToGeneModel( + model=GradientBoostingClassifier(random_state=42), + hyperparameters=self.hyperparameters, + ) + + # Calculate the gold standard features + feature_matrix = self._annotate_gold_standards_w_feature_matrix() + + # Run the training + trained_model = LocusToGeneTrainer( + model=l2g_model, feature_matrix=feature_matrix + ).train(self.wandb_run_name) + + # Export the model + if trained_model.training_data and trained_model.model and self.model_path: + trained_model.save(self.model_path) + if self.hf_hub_repo_id and self.hf_model_commit_message: + hf_hub_token 
= access_gcp_secret( + "hfhub-key", "open-targets-genetics-dev" + ) + trained_model.export_to_hugging_face_hub( + # we upload the model in the filesystem + self.model_path.split("/")[-1], + hf_hub_token, + data=trained_model.training_data._df.drop( + "goldStandardSet", "geneId" + ).toPandas(), + repo_id=self.hf_hub_repo_id, + commit_message=self.hf_model_commit_message, + ) def _annotate_gold_standards_w_feature_matrix(self) -> L2GFeatureMatrix: """Generate the feature matrix of annotated gold standards. Returns: L2GFeatureMatrix: Feature matrix with gold standards annotated with features. - - Raises: - ValueError: Not all training dependencies are defined """ - if self.gs_curation and self.interactions and self.variant_index: - study_locus_overlap = StudyLocus( - _df=self.credible_set.df.join( - self.gs_curation.select( - f.concat_ws( - "_", - f.col("sentinel_variant.locus_GRCh38.chromosome"), - f.col("sentinel_variant.locus_GRCh38.position"), - f.col("sentinel_variant.alleles.reference"), - f.col("sentinel_variant.alleles.alternative"), - ).alias("variantId"), - f.col("association_info.otg_id").alias("studyId"), - ), - on=[ - "studyId", - "variantId", - ], - how="inner", - ), - _schema=StudyLocus.get_schema(), - ).find_overlaps() - - gold_standards = L2GGoldStandard.from_otg_curation( - gold_standard_curation=self.gs_curation, - variant_index=self.variant_index, - study_locus_overlap=study_locus_overlap, - interactions=self.interactions, + return ( + self.gold_standard.build_feature_matrix( + self.feature_matrix, self.credible_set ) - - return ( - gold_standards.build_feature_matrix( - self.feature_matrix, self.credible_set - ) - .select_features(self.features_list) - .persist() - ) - raise ValueError("Dependencies for train mode not set.") + .select_features(self.features_list) + .persist() + ) class LocusToGeneEvidenceStep: diff --git a/src/gentropy/method/l2g/model.py b/src/gentropy/method/l2g/model.py index e35e255a2..336efeb7f 100644 --- 
a/src/gentropy/method/l2g/model.py +++ b/src/gentropy/method/l2g/model.py @@ -42,13 +42,11 @@ def __post_init__(self: LocusToGeneModel) -> None: self.model.set_params(**self.hyperparameters_dict) @classmethod - def load_from_disk( - cls: Type[LocusToGeneModel], path: str | Path - ) -> LocusToGeneModel: + def load_from_disk(cls: Type[LocusToGeneModel], path: str) -> LocusToGeneModel: """Load a fitted model from disk. Args: - path (str | Path): Path to the model + path (str): Path to the model Returns: LocusToGeneModel: L2G model loaded from disk @@ -56,7 +54,20 @@ def load_from_disk( Raises: ValueError: If the model has not been fitted yet """ - loaded_model = sio.load(path, trusted=sio.get_untrusted_types(file=path)) + if path.startswith("gs://"): + path = path.removeprefix("gs://") + bucket_name = path.split("/")[0] + blob_name = "/".join(path.split("/")[1:]) + from google.cloud import storage + + client = storage.Client() + bucket = storage.Bucket(client=client, name=bucket_name) + blob = storage.Blob(name=blob_name, bucket=bucket) + data = blob.download_as_string(client=client) + loaded_model = sio.loads(data, trusted=sio.get_untrusted_types(data=data)) + else: + loaded_model = sio.load(path, trusted=sio.get_untrusted_types(file=path)) + if not loaded_model._is_fitted(): raise ValueError("Model has not been fitted yet.") return cls(model=loaded_model) @@ -80,7 +91,7 @@ def load_from_hub( """ local_path = Path(model_id) hub_utils.download(repo_id=model_id, dst=local_path, token=hf_token) - return cls.load_from_disk(Path(local_path) / model_name) + return cls.load_from_disk(str(Path(local_path) / model_name)) @property def hyperparameters_dict(self) -> dict[str, Any]: diff --git a/src/gentropy/method/l2g/trainer.py b/src/gentropy/method/l2g/trainer.py index fe56b3f42..ab2a3fa7e 100644 --- a/src/gentropy/method/l2g/trainer.py +++ b/src/gentropy/method/l2g/trainer.py @@ -97,6 +97,7 @@ def _get_shap_explanation( Raises: ValueError: Train data not set, cannot get SHAP 
values. + Exception: (ExplanationError) When the additivity check fails. """ if self.x_train is not None and self.x_test is not None: training_data = pd.concat([self.x_train, self.x_test], ignore_index=True) @@ -105,7 +106,14 @@ def _get_shap_explanation( data=training_data, feature_perturbation="interventional", ) - return explainer(training_data) + try: + return explainer(training_data) + except Exception as e: + if "Additivity check failed in TreeExplainer" in repr(e): + return explainer(training_data, check_additivity=False) + else: + raise + raise ValueError("Train data not set.") def log_plot_image_to_wandb( From c46480b1e0d16e1287aaaebafeb07539bdc444dc Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:41:46 +0100 Subject: [PATCH 167/188] feat: gzip evicence output to match existing format (#915) * feat: gzip evicence output to match existing format * docs: added info about compression to docstring --------- Co-authored-by: Szymon Szyszkowski --- src/gentropy/l2g.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index dc95bb670..0dbcd226e 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -360,7 +360,7 @@ def __init__( locus_to_gene_predictions_path (str): Path to the L2G predictions dataset credible_set_path (str): Path to the credible set dataset study_index_path (str): Path to the study index dataset - evidence_output_path (str): Path to the L2G evidence output dataset + evidence_output_path (str): Path to the L2G evidence output dataset. The output format is ndjson gzipped. locus_to_gene_threshold (float, optional): Threshold to consider a gene as a target. Defaults to 0.05. 
""" # Reading the predictions @@ -379,6 +379,7 @@ def __init__( credible_sets, study_index, locus_to_gene_threshold ) .write.mode(session.write_mode) + .option("compression", "gzip") .json(evidence_output_path) ) From 40ca21565fe9dd1441f2f3529de3a202c98d1e0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:27:48 +0000 Subject: [PATCH 168/188] feat: redefine neighbourhood features to represent similarity with best metric + other fixes (#913) * feat: mean to max * fix: remove protein coding * fix: adding protein coding * feat(l2g): neighbourhood features are a division between local and regional * feat(l2g): regional max for distance features only consider protein coding genes * fix(coloc_features): regional max for coloc features only consider protein coding genes * fix(vep_features): regional max for vep features only consider protein coding genes * feat(l2g): train and predict based on protein coding genes only * feat: set nbh feature to 1 if features are 0 in the region * feat: set nbh feature to 1 if features are 0 in the region * Revert "feat: set nbh feature to 1 if features are 0 in the region" This reverts commit da145ab6465892fcdb79eded7816964765fbe271. 
* fix: return nbh features only for protein coding genes + optimisation * test: change expected results based on changes * test: change expected results based on changes * fix: test --------- Co-authored-by: Yakov Tsepilov --- src/gentropy/config.py | 2 +- .../dataset/l2g_features/colocalisation.py | 29 +++--- src/gentropy/dataset/l2g_features/distance.py | 41 +++++--- src/gentropy/dataset/l2g_features/vep.py | 36 ++++--- src/gentropy/dataset/l2g_gold_standard.py | 1 + src/gentropy/dataset/l2g_prediction.py | 1 + src/gentropy/l2g.py | 2 +- tests/gentropy/dataset/test_l2g_feature.py | 99 ++++++++----------- .../open_targets/test_l2g_gold_standard.py | 5 +- 9 files changed, 117 insertions(+), 99 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 6befe472e..4e8cb99e3 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -263,7 +263,7 @@ class LocusToGeneConfig(StepConfig): "geneCount500kb", "proteinGeneCount500kb", "credibleSetConfidence", - "isProteinCoding", + # "isProteinCoding", ] ) hyperparameters: dict[str, Any] = field( diff --git a/src/gentropy/dataset/l2g_features/colocalisation.py b/src/gentropy/dataset/l2g_features/colocalisation.py index fdbf3ed18..68509ca79 100644 --- a/src/gentropy/dataset/l2g_features/colocalisation.py +++ b/src/gentropy/dataset/l2g_features/colocalisation.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any import pyspark.sql.functions as f +from pyspark.sql import Window from gentropy.common.spark_helpers import convert_from_wide_to_long from gentropy.dataset.colocalisation import Colocalisation @@ -168,23 +169,27 @@ def common_neighbourhood_colocalisation_feature_logic( study_locus, ) ) - # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) - # (non protein coding genes in the vicinity are excluded see #3552) - regional_mean_per_study_locus = ( + return ( extended_local_max.join( - gene_index.df.select("geneId", "biotype"), 
"geneId", "left" + # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) + # (non protein coding genes in the vicinity are excluded see #3552) + gene_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"), + "geneId", + "inner", + ) + .withColumn( + "regional_max", + f.max(local_feature_name).over(Window.partitionBy("studyLocusId")), ) - .filter(f.col("biotype") == "protein_coding") - .groupBy("studyLocusId") - .agg(f.mean(local_feature_name).alias("regional_mean")) - ) - return ( - local_max.join(regional_mean_per_study_locus, "studyLocusId", "left") .withColumn( feature_name, - f.col(local_feature_name) - f.coalesce(f.col("regional_mean"), f.lit(0.0)), + f.when( + (f.col("regional_max").isNotNull()) & (f.col("regional_max") != 0.0), + f.col(local_feature_name) + / f.coalesce(f.col("regional_max"), f.lit(0.0)), + ).otherwise(f.lit(0.0)), ) - .drop("regional_mean", local_feature_name) + .drop("regional_max", local_feature_name) ) diff --git a/src/gentropy/dataset/l2g_features/distance.py b/src/gentropy/dataset/l2g_features/distance.py index 08ffc7c5e..40ad568ac 100644 --- a/src/gentropy/dataset/l2g_features/distance.py +++ b/src/gentropy/dataset/l2g_features/distance.py @@ -8,6 +8,7 @@ from pyspark.sql import Window from gentropy.common.spark_helpers import convert_from_wide_to_long +from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.l2g_features.l2g_feature import L2GFeature from gentropy.dataset.l2g_gold_standard import L2GGoldStandard from gentropy.dataset.study_locus import StudyLocus @@ -55,7 +56,7 @@ def common_distance_feature_logic( agg_expr = f.sum(f.col("distance_score")) elif "Sentinel" in feature_name: df = study_loci_to_annotate.df.select("studyLocusId", "variantId") - # For minimum distances we calculate the unweighted distance between the sentinel (lead) and the gene. 
This + # For minimum distances we calculate the unweighted distance between the sentinel (lead) and the gene. distance_score_expr = f.lit(genomic_window) - f.col(distance_type) + f.lit(1) agg_expr = f.first(f.col("distance_score")) return ( @@ -84,15 +85,17 @@ def common_neighbourhood_distance_feature_logic( variant_index: VariantIndex, feature_name: str, distance_type: str, + gene_index: GeneIndex, genomic_window: int = 500_000, ) -> DataFrame: - """Calculate the distance feature that correlates any variant in a credible set with any gene nearby the locus. The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait. + """Calculate the distance feature that correlates any variant in a credible set with any protein coding gene nearby the locus. The distance is weighted by the posterior probability of the variant to factor in its contribution to the trait. Args: study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation variant_index (VariantIndex): The dataset containing distance to gene information feature_name (str): The name of the feature distance_type (str): The type of distance to gene + gene_index (GeneIndex): The dataset containing gene information genomic_window (int): The maximum window size to consider Returns: @@ -109,16 +112,30 @@ def common_neighbourhood_distance_feature_logic( ) return ( # Then compute mean distance in the vicinity (feature will be the same for any gene associated with a studyLocus) - local_metric.withColumn( - "regional_metric", - f.mean(f.col(local_feature_name)).over(Window.partitionBy("studyLocusId")), + local_metric.join( + gene_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"), + "geneId", + "inner", + ) + .withColumn( + "regional_max", + f.max(local_feature_name).over(Window.partitionBy("studyLocusId")), + ) + .withColumn( + feature_name, + f.when( + (f.col("regional_max").isNotNull()) & 
(f.col("regional_max") != 0.0), + f.col(local_feature_name) + / f.coalesce(f.col("regional_max"), f.lit(0.0)), + ).otherwise(f.lit(0.0)), ) .withColumn( feature_name, - (f.col(local_feature_name) - f.col("regional_metric")) - / f.log10(f.lit(genomic_window + 1)), + f.when(f.col(feature_name) < 0, f.lit(0.0)) + .when(f.col(feature_name) > 1, f.lit(1.0)) + .otherwise(f.col(feature_name)), ) - .drop("regional_metric", local_feature_name) + .drop("regional_max", local_feature_name) ) @@ -168,7 +185,7 @@ def compute( class DistanceTssMeanNeighbourhoodFeature(L2GFeature): """Minimum mean distance to TSS for all genes in the vicinity of a studyLocus.""" - feature_dependency_type = VariantIndex + feature_dependency_type = [VariantIndex, GeneIndex] feature_name = "distanceTssMeanNeighbourhood" @classmethod @@ -244,7 +261,7 @@ def compute( class DistanceSentinelTssNeighbourhoodFeature(L2GFeature): """Distance between the sentinel variant and a gene TSS as a relation of the distnace with all the genes in the vicinity of a studyLocus. This is not weighted by the causal probability.""" - feature_dependency_type = VariantIndex + feature_dependency_type = [VariantIndex, GeneIndex] feature_name = "distanceSentinelTssNeighbourhood" @classmethod @@ -325,7 +342,7 @@ def compute( class DistanceFootprintMeanNeighbourhoodFeature(L2GFeature): """Minimum mean distance to footprint for all genes in the vicinity of a studyLocus.""" - feature_dependency_type = VariantIndex + feature_dependency_type = [VariantIndex, GeneIndex] feature_name = "distanceFootprintMeanNeighbourhood" @classmethod @@ -401,7 +418,7 @@ def compute( class DistanceSentinelFootprintNeighbourhoodFeature(L2GFeature): """Distance between the sentinel variant and a gene footprint as a relation of the distnace with all the genes in the vicinity of a studyLocus. 
This is not weighted by the causal probability.""" - feature_dependency_type = VariantIndex + feature_dependency_type = [VariantIndex, GeneIndex] feature_name = "distanceSentinelFootprintNeighbourhood" @classmethod diff --git a/src/gentropy/dataset/l2g_features/vep.py b/src/gentropy/dataset/l2g_features/vep.py index 91b03d57b..4f8dd6779 100644 --- a/src/gentropy/dataset/l2g_features/vep.py +++ b/src/gentropy/dataset/l2g_features/vep.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any import pyspark.sql.functions as f +from pyspark.sql import Window from gentropy.common.spark_helpers import convert_from_wide_to_long from gentropy.dataset.gene_index import GeneIndex @@ -79,7 +80,7 @@ def common_neighbourhood_vep_feature_logic( gene_index: GeneIndex, feature_name: str, ) -> DataFrame: - """Extracts variant severity score computed from VEP for any gene, based on what is the mean score for protein coding genes that are nearby the locus. + """Extracts variant severity score computed from VEP for any gene, based on what is the max score for protein coding genes that are nearby the locus. 
Args: study_loci_to_annotate (StudyLocus | L2GGoldStandard): The dataset containing study loci that will be used for annotation @@ -95,26 +96,29 @@ def common_neighbourhood_vep_feature_logic( study_loci_to_annotate, feature_name=local_feature_name, variant_index=variant_index, - ).join( - # Bring gene classification - gene_index.df.select("geneId", "biotype"), - "geneId", - "inner", - ) - # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) - # (non protein coding genes in the vicinity are excluded see #3552) - regional_mean_per_study_locus = ( - local_metric.filter(f.col("biotype") == "protein_coding") - .groupBy("studyLocusId") - .agg(f.mean(local_feature_name).alias("regional_mean")) ) return ( - local_metric.join(regional_mean_per_study_locus, "studyLocusId", "left") + local_metric + # Compute average score in the vicinity (feature will be the same for any gene associated with a studyLocus) + # (non protein coding genes in the vicinity are excluded see #3552) + .join( + gene_index.df.filter(f.col("biotype") == "protein_coding").select("geneId"), + "geneId", + "inner", + ) + .withColumn( + "regional_max", + f.max(local_feature_name).over(Window.partitionBy("studyLocusId")), + ) .withColumn( feature_name, - f.col(local_feature_name) - f.coalesce(f.col("regional_mean"), f.lit(0.0)), + f.when( + (f.col("regional_max").isNotNull()) & (f.col("regional_max") != 0.0), + f.col(local_feature_name) + / f.coalesce(f.col("regional_max"), f.lit(0.0)), + ).otherwise(f.lit(0.0)), ) - .drop("regional_mean", local_feature_name, "biotype") + .drop("regional_max", local_feature_name) ) diff --git a/src/gentropy/dataset/l2g_gold_standard.py b/src/gentropy/dataset/l2g_gold_standard.py index ec99f7141..4acf96cd2 100644 --- a/src/gentropy/dataset/l2g_gold_standard.py +++ b/src/gentropy/dataset/l2g_gold_standard.py @@ -132,6 +132,7 @@ def build_feature_matrix( on=["studyId", "variantId", "geneId"], how="inner", ) + 
.filter(f.col("isProteinCoding") == 1) .drop("studyId", "variantId") .distinct(), with_gold_standard=True, diff --git a/src/gentropy/dataset/l2g_prediction.py b/src/gentropy/dataset/l2g_prediction.py index 64ce964c7..2bc286a40 100644 --- a/src/gentropy/dataset/l2g_prediction.py +++ b/src/gentropy/dataset/l2g_prediction.py @@ -78,6 +78,7 @@ def from_credible_set( credible_set.df.filter(f.col("studyType") == "gwas") .select("studyLocusId") .join(feature_matrix._df, "studyLocusId") + .filter(f.col("isProteinCoding") == 1) ) ) .fill_na() diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 0dbcd226e..548368967 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -163,7 +163,7 @@ def __init__( session, credible_set_path, recursiveFileLookup=True ) self.feature_matrix = L2GFeatureMatrix( - _df=session.load_data(feature_matrix_path), features_list=self.features_list + _df=session.load_data(feature_matrix_path), ) if run_mode == "predict": diff --git a/tests/gentropy/dataset/test_l2g_feature.py b/tests/gentropy/dataset/test_l2g_feature.py index 0637b4a86..feb8e449a 100644 --- a/tests/gentropy/dataset/test_l2g_feature.py +++ b/tests/gentropy/dataset/test_l2g_feature.py @@ -346,21 +346,21 @@ def test_common_neighbourhood_colocalisation_feature_logic( gene_index=sample_gene_index, variant_index=sample_variant_index, ).withColumn(feature_name, f.round(f.col(feature_name), 3)) - # expected average is (0.81 + 0)/2 = 0.405 + # expected max is 0.81 expected_df = spark.createDataFrame( [ { "studyLocusId": "1", "geneId": "gene1", - "eQtlColocH4MaximumNeighbourhood": 0.405, # 0.81 - 0.405 + "eQtlColocH4MaximumNeighbourhood": 1.0, # 0.81 / 0.81 }, { "studyLocusId": "1", - "geneId": "gene2", - "eQtlColocH4MaximumNeighbourhood": 0.495, # 0.9 - 0.405 + "geneId": "gene3", + "eQtlColocH4MaximumNeighbourhood": 0.0, # 0.0 (no coloc with gene3) /0.81 }, ], - ).select("studyLocusId", "geneId", "eQtlColocH4MaximumNeighbourhood") + ).select("geneId", "studyLocusId", 
"eQtlColocH4MaximumNeighbourhood") assert ( observed_df.collect() == expected_df.collect() ), "The expected and observed dataframes do not match." @@ -561,6 +561,7 @@ def test_common_neighbourhood_distance_feature_logic( common_neighbourhood_distance_feature_logic( self.sample_study_locus, variant_index=self.sample_variant_index, + gene_index=self.sample_gene_index, feature_name=feature_name, distance_type=self.distance_type, genomic_window=10, @@ -568,9 +569,12 @@ def test_common_neighbourhood_distance_feature_logic( .withColumn(feature_name, f.round(f.col(feature_name), 2)) .orderBy(f.col(feature_name).asc()) ) - expected_df = spark.createDataFrame( - (["1", "gene1", -0.44], ["1", "gene2", 0.44]), - ["studyLocusId", "geneId", feature_name], + expected_df = spark.createDataFrame( # regional max is 0.91 from gene2 + ( + ["gene1", "1", 0.0], # (10-10)/0.91 + ["gene2", "1", 1.0], + ), # 0.91/0.91 + ["geneId", "studyLocusId", feature_name], ).orderBy(feature_name) assert ( observed_df.collect() == expected_df.collect() @@ -649,6 +653,32 @@ def _setup( ), _schema=VariantIndex.get_schema(), ) + self.sample_gene_index = GeneIndex( + _df=spark.createDataFrame( + [ + { + "geneId": "gene1", + "chromosome": "1", + "tss": 950000, + "biotype": "protein_coding", + }, + { + "geneId": "gene2", + "chromosome": "1", + "tss": 1050000, + "biotype": "protein_coding", + }, + { + "geneId": "gene3", + "chromosome": "1", + "tss": 1010000, + "biotype": "non_coding", + }, + ], + GeneIndex.get_schema(), + ), + _schema=GeneIndex.get_schema(), + ) class TestCommonVepFeatureLogic: @@ -727,55 +757,13 @@ def test_common_vep_feature_logic( observed_df.collect() == expected_df.collect() ), f"Expected and observed dataframes are not equal for feature {feature_name}." 
- def test_common_neighbourhood_vep_feature_logic_no_protein_coding( - self: TestCommonVepFeatureLogic, - spark: SparkSession, - sample_gene_index: GeneIndex, - sample_variant_index: VariantIndex, - ) -> None: - """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity. - - Because the genes in the vicinity are all non coding, the neighbourhood features should equal the local ones. - """ - feature_name = "vepMaximumNeighbourhood" - non_protein_coding_gene_index = GeneIndex( - _df=sample_gene_index.df.filter(f.col("geneId") != "gene3"), - _schema=GeneIndex.get_schema(), - ) - observed_df = ( - common_neighbourhood_vep_feature_logic( - self.sample_study_locus, - variant_index=sample_variant_index, - gene_index=non_protein_coding_gene_index, - feature_name=feature_name, - ) - .withColumn(feature_name, f.round(f.col(feature_name), 2)) - .orderBy(f.col(feature_name).asc()) - .select("studyLocusId", "geneId", feature_name) - ) - expected_df = ( - spark.createDataFrame( - # regional mean is 0.66 - ( - ["1", "gene1", 0.0], - ["1", "gene2", 0.34], - ), # (0.66-0.66) and (1.0-0.66) - ["studyLocusId", "geneId", feature_name], - ) - .orderBy(feature_name) - .select("studyLocusId", "geneId", feature_name) - ) - assert ( - observed_df.collect() == expected_df.collect() - ), "Output doesn't meet the expectation." 
- def test_common_neighbourhood_vep_feature_logic( self: TestCommonVepFeatureLogic, spark: SparkSession, sample_gene_index: GeneIndex, sample_variant_index: VariantIndex, ) -> None: - """Test the logic of the function that extracts the maximum severity score for a gene given the average of the maximum scores for all protein coding genes in the vicinity.""" + """Test the logic of the function that extracts the maximum severity score for a gene given the maximum of the maximum scores for all protein coding genes in the vicinity.""" feature_name = "vepMaximumNeighbourhood" observed_df = ( common_neighbourhood_vep_feature_logic( @@ -789,12 +777,11 @@ def test_common_neighbourhood_vep_feature_logic( ) expected_df = ( spark.createDataFrame( - # regional mean is 0.66/2 = 0.33 + # regional max is 0.66 ( - ["1", "gene3", -0.33], - ["1", "gene1", 0.33], - ["1", "gene2", 0.67], - ), # (0 - 0.33) and (0.66-0.33) and (1.0 -0.33) + ["1", "gene1", 1.0], # 0.66/0.66 + ["1", "gene3", 0.0], # 0/0.66 + ), ["studyLocusId", "geneId", feature_name], ) .orderBy(feature_name) diff --git a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py index e6afc942f..79f9d925a 100644 --- a/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py +++ b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py @@ -29,6 +29,7 @@ from pyspark.sql.session import SparkSession from gentropy.dataset.colocalisation import Colocalisation + from gentropy.dataset.gene_index import GeneIndex from gentropy.dataset.study_locus import StudyLocus @@ -161,13 +162,15 @@ def test_build_feature_matrix( mock_study_locus: StudyLocus, mock_colocalisation: Colocalisation, mock_study_index: StudyIndex, + mock_gene_index: GeneIndex, ) -> None: """Test building feature matrix with the eQtlColocH4Maximum feature.""" - features_list = ["eQtlColocH4Maximum"] + features_list = ["eQtlColocH4Maximum", "isProteinCoding"] loader = 
L2GFeatureInputLoader( colocalisation=mock_colocalisation, study_index=mock_study_index, study_locus=mock_study_locus, + gene_index=mock_gene_index, ) fm = mock_study_locus.build_feature_matrix(features_list, loader) assert isinstance( From 8f8c711a54d3332f925394f455b69334f359e90a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:34:25 +0000 Subject: [PATCH 169/188] chore(deps): bump codecov/codecov-action from 4 to 5 (#916) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4...v5) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index f27ae499e..d32a3b28e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -46,7 +46,7 @@ jobs: - name: Run tests run: poetry run pytest - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage.xml From cff83ce059286d48fa27fe9f90740b9a672dd037 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 11:49:34 +0000 Subject: [PATCH 170/188] build(deps-dev): bump pytest-cov from 5.0.0 to 6.0.0 (#893) Bumps [pytest-cov](https://github.com/pytest-dev/pytest-cov) from 5.0.0 to 6.0.0. 
- [Changelog](https://github.com/pytest-dev/pytest-cov/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-cov/compare/v5.0.0...v6.0.0) --- updated-dependencies: - dependency-name: pytest-cov dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 14 +++++++------- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index edba6ef99..aea09f8c9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiodns" @@ -4414,17 +4414,17 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments [[package]] name = "pytest-cov" -version = "5.0.0" +version = "6.0.0" description = "Pytest plugin for measuring coverage." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, - {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, + {file = "pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0"}, + {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, ] [package.dependencies] -coverage = {version = ">=5.2.1", extras = ["toml"]} +coverage = {version = ">=7.5", extras = ["toml"]} pytest = ">=4.6" [package.extras] @@ -5959,4 +5959,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "c22ab3de1b76f7549448f4204d52fe0a2d9e68cbbd4d4e873fad667a075dffe3" +content-hash = "af70455b40ec31084130c90b9dc468a5c1198f80e6ae30d10bfb1b17d1706537" diff --git a/pyproject.toml b/pyproject.toml index 54b5a20ca..fd69201cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ pymdown-extensions = "^10.7" [tool.poetry.group.tests.dependencies] -pytest-cov = ">=4.1,<6.0" +pytest-cov = ">=4.1,<7.0" pytest-sugar = ">=0.9.5,<1.1.0" dbldatagen = ">=0.3.1,<0.5.0" pyparsing = "^3.1.1" From 4104ce3efc54e0db2f622940fc43ff46c3e26bdf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 12:31:52 +0000 Subject: [PATCH 171/188] chore: pre-commit autoupdate (#898) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.1 → v0.7.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.1...v0.7.3) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e62a9d790..2c9da9926 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.7.3 hooks: - id: ruff args: From 9f9cfd6479b2bbef71ac596457cf27e3804ea2a5 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 19 Nov 2024 11:18:08 +0000 Subject: [PATCH 172/188] feat(variant index): variant description to summarise variant consequences in transcripts (#914) * feat: extending the VEP schema * feat(vep parser): adding logic to build variant description based on VEP annotation * fix: remove commented lines * fix: improving consequence to so term mapping * fix: nullified variant descriptions * fix: assessment_flag_column_name type fix * chore: pre-commit auto fixes [...] * feat: adding formatting to distances in description * fix: formatting * fix: variant index schema * fix: conftest for variant index * feat(variant index): normalising assessments of in-silico predictors * feat: adding VEP predictor * fix: variant test config * fix: variant test config * fix: schema type * fix: dropping failing test * fix: variant annotatin * fix: gnomad variant index repartition --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../assets/schemas/variant_index.json | 24 + .../assets/schemas/vep_json_output.json | 12 + src/gentropy/common/spark_helpers.py | 7 +- src/gentropy/config.py | 6 + src/gentropy/dataset/variant_index.py | 266 +++++++++++ src/gentropy/datasource/ensembl/vep_parser.py | 445 +++++++++++++++--- src/gentropy/datasource/gnomad/variants.py | 13 +- src/gentropy/gnomad_ingestion.py | 4 +- tests/gentropy/conftest.py | 7 +- .../datasource/ensembl/test_vep_variants.py | 28 -- 10 files changed, 708 insertions(+), 104 deletions(-) diff --git a/src/gentropy/assets/schemas/variant_index.json 
b/src/gentropy/assets/schemas/variant_index.json index 6d5e211ac..1f1ef787c 100644 --- a/src/gentropy/assets/schemas/variant_index.json +++ b/src/gentropy/assets/schemas/variant_index.json @@ -67,6 +67,12 @@ "name": "targetId", "nullable": true, "type": "string" + }, + { + "metadata": {}, + "name": "normalisedScore", + "nullable": true, + "type": "double" } ], "type": "struct" @@ -192,6 +198,18 @@ "nullable": true, "type": "integer" }, + { + "metadata": {}, + "name": "approvedSymbol", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "biotype", + "nullable": true, + "type": "string" + }, { "metadata": {}, "name": "transcriptId", @@ -271,6 +289,12 @@ }, "type": "array" } + }, + { + "metadata": {}, + "name": "variantDescription", + "nullable": true, + "type": "string" } ], "type": "struct" diff --git a/src/gentropy/assets/schemas/vep_json_output.json b/src/gentropy/assets/schemas/vep_json_output.json index 43c3f4ad7..674788407 100644 --- a/src/gentropy/assets/schemas/vep_json_output.json +++ b/src/gentropy/assets/schemas/vep_json_output.json @@ -340,6 +340,18 @@ "nullable": true, "type": "string" }, + { + "metadata": {}, + "name": "gene_symbol", + "nullable": true, + "type": "string" + }, + { + "metadata": {}, + "name": "biotype", + "nullable": true, + "type": "string" + }, { "metadata": {}, "name": "appris", diff --git a/src/gentropy/common/spark_helpers.py b/src/gentropy/common/spark_helpers.py index a1bf9670a..64a8bceb7 100644 --- a/src/gentropy/common/spark_helpers.py +++ b/src/gentropy/common/spark_helpers.py @@ -848,6 +848,7 @@ def get_struct_field_schema(schema: t.StructType, name: str) -> t.DataType: raise ValueError("Provided name %s is not present in the schema.", name) return matching_fields[0].dataType + def calculate_harmonic_sum(input_array: Column) -> Column: """Calculate the harmonic sum of an array. 
@@ -876,9 +877,11 @@ def calculate_harmonic_sum(input_array: Column) -> Column: return f.aggregate( f.arrays_zip( f.sort_array(input_array, False).alias("score"), - f.sequence(f.lit(1), f.size(input_array)).alias("pos") + f.sequence(f.lit(1), f.size(input_array)).alias("pos"), ), f.lit(0.0), lambda acc, x: acc - + x["score"]/f.pow(x["pos"], 2)/f.lit(sum(1 / ((i + 1)**2) for i in range(1000))) + + x["score"] + / f.pow(x["pos"], 2) + / f.lit(sum(1 / ((i + 1) ** 2) for i in range(1000))), ) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 4e8cb99e3..a84dbd89e 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -476,6 +476,11 @@ class _ConsequenceToPathogenicityScoreMap(TypedDict): "label": "splice_polypyrimidine_tract_variant", "score": 0.33, }, + { + "id": "SO_0001626", + "label": "incomplete_terminal_codon_variant", + "score": 0.33, + }, {"id": "SO_0001819", "label": "synonymous_variant", "score": 0.33}, { "id": "SO_0002170", @@ -499,6 +504,7 @@ class _ConsequenceToPathogenicityScoreMap(TypedDict): "score": 0.0, }, {"id": "SO_0001620", "label": "mature_miRNA_variant", "score": 0.0}, + {"id": "SO_0001060", "label": "intergenic_variant", "score": 0.0}, ] _target_: str = "gentropy.variant_index.VariantIndexStep" diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 9e8740aa6..4092ca961 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -298,3 +298,269 @@ def get_loftee(self: VariantIndex) -> DataFrame: "isHighQualityPlof", ) ) + + +class InSilicoPredictorNormaliser: + """Class to normalise in silico predictor assessments. + + Essentially based on the raw scores, it normalises the scores to a range between -1 and 1, and appends the normalised + value to the in silico predictor struct. 
+ + The higher negative values indicate increasingly confident prediction to be a benign variant, + while the higher positive values indicate increasingly deleterious predicted effect. + + The point of these operations to make the scores comparable across different in silico predictors. + """ + + @classmethod + def normalise_in_silico_predictors( + cls: type[InSilicoPredictorNormaliser], + in_silico_predictors: Column, + ) -> Column: + """Normalise in silico predictors. Appends a normalised score to the in silico predictor struct. + + Args: + in_silico_predictors (Column): Column containing in silico predictors (list of structs). + + Returns: + Column: Normalised in silico predictors. + """ + return f.transform( + in_silico_predictors, + lambda predictor: f.struct( + # Extracing all existing columns: + predictor.method.alias("method"), + predictor.assessment.alias("assessment"), + predictor.score.alias("score"), + predictor.assessmentFlag.alias("assessmentFlag"), + predictor.targetId.alias("targetId"), + # Normalising the score + cls.resolve_predictor_methods( + predictor.score, predictor.method, predictor.assessment + ).alias("normalisedScore"), + ), + ) + + @classmethod + def resolve_predictor_methods( + cls: type[InSilicoPredictorNormaliser], + score: Column, + method: Column, + assessment: Column, + ) -> Column: + """It takes a score, a method, and an assessment, and returns a normalized score for the in silico predictor. + + Args: + score (Column): The raw score from the in silico predictor. + method (Column): The method used to generate the score. + assessment (Column): The assessment of the score. + + Returns: + Column: Normalised score for the in silico predictor. 
+ """ + return ( + f.when(method == "LOFTEE", cls._normalise_loftee(assessment)) + .when(method == "SIFT", cls._normalise_sift(score, assessment)) + .when(method == "PolyPhen", cls._normalise_polyphen(assessment, score)) + .when(method == "AlphaMissense", cls._normalise_alpha_missense(score)) + .when(method == "CADD", cls._normalise_cadd(score)) + .when(method == "Pangolin", cls._normalise_pangolin(score)) + # The following predictors are not normalised: + .when(method == "SpliceAI", score) + .when(method == "VEP", score) + ) + + @staticmethod + def _rescaleColumnValue( + column: Column, + min_value: float, + max_value: float, + minimum: float = 0.0, + maximum: float = 1.0, + ) -> Column: + """Rescale a column to a new range. Similar to MinMaxScaler in pyspark ML. + + Args: + column (Column): Column to rescale. + min_value (float): Minimum value of the column. + max_value (float): Maximum value of the column. + minimum (float, optional): Minimum value of the new range. Defaults to 0.0. + maximum (float, optional): Maximum value of the new range. Defaults to 1.0. + + Returns: + Column: Rescaled column. + """ + return (column - min_value) / (max_value - min_value) * ( + maximum - minimum + ) + minimum + + @classmethod + def _normalise_cadd( + cls: type[InSilicoPredictorNormaliser], + score: Column, + ) -> Column: + """Normalise CADD scores. + + Logic: CADD scores are divided into four ranges and scaled accordingly: + - 0-10 -> -1-0 (likely benign ~2M) + - 10-20 -> 0-0.5 (potentially deleterious ~300k) + - 20-30 -> 0.5-0.75 (likely deleterious ~350k) + - 30-81 -> 0.75-1 (highly likely deleterious ~86k) + + Args: + score (Column): CADD score. + + Returns: + Column: Normalised CADD score. 
+ """ + return ( + f.when(score <= 10, cls._rescaleColumnValue(score, 0, 10, -1.0, 0.0)) + .when(score <= 20, cls._rescaleColumnValue(score, 10, 20, 0.0, 0.5)) + .when(score <= 30, cls._rescaleColumnValue(score, 20, 30, 0.5, 0.75)) + .when(score > 30, cls._rescaleColumnValue(score, 30, 81, 0.75, 1)) + ) + + @classmethod + def _normalise_loftee( + cls: type[InSilicoPredictorNormaliser], + assessment: Column, + ) -> Column: + """Normalise LOFTEE scores. + + Logic: LOFTEE scores are divided into two categories: + - HC (high confidence): 1.0 (~120k) + - LC (low confidence): 0.85 (~18k) + The normalised score is calculated based on the category the score falls into. + + Args: + assessment (Column): LOFTEE assessment. + + Returns: + Column: Normalised LOFTEE score. + """ + return f.when(assessment == "HC", f.lit(1)).when( + assessment == "LC", f.lit(0.85) + ) + + @classmethod + def _normalise_sift( + cls: type[InSilicoPredictorNormaliser], + score: Column, + assessment: Column, + ) -> Column: + """Normalise SIFT scores. + + Logic: SIFT scores are divided into four categories: + - deleterious and score >= 0.95: 0.75-1 + - deleterious_low_confidence and score >= 0.95: 0.5-0.75 + - tolerated_low_confidence and score <= 0.95: 0.25-0.5 + - tolerated and score <= 0.95: 0-0.25 + + Args: + score (Column): SIFT score. + assessment (Column): SIFT assessment. + + Returns: + Column: Normalised SIFT score. 
+ """ + return ( + f.when( + (1 - f.round(score.cast(t.DoubleType()), 2) >= 0.95) + & (assessment == "deleterious"), + cls._rescaleColumnValue(1 - score, 0.95, 1, 0.5, 1), + ) + .when( + (1 - f.round(score.cast(t.DoubleType()), 2) >= 0.95) + & (assessment == "deleterious_low_confidence"), + cls._rescaleColumnValue(1 - score, 0.95, 1, 0, 0.5), + ) + .when( + (1 - f.round(score.cast(t.DoubleType()), 2) <= 0.95) + & (assessment == "tolerated_low_confidence"), + cls._rescaleColumnValue(1 - score, 0, 0.95, -0.5, 0.0), + ) + .when( + (1 - f.round(score.cast(t.DoubleType()), 2) <= 0.95) + & (assessment == "tolerated"), + cls._rescaleColumnValue(1 - score, 0, 0.95, -1, -0.5), + ) + ) + + @classmethod + def _normalise_polyphen( + cls: type[InSilicoPredictorNormaliser], + assessment: Column, + score: Column, + ) -> Column: + """Normalise PolyPhen scores. + + Logic: PolyPhen scores are divided into three categories: + - benign: 0-0.446: -1--0.25 + - possibly_damaging: 0.446-0.908: -0.25-0.25 + - probably_damaging: 0.908-1: 0.25-1 + - if assessment is unknown: None + + Args: + assessment (Column): PolyPhen assessment. + score (Column): PolyPhen score. + + Returns: + Column: Normalised PolyPhen score. + """ + return ( + f.when(assessment == "unknown", f.lit(None).cast(t.DoubleType())) + .when(score <= 0.446, cls._rescaleColumnValue(score, 0, 0.446, -1.0, -0.25)) + .when( + score <= 0.908, + cls._rescaleColumnValue(score, 0.446, 0.908, -0.25, 0.25), + ) + .when(score > 0.908, cls._rescaleColumnValue(score, 0.908, 1.0, 0.25, 1.0)) + ) + + @classmethod + def _normalise_alpha_missense( + cls: type[InSilicoPredictorNormaliser], + score: Column, + ) -> Column: + """Normalise AlphaMissense scores. + + Logic: AlphaMissense scores are divided into three categories: + - 0-0.06: -1.0--0.25 + - 0.06-0.77: -0.25-0.25 + - 0.77-1: 0.25-1 + + Args: + score (Column): AlphaMissense score. + + Returns: + Column: Normalised AlphaMissense score. 
+ """ + return ( + f.when(score < 0.06, cls._rescaleColumnValue(score, 0, 0.06, -1.0, -0.25)) + .when(score < 0.77, cls._rescaleColumnValue(score, 0.06, 0.77, -0.25, 0.25)) + .when(score >= 0.77, cls._rescaleColumnValue(score, 0.77, 1, 0.25, 1)) + ) + + @classmethod + def _normalise_pangolin( + cls: type[InSilicoPredictorNormaliser], + score: Column, + ) -> Column: + """Normalise Pangolin scores. + + Logic: Pangolin scores are divided into two categories: + - 0-0.14: 0-0.25 + - 0.14-1: 0.75-1 + + Args: + score (Column): Pangolin score. + + Returns: + Column: Normalised Pangolin score. + """ + return f.when( + f.abs(score) > 0.14, cls._rescaleColumnValue(f.abs(score), 0.14, 1, 0.5, 1) + ).when( + f.abs(score) <= 0.14, + cls._rescaleColumnValue(f.abs(score), 0, 0.14, 0.0, 0.5), + ) diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 01c820513..9494bf6f3 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -16,7 +16,7 @@ order_array_of_structs_by_field, order_array_of_structs_by_two_fields, ) -from gentropy.dataset.variant_index import VariantIndex +from gentropy.dataset.variant_index import InSilicoPredictorNormaliser, VariantIndex if TYPE_CHECKING: from pyspark.sql import Column, DataFrame @@ -41,6 +41,18 @@ class VariantEffectPredictorParser: # Schema for the allele frequency column: ALLELE_FREQUENCY_SCHEMA = VariantIndex.get_schema()["alleleFrequencies"].dataType + # Consequence to sequence ontology map: + SEQUENCE_ONTOLOGY_MAP = { + item["label"]: item["id"] + for item in VariantIndexConfig.consequence_to_pathogenicity_score + } + + # Sequence ontology to score map: + LABEL_TO_SCORE_MAP = { + item["label"]: item["score"] + for item in VariantIndexConfig.consequence_to_pathogenicity_score + } + @staticmethod def get_schema() -> t.StructType: """Return the schema of the VEP output. 
@@ -328,8 +340,19 @@ def _get_most_severe_transcript( lambda transcript: transcript.getItem(score_field_name).isNotNull(), )[0] + @classmethod @enforce_schema(IN_SILICO_PREDICTOR_SCHEMA) + def _get_vep_prediction(cls, most_severe_consequence: Column) -> Column: + return f.struct( + f.lit("VEP").alias("method"), + most_severe_consequence.alias("assessment"), + map_column_by_dictionary( + most_severe_consequence, cls.LABEL_TO_SCORE_MAP + ).alias("score"), + ) + @staticmethod + @enforce_schema(IN_SILICO_PREDICTOR_SCHEMA) def _get_max_alpha_missense(transcripts: Column) -> Column: """Return the most severe alpha missense prediction from all transcripts. @@ -354,37 +377,42 @@ def _get_max_alpha_missense(transcripts: Column) -> Column: ... .select(VariantEffectPredictorParser._get_max_alpha_missense(f.col('transcripts')).alias('am')) ... .show(truncate=False) ... ) - +----------------------------------------------------+ - |am | - +----------------------------------------------------+ - |{max alpha missense, assessment 1, 0.4, null, gene1}| - |{max alpha missense, null, null, null, gene1} | - +----------------------------------------------------+ + +-----------------------------------------------------+ + |am | + +-----------------------------------------------------+ + |{AlphaMissense, assessment 1, 0.4, null, gene1, null}| + |{AlphaMissense, null, null, null, gene1, null} | + +-----------------------------------------------------+ """ - return f.transform( - # Extract transcripts with alpha missense values: - f.filter( - transcripts, - lambda transcript: transcript.getItem("alphamissense").isNotNull(), - ), - # Extract alpha missense values: - lambda transcript: f.struct( - transcript.getItem("alphamissense") - .getItem("am_pathogenicity") - .cast(t.FloatType()) - .alias("score"), - transcript.getItem("alphamissense") - .getItem("am_class") - .alias("assessment"), - f.lit("max alpha missense").alias("method"), - transcript.getItem("gene_id").alias("targetId"), - ), + # 
Extracting transcript with alpha missense values: + transcript = f.filter( + transcripts, + lambda transcript: transcript.getItem("alphamissense").isNotNull(), )[0] + return f.when( + transcript.isNotNull(), + f.struct( + # Adding method: + f.lit("AlphaMissense").alias("method"), + # Extracting assessment: + transcript.alphamissense.am_class.alias("assessment"), + # Extracting score: + transcript.alphamissense.am_pathogenicity.cast(t.FloatType()).alias( + "score" + ), + # Adding assessment flag: + f.lit(None).cast(t.StringType()).alias("assessmentFlag"), + # Extracting target id: + transcript.gene_id.alias("targetId"), + ), + ) + + @classmethod @enforce_schema(IN_SILICO_PREDICTOR_SCHEMA) - @staticmethod def _vep_in_silico_prediction_extractor( + cls: type[VariantEffectPredictorParser], transcript_column_name: str, method_name: str, score_column_name: str | None = None, @@ -403,7 +431,7 @@ def _vep_in_silico_prediction_extractor( Returns: Column: In silico predictor. """ - # Get highest score: + # Get transcript with the highest score: most_severe_transcript: Column = ( # Getting the most severe transcript: VariantEffectPredictorParser._get_most_severe_transcript( @@ -419,31 +447,44 @@ def _vep_in_silico_prediction_extractor( ) ) + # Get assessment: + assessment = ( + f.lit(None).cast(t.StringType()).alias("assessment") + if assessment_column_name is None + else most_severe_transcript.getField(assessment_column_name).alias( + "assessment" + ) + ) + + # Get score: + score = ( + f.lit(None).cast(t.FloatType()).alias("score") + if score_column_name is None + else most_severe_transcript.getField(score_column_name) + .cast(t.FloatType()) + .alias("score") + ) + + # Get assessment flag: + assessment_flag = ( + f.lit(None).cast(t.StringType()).alias("assessmentFlag") + if assessment_flag_column_name is None + else most_severe_transcript.getField(assessment_flag_column_name) + .cast(t.StringType()) + .alias("assessmentFlag") + ) + + # Extract gene id: + gene_id = 
most_severe_transcript.getItem("gene_id").alias("targetId") + return f.when( most_severe_transcript.isNotNull(), f.struct( - # Adding method name: - f.lit(method_name).cast(t.StringType()).alias("method"), - # Adding assessment: - f.lit(None).cast(t.StringType()).alias("assessment") - if assessment_column_name is None - else most_severe_transcript.getField(assessment_column_name).alias( - "assessment" - ), - # Adding score: - f.lit(None).cast(t.FloatType()).alias("score") - if score_column_name is None - else most_severe_transcript.getField(score_column_name) - .cast(t.FloatType()) - .alias("score"), - # Adding assessment flag: - f.lit(None).cast(t.StringType()).alias("assessmentFlag") - if assessment_flag_column_name is None - else most_severe_transcript.getField(assessment_flag_column_name) - .cast(t.FloatType()) - .alias("assessmentFlag"), - # Adding target id if present: - most_severe_transcript.getItem("gene_id").alias("targetId"), + f.lit(method_name).alias("method"), + assessment, + score, + assessment_flag, + gene_id, ), ) @@ -569,16 +610,6 @@ def process_vep_output( Returns: DataFrame: processed data in the right shape. 
""" - # Consequence to sequence ontology map: - sequence_ontology_map = { - item["label"]: item["id"] - for item in VariantIndexConfig.consequence_to_pathogenicity_score - } - # Sequence ontology to score map: - label_to_score_map = { - item["label"]: item["score"] - for item in VariantIndexConfig.consequence_to_pathogenicity_score - } # Processing VEP output: return ( vep_output @@ -612,26 +643,26 @@ def process_vep_output( # Extract CADD scores: cls._vep_in_silico_prediction_extractor( transcript_column_name="transcript_consequences", - method_name="phred scaled CADD", + method_name="CADD", score_column_name="cadd_phred", ), # Extract polyphen scores: cls._vep_in_silico_prediction_extractor( transcript_column_name="transcript_consequences", - method_name="polyphen", + method_name="PolyPhen", score_column_name="polyphen_score", assessment_column_name="polyphen_prediction", ), # Extract sift scores: cls._vep_in_silico_prediction_extractor( transcript_column_name="transcript_consequences", - method_name="sift", + method_name="SIFT", score_column_name="sift_score", assessment_column_name="sift_prediction", ), # Extract loftee scores: cls._vep_in_silico_prediction_extractor( - method_name="loftee", + method_name="LOFTEE", transcript_column_name="transcript_consequences", score_column_name="lof", assessment_column_name="lof", @@ -641,6 +672,8 @@ def process_vep_output( cls._get_max_alpha_missense( f.col("transcript_consequences") ), + # Extract VEP prediction: + cls._get_vep_prediction(f.col("most_severe_consequence")), ), lambda predictor: predictor.isNotNull(), ), @@ -650,16 +683,20 @@ def process_vep_output( f.array( cls._vep_in_silico_prediction_extractor( transcript_column_name="intergenic_consequences", - method_name="phred scaled CADD", + method_name="CADD", score_column_name="cadd_phred", ), + # Extract VEP prediction: + cls._get_vep_prediction(f.col("most_severe_consequence")), ) ) .alias("inSilicoPredictors"), # Convert consequence to SO: 
map_column_by_dictionary( - f.col("most_severe_consequence"), sequence_ontology_map + f.col("most_severe_consequence"), cls.SEQUENCE_ONTOLOGY_MAP ).alias("mostSevereConsequenceId"), + # Propagate most severe consequence: + "most_severe_consequence", # Extract HGVS identifier: f.when( f.size("transcript_consequences") > 0, @@ -681,7 +718,7 @@ def process_vep_output( f.transform( transcript.consequence_terms, lambda y: map_column_by_dictionary( - y, sequence_ontology_map + y, cls.SEQUENCE_ONTOLOGY_MAP ), ).alias("variantFunctionalConsequenceIds"), # Convert consequence terms to consequence score: @@ -689,7 +726,7 @@ def process_vep_output( f.transform( transcript.consequence_terms, lambda term: map_column_by_dictionary( - term, label_to_score_map + term, cls.LABEL_TO_SCORE_MAP ), ) ) @@ -732,6 +769,8 @@ def process_vep_output( "polyphenPrediction" ), transcript.transcript_id.alias("transcriptId"), + transcript.biotype.alias("biotype"), + transcript.gene_symbol.alias("approvedSymbol"), ), ), ).alias("transcriptConsequences"), @@ -806,8 +845,278 @@ def process_vep_output( hash_threshold, ), ) + # Generating a temporary column with only protein coding transcripts: + .withColumn( + "proteinCodingTranscripts", + f.filter( + f.col("transcriptConsequences"), + lambda x: x.getItem("biotype") == "protein_coding", + ), + ) + # Generate variant descrioption: + .withColumn( + "variantDescription", + cls._compose_variant_description( + # Passing the most severe consequence: + f.col("most_severe_consequence"), + # The first transcript: + f.filter( + f.col("transcriptConsequences"), + lambda vep: vep.transcriptIndex == 1, + ).getItem(0), + # The first protein coding transcript: + order_array_of_structs_by_field( + "proteinCodingTranscripts", "transcriptIndex" + )[f.size("proteinCodingTranscripts") - 1], + ), + ) + # Normalising in silico predictor assessments: + .withColumn( + "inSilicoPredictors", + InSilicoPredictorNormaliser.normalise_in_silico_predictors( + 
f.col("inSilicoPredictors") + ), + ) # Dropping intermediate xref columns: - .drop(*["ensembl_xrefs", "omim_xrefs", "clinvar_xrefs", "protvar_xrefs"]) + .drop( + *[ + "ensembl_xrefs", + "omim_xrefs", + "clinvar_xrefs", + "protvar_xrefs", + "most_severe_consequence", + "proteinCodingTranscripts", + ] + ) # Drooping rows with null position: .filter(f.col("position").isNotNull()) ) + + @classmethod + def _compose_variant_description( + cls: type[VariantEffectPredictorParser], + most_severe_consequence: Column, + first_transcript: Column, + first_protein_coding: Column, + ) -> Column: + """Compose variant description based on the most severe consequence. + + Args: + most_severe_consequence (Column): Most severe consequence + first_transcript (Column): First transcript + first_protein_coding (Column): First protein coding transcript + + Returns: + Column: Variant description + """ + return ( + # When there's no transcript whatsoever: + f.when( + first_transcript.isNull(), + f.lit("Intergenic variant no gene in window"), + ) + # When the biotype of the first gene is protein coding: + .when( + first_transcript.getItem("biotype") == "protein_coding", + cls._process_protein_coding_transcript( + first_transcript, most_severe_consequence + ), + ) + # When the first gene is not protein coding, we also pass the first protein coding gene: + .otherwise( + cls._process_non_protein_coding_transcript( + most_severe_consequence, first_transcript, first_protein_coding + ) + ) + ) + + @staticmethod + def _process_consequence_term(consequence_term: Column) -> Column: + """Cleaning up consequence term: capitalizing and replacing underscores. + + Args: + consequence_term (Column): Consequence term. + + Returns: + Column: Cleaned up consequence term. 
+ """ + last = f.when(consequence_term.contains("variant"), f.lit("")).otherwise( + " variant" + ) + return f.concat(f.regexp_replace(f.initcap(consequence_term), "_", " "), last) + + @staticmethod + def _process_overlap(transcript: Column) -> Column: + """Process overlap with gene: if the variant overlaps with the gene, return the gene name or distance. + + Args: + transcript (Column): Transcript. + + Returns: + Column: string column with overlap description. + """ + gene_label = f.when( + transcript.getField("approvedSymbol").isNotNull(), + transcript.getField("approvedSymbol"), + ).otherwise(transcript.getField("targetId")) + + return f.when( + transcript.getField("distanceFromFootprint") == 0, + # "overlapping with CCDC8" + f.concat(f.lit(" overlapping with "), gene_label), + ).otherwise( + # " 123 basepair away from CCDC8" + f.concat( + f.lit(" "), + f.format_number(transcript.getField("distanceFromFootprint"), 0), + f.lit(" basepair away from "), + gene_label, + ) + ) + + @staticmethod + def _process_aa_change(transcript: Column) -> Column: + """Extract amino acid change information from transcript when available. + + Args: + transcript (Column): Transcript. + + Returns: + Column: Amino acid change information. + """ + return f.when( + transcript.getField("aminoAcidChange").isNotNull(), + f.concat( + f.lit(", causing amio-acid change: "), + transcript.getField("aminoAcidChange"), + f.lit(" with "), + f.lower(transcript.getField("impact")), + f.lit(" impact."), + ), + ).otherwise(f.lit(".")) + + @staticmethod + def _process_lof(transcript: Column) -> Column: + """Process loss of function annotation from LOFTEE prediction. + + Args: + transcript (Column): Transcript. + + Returns: + Column: Loss of function annotation. 
+ """ + return f.when( + transcript.getField("lofteePrediction").isNotNull() + & (transcript.getField("lofteePrediction") == "HC"), + f.lit(" A high-confidence loss-of-function variant by loftee."), + ).otherwise(f.lit("")) + + @classmethod + def _process_protein_coding_transcript( + cls: type[VariantEffectPredictorParser], + transcript: Column, + most_severe_consequence: Column, + ) -> Column: + """Extract information from the first, protein coding transcript. + + Args: + transcript (Column): Transcript. + most_severe_consequence (Column): Most severe consequence. + + Returns: + Column: Variant description. + """ + # Process consequence term: + consequence_text = cls._process_consequence_term(most_severe_consequence) + + # Does it overlap with the gene: + overlap = cls._process_overlap(transcript) + + # Does it cause amino acid change: + amino_acid_change = cls._process_aa_change(transcript) + + # Processing lof annotation: + lof_assessment = cls._process_lof(transcript) + + # Concat all together: + return f.concat(consequence_text, overlap, amino_acid_change, lof_assessment) + + @staticmethod + def _adding_biotype(transcript: Column) -> Column: + """Adding biotype information to the variant description. + + Args: + transcript (Column): Transcript. + + Returns: + Column: Biotype information. + """ + biotype = f.when( + transcript.getField("biotype").contains("gene"), + f.regexp_replace(transcript.getField("biotype"), "_", " "), + ).otherwise( + f.concat( + f.regexp_replace(transcript.getField("biotype"), "_", " "), + f.lit(" gene."), + ) + ) + + return f.concat(f.lit(", a "), biotype) + + @staticmethod + def _parse_protein_coding_transcript(transcript: Column) -> Column: + """Parse the closest, not first protein coding transcript: extract gene symbol and distance. + + Args: + transcript (Column): Transcript. + + Returns: + Column: Protein coding transcript information. 
+ """ + gene_label = f.when( + transcript.getField("approvedSymbol").isNotNull(), + transcript.getField("approvedSymbol"), + ).otherwise(transcript.getField("targetId")) + + return f.when( + transcript.isNotNull(), + f.concat( + f.lit(" The closest protein-coding gene is "), + gene_label, + f.lit(" ("), + f.format_number(transcript.getField("distanceFromFootprint"), 0), + f.lit(" basepair away)."), + ), + ).otherwise(f.lit("")) + + @classmethod + def _process_non_protein_coding_transcript( + cls: type[VariantEffectPredictorParser], + most_severe_consequence: Column, + first_transcript: Column, + first_protein_coding: Column, + ) -> Column: + """Extract information from the first, non-protein coding transcript. + + Args: + most_severe_consequence (Column): Most severe consequence. + first_transcript (Column): First transcript. + first_protein_coding (Column): First protein coding transcript. + + Returns: + Column: Variant description. + """ + # Process consequence term: + consequence_text = cls._process_consequence_term(most_severe_consequence) + + # Does it overlap with the gene: + overlap = cls._process_overlap(first_transcript) + + # Adding biotype: + biotype = cls._adding_biotype(first_transcript) + + # Adding protein coding gene: + protein_transcript = cls._parse_protein_coding_transcript(first_protein_coding) + + # Concat all together: + return f.concat(consequence_text, overlap, biotype, protein_transcript) diff --git a/src/gentropy/datasource/gnomad/variants.py b/src/gentropy/datasource/gnomad/variants.py index 0575261c2..7540c374f 100644 --- a/src/gentropy/datasource/gnomad/variants.py +++ b/src/gentropy/datasource/gnomad/variants.py @@ -10,7 +10,7 @@ from gentropy.common.types import VariantPopulation from gentropy.config import GnomadVariantConfig, VariantIndexConfig -from gentropy.dataset.variant_index import VariantIndex +from gentropy.dataset.variant_index import InSilicoPredictorNormaliser, VariantIndex if TYPE_CHECKING: pass @@ -91,7 +91,7 @@ def 
as_variant_index(self: GnomADVariants) -> VariantIndex: inSilicoPredictors=hl.array( [ hl.struct( - method=hl.str("spliceai"), + method=hl.str("SpliceAI"), assessment=hl.missing(hl.tstr), score=hl.expr.functions.float32( ht.in_silico_predictors.spliceai_ds_max @@ -100,7 +100,7 @@ def as_variant_index(self: GnomADVariants) -> VariantIndex: targetId=hl.missing(hl.tstr), ), hl.struct( - method=hl.str("pangolin"), + method=hl.str("Pangolin"), assessment=hl.missing(hl.tstr), score=hl.expr.functions.float32( ht.in_silico_predictors.pangolin_largest_ds @@ -149,6 +149,13 @@ def as_variant_index(self: GnomADVariants) -> VariantIndex: "mostSevereConsequenceId": f.lit(None).cast(t.StringType()), } ) + # Normalising in silico predictor assessments: + .withColumn( + "inSilicoPredictors", + InSilicoPredictorNormaliser.normalise_in_silico_predictors( + f.col("inSilicoPredictors") + ), + ) ), _schema=VariantIndex.get_schema(), ) diff --git a/src/gentropy/gnomad_ingestion.py b/src/gentropy/gnomad_ingestion.py index 8d2cd92f9..9b4de8a0c 100644 --- a/src/gentropy/gnomad_ingestion.py +++ b/src/gentropy/gnomad_ingestion.py @@ -114,6 +114,8 @@ def __init__( # Convert data to variant index: .as_variant_index() # Write file: - .df.write.mode(session.write_mode) + .df.repartitionByRange("chromosome", "position") + .sortWithinPartitions("chromosome", "position") + .write.mode(session.write_mode) .parquet(variant_annotation_path) ) diff --git a/tests/gentropy/conftest.py b/tests/gentropy/conftest.py index 10298205d..f19c28623 100644 --- a/tests/gentropy/conftest.py +++ b/tests/gentropy/conftest.py @@ -275,7 +275,8 @@ def mock_variant_index(spark: SparkSession) -> VariantIndex: "assessment", cast(rand() as string), "score", rand(), "assessmentFlag", cast(rand() as string), - "targetId", cast(rand() as string) + "targetId", cast(rand() as string), + "normalizedScore", cast(rand() as float) ) ) """, @@ -308,7 +309,9 @@ def mock_variant_index(spark: SparkSession) -> VariantIndex: 
"polyphenPrediction", rand(), "consequenceScore", cast(rand() as float), "transcriptIndex", cast(rand() as integer), - "transcriptId", cast(rand() as string) + "transcriptId", cast(rand() as string), + "biotype", cast(rand() as string), + "approvedSymbol", cast(rand() as string) ) ) """, diff --git a/tests/gentropy/datasource/ensembl/test_vep_variants.py b/tests/gentropy/datasource/ensembl/test_vep_variants.py index 556a22411..f0127b9b2 100644 --- a/tests/gentropy/datasource/ensembl/test_vep_variants.py +++ b/tests/gentropy/datasource/ensembl/test_vep_variants.py @@ -7,7 +7,6 @@ import pytest from pyspark.sql import DataFrame from pyspark.sql import functions as f -from pyspark.sql import types as t from gentropy.dataset.variant_index import VariantIndex from gentropy.datasource.ensembl.vep_parser import VariantEffectPredictorParser @@ -108,33 +107,6 @@ def _setup(self: TestVEPParser, spark: SparkSession) -> None: self.raw_vep_output, 200 ) - def test_extract_variant_index_from_vep( - self: TestVEPParser, spark: SparkSession - ) -> None: - """Test if the variant index can be extracted from the VEP output.""" - variant_index = VariantEffectPredictorParser.extract_variant_index_from_vep( - spark, self.SAMPLE_VEP_DATA_PATH, hash_threshold=100 - ) - - assert isinstance( - variant_index, VariantIndex - ), "VariantIndex object not created." - in_silico_schema = t.ArrayType( - t.StructType( - [ - t.StructField("method", t.StringType(), True), - t.StructField("assessment", t.StringType(), True), - t.StructField("score", t.FloatType(), True), - t.StructField("assessmentFlag", t.StringType(), True), - t.StructField("targetId", t.StringType(), True), - ] - ) - ) - assert ( - variant_index.df.select("inSilicoPredictors").schema.fields[0].dataType - == in_silico_schema - ), "In silico schema is not correct." 
- def test_process(self: TestVEPParser) -> None: """Test process method.""" df = VariantEffectPredictorParser.process_vep_output(self.raw_vep_output) From a858662c41824c54954491d20bdbd0f20bfee283 Mon Sep 17 00:00:00 2001 From: Yakov Date: Wed, 20 Nov 2024 10:16:08 +0000 Subject: [PATCH 173/188] fix: r2 for lead variant is always 1 (#919) * fix: r2 for lead varaint is always 1 * fix: removing not needed quality flag * test: removing unused condition * fix: type: ignore --------- Co-authored-by: DSuveges --- src/gentropy/dataset/study_locus.py | 4 -- src/gentropy/method/ld.py | 57 ++++++++++++++++------------- src/gentropy/method/pics.py | 14 ------- tests/gentropy/method/test_pics.py | 19 ++-------- 4 files changed, 34 insertions(+), 60 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index e6fd06c12..10dc9c10d 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -74,7 +74,6 @@ class StudyLocusQualityCheck(Enum): LD_CLUMPED (str): Explained by a more significant variant in high LD WINDOW_CLUMPED (str): Explained by a more significant variant in the same window NO_POPULATION (str): Study does not have population annotation to resolve LD - NOT_QUALIFYING_LD_BLOCK (str): LD block does not contain variants at the required R^2 threshold FLAGGED_STUDY (str): Study has quality control flag(s) MISSING_STUDY (str): Flagging study loci if the study is not found in the study index as a reference DUPLICATED_STUDYLOCUS_ID (str): Study-locus identifier is not unique @@ -100,9 +99,6 @@ class StudyLocusQualityCheck(Enum): LD_CLUMPED = "Explained by a more significant variant in high LD" WINDOW_CLUMPED = "Explained by a more significant variant in the same window" NO_POPULATION = "Study does not have population annotation to resolve LD" - NOT_QUALIFYING_LD_BLOCK = ( - "LD block does not contain variants at the required R^2 threshold" - ) FLAGGED_STUDY = "Study has quality control flag(s)" 
MISSING_STUDY = "Study not found in the study index" DUPLICATED_STUDYLOCUS_ID = "Non-unique study locus identifier" diff --git a/src/gentropy/method/ld.py b/src/gentropy/method/ld.py index 64d47451d..4fe27e2ee 100644 --- a/src/gentropy/method/ld.py +++ b/src/gentropy/method/ld.py @@ -35,19 +35,13 @@ def _get_major_population(ordered_populations: Column) -> Column: major_population_size = ordered_populations["relativeSampleSize"][0] major_populations = f.filter( ordered_populations, - lambda x: x["relativeSampleSize"] == major_population_size + lambda x: x["relativeSampleSize"] == major_population_size, ) # Check if nfe (Non-Finnish European) is one of the major populations - has_nfe = f.filter( - major_populations, - lambda x: x["ldPopulation"] == "nfe" - ) + has_nfe = f.filter(major_populations, lambda x: x["ldPopulation"] == "nfe") return f.when( - (f.size(major_populations) > 1) & (f.size(has_nfe) == 1), - f.lit("nfe") - ).otherwise( - ordered_populations["ldPopulation"][0] - ) + (f.size(major_populations) > 1) & (f.size(has_nfe) == 1), f.lit("nfe") + ).otherwise(ordered_populations["ldPopulation"][0]) @staticmethod def _calculate_r2_major(ld_set: Column, major_population: Column) -> Column: @@ -65,19 +59,18 @@ def _calculate_r2_major(ld_set: Column, major_population: Column) -> Column: lambda x: f.struct( x["tagVariantId"].alias("tagVariantId"), f.filter( - x["rValues"], - lambda y: y["population"] == major_population - ).alias("rValues") - ) + x["rValues"], lambda y: y["population"] == major_population + ).alias("rValues"), + ), ) return f.transform( ld_set_with_major_pop, lambda x: f.struct( x["tagVariantId"].alias("tagVariantId"), - f.coalesce( - f.pow(x["rValues"]["r"][0], 2), f.lit(0.0) - ).alias("r2Overall") - ) + f.coalesce(f.pow(x["rValues"]["r"][0], 2), f.lit(0.0)).alias( + "r2Overall" + ), + ), ) @staticmethod @@ -160,8 +153,8 @@ def ld_annotate( studies.df.select( "studyId", order_array_of_structs_by_field( - "ldPopulationStructure", 
"relativeSampleSize" - ).alias("ldPopulationStructure") + "ldPopulationStructure", "relativeSampleSize" + ).alias("ldPopulationStructure"), ), on="studyId", how="left", @@ -177,10 +170,8 @@ def ld_annotate( "majorPopulation", f.when( f.col("ldPopulationStructure").isNotNull(), - cls._get_major_population( - f.col("ldPopulationStructure") - ) - ) + cls._get_major_population(f.col("ldPopulationStructure")), + ), ) # Calculate R2 using R of the major population .withColumn( @@ -189,8 +180,8 @@ def ld_annotate( f.col("ldPopulationStructure").isNotNull(), cls._calculate_r2_major( f.col("ldSet"), f.col("majorPopulation") - ) - ) + ), + ), ) .drop("ldPopulationStructure", "majorPopulation") # Filter the LD set by the R2 threshold and set to null if no LD information passes the threshold @@ -209,6 +200,20 @@ def ld_annotate( "ldSet", cls._rescue_lead_variant(f.col("ldSet"), f.col("variantId")), ) + # Ensure that the lead varaitn is always with r2==1 + .withColumn( + "ldSet", + f.expr( + """ + transform(ldSet, x -> + IF(x.tagVariantId == variantId, + named_struct('tagVariantId', x.tagVariantId, 'r2Overall', 1.0), + x + ) + ) + """ + ), + ) ), _schema=StudyLocus.get_schema(), )._qc_no_population() diff --git a/src/gentropy/method/pics.py b/src/gentropy/method/pics.py index 60e28b9a1..dd767204e 100644 --- a/src/gentropy/method/pics.py +++ b/src/gentropy/method/pics.py @@ -224,11 +224,6 @@ def finemap( ) ) - # Flagging expression for loci that do not qualify for PICS: - non_picsable_expr = ( - f.size(f.filter(f.col("ldSet"), lambda x: x.r2Overall >= 0.5)) == 0 - ) - # Registering the UDF to be used in the pipeline: finemap_udf = f.udf( lambda ld_set, neglog_p: cls._finemap(ld_set, neglog_p, k), @@ -271,15 +266,6 @@ def finemap( ), ), ) - # Flagging loci that do not qualify for PICS: - .withColumn( - "qualityControls", - StudyLocus.update_quality_flag( - f.col("qualityControls"), - non_picsable_expr, - StudyLocusQualityCheck.NOT_QUALIFYING_LD_BLOCK, - ), - ) # Flagging all PICS 
loci with OUT_OF_SAMPLE_LD flag: .withColumn( "qualityControls", diff --git a/tests/gentropy/method/test_pics.py b/tests/gentropy/method/test_pics.py index d5a8eb5d0..9c1fc80b3 100644 --- a/tests/gentropy/method/test_pics.py +++ b/tests/gentropy/method/test_pics.py @@ -38,21 +38,6 @@ def test_finemap_null_ld_set( observed_df = PICS.finemap(mock_study_locus).df.limit(1) assert observed_df.collect()[0]["locus"] is None - def test_finemap_quality_control( - self: TestFinemap, mock_study_locus: StudyLocus - ) -> None: - """Test that we add a `empty locus` flag when any variant in the locus meets PICS criteria.""" - mock_study_locus.df = mock_study_locus.df.withColumn( - # Association with an empty ldSet - "ldSet", - f.when(f.col("ldSet").isNull(), f.array()).otherwise(f.col("ldSet")), - ).filter(f.size("ldSet") == 0) - observed_df = PICS.finemap(mock_study_locus).df.limit(1) - qc_flag = "LD block does not contain variants at the required R^2 threshold" - assert ( - qc_flag in observed_df.collect()[0]["qualityControls"] - ), "Empty locus QC flag is missing." 
- def test__finemap_udf() -> None: """Test the _finemap UDF with a simple case.""" @@ -75,7 +60,9 @@ def test__finemap_udf() -> None: "posteriorProbability": 0.9288304011311763, }, ] - for idx, tag in enumerate(result): # type: ignore + + assert result is not None, "The result of _finemap should not be None" + for idx, tag in enumerate(result): # assert both dictionaries have the same content regardless of its order assert tag == expected[idx] From b6303d571d19cb6262386c5d02adfdfefb7184be Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 21 Nov 2024 13:15:22 +0000 Subject: [PATCH 174/188] feat: reverting to using finngen 95% credible sets (#922) * feat: reverting to 95% finngen credible sets * fix: updating tests and column names --- .../datasource/finngen/finemapping.py | 12 +++++----- .../finngen_credset_summary_sample.tsv | 24 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index e0f39689d..e4c1f776f 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -105,7 +105,7 @@ class FinnGenFinemapping: [ StructField("trait", StringType(), True), StructField("region", StringType(), True), - StructField("cs_number", StringType(), True), + StructField("cs", StringType(), True), StructField("cs_log10bf", DoubleType(), True), StructField("cs_avg_r2", DoubleType(), True), StructField("cs_min_r2", DoubleType(), True), @@ -184,7 +184,7 @@ class FinnGenFinemapping: summary_hail_schema: hl.tstruct = hl.tstruct( trait=hl.tstr, region=hl.tstr, - cs_number=hl.tstr, + cs=hl.tstr, cs_log10bf=hl.tfloat64, cs_avg_r2=hl.tfloat64, cs_min_r2=hl.tfloat64, @@ -245,7 +245,7 @@ def from_finngen_susie_finemapping( The finngen_susie_finemapping_cs_summary_files are files that Contains credible set summaries from SuSiE fine-mapping for all 
genome-wide significant regions with following schema: - trait: phenotype - region: region for which the fine-mapping was run. - - cs_number: running number for independent credible sets in a region, assigned to 99% PIP + - cs: running number for independent credible sets in a region, assigned to 95% PIP - cs_log10bf: Log10 bayes factor of comparing the solution of this model (cs independent credible sets) to cs -1 credible sets - cs_avg_r2: Average correlation R2 between variants in the credible set - cs_min_r2: minimum r2 between variants in the credible set @@ -298,7 +298,7 @@ def from_finngen_susie_finemapping( # Drop rows which don't have proper position. snps_df.filter(f.col("position").cast(t.IntegerType()).isNotNull()) # Drop non credible set SNPs: - .filter(f.col("cs_99").cast(t.IntegerType()) > 0) + .filter(f.col("cs").cast(t.IntegerType()) > 0) .select( # Add study idenfitier. f.concat_ws("_", f.lit(finngen_release_prefix), f.col("trait")) @@ -307,7 +307,7 @@ def from_finngen_susie_finemapping( f.col("region"), # Add variant information. 
f.regexp_replace(f.col("v"), ":", "_").alias("variantId"), - f.col("cs_99").cast("integer").alias("credibleSetIndex"), + f.col("cs").cast("integer").alias("credibleSetIndex"), f.regexp_replace(f.col("chromosome"), "^chr", "") .cast(t.StringType()) .alias("chromosome"), @@ -437,7 +437,7 @@ def from_finngen_susie_finemapping( cs_summary_df.select( f.col("region"), f.col("trait"), - f.col("cs_number").cast("integer").alias("credibleSetIndex"), + f.col("cs").cast("integer").alias("credibleSetIndex"), f.col("cs_log10bf").cast("double").alias("credibleSetlog10BF"), f.col("cs_avg_r2").cast("double").alias("purityMeanR2"), f.col("cs_min_r2").cast("double").alias("purityMinR2"), diff --git a/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv index ca973d8fc..6ba610807 100644 --- a/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv +++ b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv @@ -1,12 +1,12 @@ -trait region cs cs_log10bf cs_avg_r2 cs_min_r2 low_purity cs_size good_cs cs_id v rsid p beta sd prob cs_specific_prob most_severe gene_most_severe cs_number -H7_HORDEOLUM chr1:156514826-159514826 1 1.46467818637 1.0 1.0 False 1 True chr1:156514826-159514826_1 1:158014826:C:T chr1_158014826_C_T 2.54378e-08 1.88207 0.446902952132688 0.971307293706789 0.971297199099758 intron_variant KIRREL1 1 -G6_MIGRAINE_NO_AURA chr6:11403725-14403725 1 1.36783669104 0.760565917219 0.140997997009 True 19 False chr6:11403725-14403725_1 6:12903725:A:G chr6_12903725_A_G 3.61468e-08 -0.0981548 0.0320646607273439 0.90799209537494 0.907981878613055 intron_variant PHACTR1 1 -G6_MIGRAINE_NO_AURA chr12:55647065-58647065 1 1.66017507931 0.804011708889 0.804011708889 False 2 True chr12:55647065-58647065_1 12:57147065:C:G chr12_57147065_C_G 2.29261e-08 0.105129 0.0353895280800414 0.898218191602507 0.898218191602507 intron_variant LRP1 1 -G6_MIGRAINE_NO_AURA chr12:2906208-5906208 1 1.87518878137 
0.762329739726 0.382297363204 False 11 True chr12:2906208-5906208_1 12:4406208:T:C chr12_4406208_T_C 3.52631e-08 -0.097881 0.0413367587403912 0.244577820826502 0.244317125809489 downstream_gene_variant AC008012.1 1 -G6_MIGRAINE_NO_AURA chr6:95113283-98113283 1 3.00789560938 0.917456939478 0.788796212164 False 43 True chr6:95113283-98113283_1 6:96613283:C:T chr6_96613283_C_T 5.56352e-09 0.110915 0.0357600937089016 0.122046568781673 0.122046568781673 intron_variant FHL5 1 -K11_APHTA_RECUR chr2:203986459-206986459 1 1.53062868484 1.0 1.0 False 1 True chr2:203986459-206986459_1 2:205486459:A:G chr2_205486459_A_G 2.5015e-08 0.225777 0.0591109424642432 0.956659971956856 0.956659971956856 intron_variant PARD3B 1 -DM_NEPHROPATHY_EXMORE chr2:224821033-227821033 1 5.02824362893 0.984196974601 0.661904280625 False 50 True chr2:224821033-227821033_1 2:226321033:T:C chr2_226321033_T_C 2.42008e-10 -0.144142 0.0211352709908576 0.0225587152970605 0.0225587152970605 intergenic_variant 1 -DM_NEPHROPATHY_EXMORE chr11:660994-3660994 1 2.7116635835 0.870906412027 0.809751619044 False 3 True chr11:660994-3660994_1 11:2160994:A:T chr11_2160994_A_T 3.15043e-09 0.168647 0.08489428697954 0.47925285536432 0.479123015291369 splice_region_variant INS-IGF2 1 -DM_NEPHROPATHY_EXMORE chr12:2775678-5775678 1 2.25532985077 1.0 1.0 False 1 True chr12:2775678-5775678_1 12:4275678:T:G chr12_4275678_T_G 3.72503e-09 -0.433585 0.0763706359152554 0.996634114263842 0.996632992137397 intron_variant CCND2 1 -AB1_EBV chr6:1412516-4412516 1 4.4609149402 0.678175582701 0.468032488641 False 4 True chr6:1412516-4412516_1 6:2912516:CCA:C chr6_2912516_CCA_C 4.90908e-11 0.196413 0.0943211787286286 0.367291808726716 0.367106415849495 upstream_gene_variant AL133351.2 1 -AB1_EBV chr20:16016584-19016584 1 1.27192551601 1.0 1.0 False 1 True chr20:16016584-19016584_1 20:17516584:C:T chr20_17516584_C_T 3.72152e-08 2.00466 0.504004034147034 0.963905759488349 0.963892713745976 intron_variant BFSP1 1 +trait region cs 
cs_log10bf cs_avg_r2 cs_min_r2 low_purity cs_size good_cs cs_id v rsid p beta sd prob cs_specific_prob most_severe gene_most_severe +H7_HORDEOLUM chr1:156514826-159514826 1 1.46467818637 1.0 1.0 False 1 True chr1:156514826-159514826_1 1:158014826:C:T chr1_158014826_C_T 2.54378e-08 1.88207 0.446902952132688 0.971307293706789 0.971297199099758 intron_variant KIRREL1 +G6_MIGRAINE_NO_AURA chr6:11403725-14403725 1 1.36783669104 0.760565917219 0.140997997009 True 19 False chr6:11403725-14403725_1 6:12903725:A:G chr6_12903725_A_G 3.61468e-08 -0.0981548 0.0320646607273439 0.90799209537494 0.907981878613055 intron_variant PHACTR1 +G6_MIGRAINE_NO_AURA chr12:55647065-58647065 1 1.66017507931 0.804011708889 0.804011708889 False 2 True chr12:55647065-58647065_1 12:57147065:C:G chr12_57147065_C_G 2.29261e-08 0.105129 0.0353895280800414 0.898218191602507 0.898218191602507 intron_variant LRP1 +G6_MIGRAINE_NO_AURA chr12:2906208-5906208 1 1.87518878137 0.762329739726 0.382297363204 False 11 True chr12:2906208-5906208_1 12:4406208:T:C chr12_4406208_T_C 3.52631e-08 -0.097881 0.0413367587403912 0.244577820826502 0.244317125809489 downstream_gene_variant AC008012.1 +G6_MIGRAINE_NO_AURA chr6:95113283-98113283 1 3.00789560938 0.917456939478 0.788796212164 False 43 True chr6:95113283-98113283_1 6:96613283:C:T chr6_96613283_C_T 5.56352e-09 0.110915 0.0357600937089016 0.122046568781673 0.122046568781673 intron_variant FHL5 +K11_APHTA_RECUR chr2:203986459-206986459 1 1.53062868484 1.0 1.0 False 1 True chr2:203986459-206986459_1 2:205486459:A:G chr2_205486459_A_G 2.5015e-08 0.225777 0.0591109424642432 0.956659971956856 0.956659971956856 intron_variant PARD3B +DM_NEPHROPATHY_EXMORE chr2:224821033-227821033 1 5.02824362893 0.984196974601 0.661904280625 False 50 True chr2:224821033-227821033_1 2:226321033:T:C chr2_226321033_T_C 2.42008e-10 -0.144142 0.0211352709908576 0.0225587152970605 0.0225587152970605 intergenic_variant +DM_NEPHROPATHY_EXMORE chr11:660994-3660994 1 2.7116635835 0.870906412027 
0.809751619044 False 3 True chr11:660994-3660994_1 11:2160994:A:T chr11_2160994_A_T 3.15043e-09 0.168647 0.08489428697954 0.47925285536432 0.479123015291369 splice_region_variant INS-IGF2 +DM_NEPHROPATHY_EXMORE chr12:2775678-5775678 1 2.25532985077 1.0 1.0 False 1 True chr12:2775678-5775678_1 12:4275678:T:G chr12_4275678_T_G 3.72503e-09 -0.433585 0.0763706359152554 0.996634114263842 0.996632992137397 intron_variant CCND2 +AB1_EBV chr6:1412516-4412516 1 4.4609149402 0.678175582701 0.468032488641 False 4 True chr6:1412516-4412516_1 6:2912516:CCA:C chr6_2912516_CCA_C 4.90908e-11 0.196413 0.0943211787286286 0.367291808726716 0.367106415849495 upstream_gene_variant AL133351.2 +AB1_EBV chr20:16016584-19016584 1 1.27192551601 1.0 1.0 False 1 True chr20:16016584-19016584_1 20:17516584:C:T chr20_17516584_C_T 3.72152e-08 2.00466 0.504004034147034 0.963905759488349 0.963892713745976 intron_variant BFSP1 From 05e47a384b03bdcb7dd3999b6645775db99da9eb Mon Sep 17 00:00:00 2001 From: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com> Date: Thu, 21 Nov 2024 13:29:49 +0000 Subject: [PATCH 175/188] feat: changing studylocus validation to 95 percent credible sets (#921) * feat: changing studylocus validation to 95 percent credible sets * fix: updating comment in code to reflect 95% credset * fix: removing credset number of partitions * fix: flag name --------- Co-authored-by: Yakov Tsepilov --- src/gentropy/dataset/study_locus.py | 2 +- src/gentropy/study_locus_validation.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/gentropy/dataset/study_locus.py b/src/gentropy/dataset/study_locus.py index 10dc9c10d..5abb30c8e 100644 --- a/src/gentropy/dataset/study_locus.py +++ b/src/gentropy/dataset/study_locus.py @@ -113,7 +113,7 @@ class StudyLocusQualityCheck(Enum): EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set" OUT_OF_SAMPLE_LD = "Study locus finemapped without in-sample LD reference" 
ABNORMAL_PIPS = ( - "Study locus with a sum of PIPs that not in the expected range [0.99,1]" + "Study locus with a sum of PIPs that not in the expected range [0.95,1]" ) INVALID_CHROMOSOME = "Chromosome not in 1:22, X, Y, XY or MT" TOP_HIT_AND_SUMMARY_STATS = ( diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index 4c0c8c4c5..cf36a4389 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -46,24 +46,24 @@ def __init__( .annotate_study_type(study_index) # Add study type to study locus .qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics .qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE - # Flagging credible sets with PIP > 1 or PIP < 0.99 + # Annotates credible intervals and filter to only keep 95% credible sets + .filter_credible_set(credible_interval=CredibleInterval.IS95) + # Flagging credible sets with PIP > 1 or PIP < 0.95 .qc_abnormal_pips( - sum_pips_lower_threshold=0.99, sum_pips_upper_threshold=1.0001 + sum_pips_lower_threshold=0.95, sum_pips_upper_threshold=1.0001 ) - # Annotates credible intervals and filter to only keep 99% credible sets - .filter_credible_set(credible_interval=CredibleInterval.IS99) # Annotate credible set confidence: .assign_confidence() ).persist() # we will need this for 2 types of outputs # Valid study locus partitioned to simplify the finding of overlaps - study_locus_with_qc.valid_rows( - invalid_qc_reasons, invalid=True - ).df.repartitionByRange("chromosome", "position").sortWithinPartitions( + study_locus_with_qc.valid_rows(invalid_qc_reasons).df.repartitionByRange( "chromosome", "position" - ).write.mode(session.write_mode).parquet(invalid_study_locus_path) - - # Infalid study locus - study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.mode( + ).sortWithinPartitions("chromosome", "position").write.mode( session.write_mode ).parquet(valid_study_locus_path) + + 
# Invalid study locus + study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( + session.write_mode + ).parquet(invalid_study_locus_path) From 8a83ec692b5426870941c12fb3c20bca7c9fc499 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:40:15 +0000 Subject: [PATCH 176/188] chore: pre-commit autoupdate (#918) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.3 → v0.7.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.3...v0.7.4) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2c9da9926..5bf5e6239 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ ci: skip: [poetry-lock] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.3 + rev: v0.7.4 hooks: - id: ruff args: From 008aa3898cfbb75281b5a88be35abef216aa2a9f Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Tue, 26 Nov 2024 14:08:36 +0000 Subject: [PATCH 177/188] chore(gnomad): updating GnomAD version to 4.1 from 4.0 + using joint frequencies (#929) * fix: gnomad 4.1 frequencies * fix: removing in-silico extraction in gnomad * fix: removing in silico predictor ingestion from gnomad pre-process --- src/gentropy/config.py | 4 ++- src/gentropy/datasource/gnomad/variants.py | 41 +++------------------- src/gentropy/gnomad_ingestion.py | 2 ++ 3 files changed, 9 insertions(+), 38 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index a84dbd89e..b32647acf 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -387,7 +387,9 @@ class GnomadVariantConfig(StepConfig): } ) variant_annotation_path: str = MISSING - gnomad_genomes_path: str = 
"gs://gcp-public-data--gnomad/release/4.0/ht/genomes/gnomad.genomes.v4.0.sites.ht/" + gnomad_genomes_path: str = ( + "gs://gcp-public-data--gnomad/release/4.1/ht/joint/gnomad.joint.v4.1.sites.ht/" + ) gnomad_variant_populations: list[str] = field( default_factory=lambda: [ "afr", # African-American diff --git a/src/gentropy/datasource/gnomad/variants.py b/src/gentropy/datasource/gnomad/variants.py index 7540c374f..0731181b7 100644 --- a/src/gentropy/datasource/gnomad/variants.py +++ b/src/gentropy/datasource/gnomad/variants.py @@ -10,7 +10,7 @@ from gentropy.common.types import VariantPopulation from gentropy.config import GnomadVariantConfig, VariantIndexConfig -from gentropy.dataset.variant_index import InSilicoPredictorNormaliser, VariantIndex +from gentropy.dataset.variant_index import VariantIndex if TYPE_CHECKING: pass @@ -84,32 +84,11 @@ def as_variant_index(self: GnomADVariants) -> VariantIndex: ).map( lambda p: hl.struct( populationName=p, - alleleFrequency=ht.freq[ht.globals.freq_index_dict[p]].AF, + alleleFrequency=ht.joint.freq[ + ht.joint_globals.freq_index_dict[p] + ].AF, ) ), - # Extract in silico predictors: - inSilicoPredictors=hl.array( - [ - hl.struct( - method=hl.str("SpliceAI"), - assessment=hl.missing(hl.tstr), - score=hl.expr.functions.float32( - ht.in_silico_predictors.spliceai_ds_max - ), - assessmentFlag=hl.missing(hl.tstr), - targetId=hl.missing(hl.tstr), - ), - hl.struct( - method=hl.str("Pangolin"), - assessment=hl.missing(hl.tstr), - score=hl.expr.functions.float32( - ht.in_silico_predictors.pangolin_largest_ds - ), - assessmentFlag=hl.missing(hl.tstr), - targetId=hl.missing(hl.tstr), - ), - ] - ), # Extract cross references to GnomAD: dbXrefs=hl.array( [ @@ -133,11 +112,6 @@ def as_variant_index(self: GnomADVariants) -> VariantIndex: .to_spark(flatten=False) .withColumns( { - # Once The parsing is done, we have to drop objects with no score from inSilicoPredictors: - "inSilicoPredictors": f.filter( - f.col("inSilicoPredictors"), - 
lambda predictor: predictor["score"].isNotNull(), - ), # Generate a variantId that is hashed for long variant ids: "variantId": VariantIndex.hash_long_variant_ids( f.col("variantId"), @@ -149,13 +123,6 @@ def as_variant_index(self: GnomADVariants) -> VariantIndex: "mostSevereConsequenceId": f.lit(None).cast(t.StringType()), } ) - # Normalising in silico predictor assessments: - .withColumn( - "inSilicoPredictors", - InSilicoPredictorNormaliser.normalise_in_silico_predictors( - f.col("inSilicoPredictors") - ), - ) ), _schema=VariantIndex.get_schema(), ) diff --git a/src/gentropy/gnomad_ingestion.py b/src/gentropy/gnomad_ingestion.py index 9b4de8a0c..d930b54c6 100644 --- a/src/gentropy/gnomad_ingestion.py +++ b/src/gentropy/gnomad_ingestion.py @@ -105,6 +105,8 @@ def __init__( gnomad_genomes_path, variant_annotation_path ) + session.logger.info("Gnomad variant annotation path:") + session.logger.info(variant_annotation_path) # Parse variant info from source. ( GnomADVariants( From 4837a4b4add387ac9e1edb3fba7fbbbea712bc80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Wed, 27 Nov 2024 10:07:42 +0000 Subject: [PATCH 178/188] feat(gold_standard): add traitFromSourceMappedId to schema (#924) * feat(gold_standard): add traitFromSourceMappedId to schema * chore: adapt tests * feat(feature_matrix): consider `traitFromSourceMappedId` a static column * feat(feature_matrix): consider `traitFromSourceMappedId` an optional column --- src/gentropy/assets/schemas/l2g_gold_standard.json | 6 ++++++ src/gentropy/dataset/l2g_feature_matrix.py | 2 ++ tests/gentropy/dataset/test_l2g_feature_matrix.py | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gentropy/assets/schemas/l2g_gold_standard.json b/src/gentropy/assets/schemas/l2g_gold_standard.json index 6af921d61..6ba715963 100644 --- a/src/gentropy/assets/schemas/l2g_gold_standard.json +++ 
b/src/gentropy/assets/schemas/l2g_gold_standard.json @@ -25,6 +25,12 @@ "nullable": false, "metadata": {} }, + { + "name": "traitFromSourceMappedId", + "type": "string", + "nullable": true, + "metadata": {} + }, { "name": "goldStandardSet", "type": "string", diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index f59e1e725..8c3d97e88 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -39,6 +39,8 @@ def __init__( self.fixed_cols = ["studyLocusId", "geneId"] if self.with_gold_standard: self.fixed_cols.append("goldStandardSet") + if "traitFromSourceMappedId" in _df.columns: + self.fixed_cols.append("traitFromSourceMappedId") self.features_list = features_list or [ col for col in _df.columns if col not in self.fixed_cols diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index 4fe338254..6677d123e 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -87,7 +87,7 @@ def _setup(self: TestFromFeaturesList, spark: SparkSession) -> None: """Setup fixture.""" self.sample_gold_standard = L2GGoldStandard( _df=spark.createDataFrame( - [(1, "var1", "gwas1", "g1", "positive", ["a_source"])], + [(1, "var1", "gwas1", "g1", "efo1", "positive", ["a_source"])], L2GGoldStandard.get_schema(), ), _schema=L2GGoldStandard.get_schema(), From 7b3bfade5cf39b890346327a6e4c0f8e94990184 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:31:49 +0100 Subject: [PATCH 179/188] feat: coalescing the datasets (#932) Co-authored-by: Szymon Szyszkowski --- src/gentropy/biosample_index.py | 13 ++++++++++--- src/gentropy/colocalisation.py | 6 +++--- src/gentropy/common/session.py | 3 +++ src/gentropy/config.py | 1 + src/gentropy/gene_index.py | 5 ++++- src/gentropy/study_locus_validation.py | 8 
++++---- src/gentropy/study_validation.py | 12 ++++++------ src/gentropy/variant_index.py | 4 +++- 8 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/gentropy/biosample_index.py b/src/gentropy/biosample_index.py index e0b5e9b10..a6e8b5223 100644 --- a/src/gentropy/biosample_index.py +++ b/src/gentropy/biosample_index.py @@ -1,4 +1,5 @@ """Step to generate biosample index dataset.""" + from __future__ import annotations from gentropy.common.session import Session @@ -28,10 +29,16 @@ def __init__( efo_input_path (str): Input efo dataset path. biosample_index_path (str): Output gene index dataset path. """ - cell_ontology_index = extract_ontology_from_json(cell_ontology_input_path, session.spark) + cell_ontology_index = extract_ontology_from_json( + cell_ontology_input_path, session.spark + ) uberon_index = extract_ontology_from_json(uberon_input_path, session.spark) - efo_index = extract_ontology_from_json(efo_input_path, session.spark).retain_rows_with_ancestor_id(["CL_0000000"]) + efo_index = extract_ontology_from_json( + efo_input_path, session.spark + ).retain_rows_with_ancestor_id(["CL_0000000"]) biosample_index = cell_ontology_index.merge_indices([uberon_index, efo_index]) - biosample_index.df.write.mode(session.write_mode).parquet(biosample_index_path) + biosample_index.df.coalesce(session.output_partitions).write.mode( + session.write_mode + ).parquet(biosample_index_path) diff --git a/src/gentropy/colocalisation.py b/src/gentropy/colocalisation.py index 9682a8ed9..6a4568397 100644 --- a/src/gentropy/colocalisation.py +++ b/src/gentropy/colocalisation.py @@ -70,9 +70,9 @@ def __init__( coloc = partial(coloc, **colocalisation_method_params) colocalisation_results = coloc(overlaps) # Load - colocalisation_results.df.write.mode(session.write_mode).parquet( - f"{coloc_path}/{colocalisation_method.lower()}" - ) + colocalisation_results.df.coalesce(session.output_partitions).write.mode( + session.write_mode + 
).parquet(f"{coloc_path}/{colocalisation_method.lower()}") @classmethod def _get_colocalisation_class( diff --git a/src/gentropy/common/session.py b/src/gentropy/common/session.py index 297903629..3a8ad4af7 100644 --- a/src/gentropy/common/session.py +++ b/src/gentropy/common/session.py @@ -24,6 +24,7 @@ def __init__( # noqa: D107 hail_home: str | None = None, start_hail: bool = False, extended_spark_conf: dict[str, str] | None = None, + output_partitions: int = 200, ) -> None: """Initialises spark session and logger. @@ -34,6 +35,7 @@ def __init__( # noqa: D107 hail_home (str | None): Path to Hail installation. Defaults to None. start_hail (bool): Whether to start Hail. Defaults to False. extended_spark_conf (dict[str, str] | None): Extended Spark configuration. Defaults to None. + output_partitions (int): Number of partitions for output datasets. Defaults to 200. """ merged_conf = self._create_merged_config( start_hail, hail_home, extended_spark_conf @@ -53,6 +55,7 @@ def __init__( # noqa: D107 self.start_hail = start_hail if start_hail: hl.init(sc=self.spark.sparkContext, log="/dev/null") + self.output_partitions = output_partitions def _default_config(self: Session) -> SparkConf: """Default spark configuration. 
diff --git a/src/gentropy/config.py b/src/gentropy/config.py index b32647acf..65fdb5897 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -18,6 +18,7 @@ class SessionConfig: spark_uri: str = "local[*]" hail_home: str = os.path.dirname(hail_location) extended_spark_conf: dict[str, str] | None = field(default_factory=dict[str, str]) + output_partitions: int = 200 _target_: str = "gentropy.common.session.Session" diff --git a/src/gentropy/gene_index.py b/src/gentropy/gene_index.py index ad8e95083..0a317d077 100644 --- a/src/gentropy/gene_index.py +++ b/src/gentropy/gene_index.py @@ -1,4 +1,5 @@ """Step to generate gene index dataset.""" + from __future__ import annotations from gentropy.common.session import Session @@ -28,4 +29,6 @@ def __init__( # Transform gene_index = OpenTargetsTarget.as_gene_index(platform_target) # Load - gene_index.df.write.mode(session.write_mode).parquet(gene_index_path) + gene_index.df.coalesce(session.output_partitions).write.mode( + session.write_mode + ).parquet(gene_index_path) diff --git a/src/gentropy/study_locus_validation.py b/src/gentropy/study_locus_validation.py index cf36a4389..ce2201f80 100644 --- a/src/gentropy/study_locus_validation.py +++ b/src/gentropy/study_locus_validation.py @@ -58,12 +58,12 @@ def __init__( # Valid study locus partitioned to simplify the finding of overlaps study_locus_with_qc.valid_rows(invalid_qc_reasons).df.repartitionByRange( - "chromosome", "position" + session.output_partitions, "chromosome", "position" ).sortWithinPartitions("chromosome", "position").write.mode( session.write_mode ).parquet(valid_study_locus_path) # Invalid study locus - study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( - session.write_mode - ).parquet(invalid_study_locus_path) + study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.coalesce( + session.output_partitions + ).write.mode(session.write_mode).parquet(invalid_study_locus_path) diff --git 
a/src/gentropy/study_validation.py b/src/gentropy/study_validation.py index 08f601f1e..3d2fdd060 100644 --- a/src/gentropy/study_validation.py +++ b/src/gentropy/study_validation.py @@ -71,10 +71,10 @@ def __init__( ) # Flagging QTL studies with invalid biosamples ).persist() # we will need this for 2 types of outputs - study_index_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode( - session.write_mode - ).parquet(invalid_study_index_path) + study_index_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.coalesce( + session.output_partitions + ).write.mode(session.write_mode).parquet(invalid_study_index_path) - study_index_with_qc.valid_rows(invalid_qc_reasons).df.write.mode( - session.write_mode - ).parquet(valid_study_index_path) + study_index_with_qc.valid_rows(invalid_qc_reasons).df.coalesce( + session.output_partitions + ).write.mode(session.write_mode).parquet(valid_study_index_path) diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index 9eac684b2..ae7efa5c4 100644 --- a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -56,7 +56,9 @@ def __init__( variant_index = variant_index.add_annotation(annotations) ( - variant_index.df.repartitionByRange("chromosome", "position") + variant_index.df.repartitionByRange( + session.output_partitions, "chromosome", "position" + ) .sortWithinPartitions("chromosome", "position") .write.mode(session.write_mode) .parquet(variant_index_path) From d2c741713a2dc66049be41033eb2ab9f87670901 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Wed, 27 Nov 2024 15:29:05 +0000 Subject: [PATCH 180/188] chore(vep): Ensembl version update (#931) * chore: vep version bump * fix: bump version to most recent --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- src/vep/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vep/Dockerfile b/src/vep/Dockerfile index ade55e3da..6b2fef42d 100644 --- 
a/src/vep/Dockerfile +++ b/src/vep/Dockerfile @@ -1,4 +1,4 @@ -FROM ensemblorg/ensembl-vep:release_111.0 +FROM ensemblorg/ensembl-vep:release_113.3 USER root From 9a9fb7b91f8fae56e7fdc54cfb313f70c6cd8261 Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:40:25 +0100 Subject: [PATCH 181/188] feat: coalesce l2g fm and predictions (#934) Co-authored-by: Szymon Szyszkowski --- src/gentropy/l2g.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 548368967..16922ef78 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -89,7 +89,9 @@ def __init__( fm = credible_set.filter(f.col("studyType") == "gwas").build_feature_matrix( features_list, features_input_loader ) - fm._df.write.mode(session.write_mode).parquet(feature_matrix_path) + fm._df.coalesce(session.output_partitions).write.mode( + session.write_mode + ).parquet(feature_matrix_path) class LocusToGeneStep: @@ -283,9 +285,9 @@ def run_predict(self) -> None: ) predictions.filter( f.col("score") >= self.l2g_threshold - ).add_locus_to_gene_features(self.feature_matrix).df.write.mode( - self.session.write_mode - ).parquet(self.predictions_path) + ).add_locus_to_gene_features(self.feature_matrix).df.coalesce( + self.session.output_partitions + ).write.mode(self.session.write_mode).parquet(self.predictions_path) self.session.logger.info("L2G predictions saved successfully.") def run_train(self) -> None: @@ -378,6 +380,7 @@ def __init__( locus_to_gene_prediction.to_disease_target_evidence( credible_sets, study_index, locus_to_gene_threshold ) + .coalesce(session.output_partitions) .write.mode(session.write_mode) .option("compression", "gzip") .json(evidence_output_path) From 8595e5b1deecb80719fed6ff8053c55a4d9eb15e Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:54:29 +0100 Subject: 
[PATCH 182/188] feat: allow building package from tag (#930) * feat: allow building package from tag * fix: indent --------- Co-authored-by: Szymon Szyszkowski --- Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1d79d35fd..377661299 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,15 @@ PROJECT_ID ?= open-targets-genetics-dev REGION ?= europe-west1 APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g') -REF ?= $$(git rev-parse --abbrev-ref HEAD) PACKAGE_VERSION ?= $$(poetry version --short) +# NOTE: git rev-parse will always return the HEAD if it sits in the tag, +# this way we can distinguish the tag vs branch name +ifeq ($(shell git rev-parse --abbrev-ref HEAD)),HEAD) + REF := $(shell git rev-parse --abbrev-ref HEAD) +else + REF := $(shell git describe --exact-match --tags) +endif + CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]') BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF} From 19219dd116493050648a80f00140c0e1fab76989 Mon Sep 17 00:00:00 2001 From: Daniel Suveges Date: Wed, 27 Nov 2024 16:50:08 +0000 Subject: [PATCH 183/188] feat: adding GERP conservation score to variant annotation (#933) * feat: adding GERP conservation score to variant annotation * fix: typo --------- Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> --- .../assets/schemas/vep_json_output.json | 12 +++++++ src/gentropy/dataset/variant_index.py | 33 +++++++++++++++++++ src/gentropy/datasource/ensembl/vep_parser.py | 12 +++++++ 3 files changed, 57 insertions(+) diff --git a/src/gentropy/assets/schemas/vep_json_output.json b/src/gentropy/assets/schemas/vep_json_output.json index 674788407..14aae6b84 100644 --- a/src/gentropy/assets/schemas/vep_json_output.json +++ b/src/gentropy/assets/schemas/vep_json_output.json @@ -20,6 +20,12 @@ "containsNull": true, "elementType": { "fields": [ + { + "metadata": 
{}, + "name": "conservation", + "nullable": true, + "type": "double" + }, { "metadata": {}, "name": "hgvsg", @@ -294,6 +300,12 @@ "containsNull": true, "elementType": { "fields": [ + { + "metadata": {}, + "name": "conservation", + "nullable": true, + "type": "double" + }, { "metadata": {}, "name": "alphamissense", diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 4092ca961..649b9f3ff 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -368,6 +368,7 @@ def resolve_predictor_methods( # The following predictors are not normalised: .when(method == "SpliceAI", score) .when(method == "VEP", score) + .when(method == "GERP", cls._normalise_gerp(score)) ) @staticmethod @@ -420,6 +421,38 @@ def _normalise_cadd( .when(score > 30, cls._rescaleColumnValue(score, 30, 81, 0.75, 1)) ) + @classmethod + def _normalise_gerp( + cls: type[InSilicoPredictorNormaliser], + score: Column, + ) -> Column: + """Normalise GERP scores. + + # Score interpretation from here: + # https://pmc.ncbi.nlm.nih.gov/articles/PMC7286533/ + # https://genome.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=allHg19RS_BW + + Logic: GERP scores are divided into three categories: + - >6 : 1.0 - GERP scores are not bounded, so any value above 6 is considered as 1.0 + - 2-6: 0.5-1 - Highly conserved regions are scaled between 0.5 and 1 + - 0-2: 0-0.5 - Moderately conserved regions are scaled between 0 and 0.5 + - -3-0: -1-0.0 - Negative conservation indicates benign sequence alteration, so scaled between -1 and 0 + - < -3: -1.0 - As the score goes below -3, it is considered as -1.0 + + Args: + score (Column): GERP score. + + Returns: + Column: Normalised GERP score. 
+ """ + return ( + f.when(score > 6, f.lit(1.0)) + .when(score >= 2, cls._rescaleColumnValue(score, 2, 6, 0.5, 1)) + .when(score >= 0, cls._rescaleColumnValue(score, 0, 2, 0, 0.5)) + .when(score >= -3, cls._rescaleColumnValue(score, -3, 0, -1, 0)) + .when(score < -3, f.lit(-1.0)) + ) + @classmethod def _normalise_loftee( cls: type[InSilicoPredictorNormaliser], diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 9494bf6f3..b0a2e50a2 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -668,6 +668,12 @@ def process_vep_output( assessment_column_name="lof", assessment_flag_column_name="lof_filter", ), + # Extract GERP conservation score: + cls._vep_in_silico_prediction_extractor( + method_name="GERP", + transcript_column_name="transcript_consequences", + score_column_name="conservation", + ), # Extract max alpha missense: cls._get_max_alpha_missense( f.col("transcript_consequences") @@ -686,6 +692,12 @@ def process_vep_output( method_name="CADD", score_column_name="cadd_phred", ), + # Extract GERP conservation score: + cls._vep_in_silico_prediction_extractor( + method_name="GERP", + transcript_column_name="intergenic_consequences", + score_column_name="conservation", + ), # Extract VEP prediction: cls._get_vep_prediction(f.col("most_severe_consequence")), ) From ff35db4a35c3dbdf5c94505541ba67da68f97a2b Mon Sep 17 00:00:00 2001 From: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com> Date: Thu, 28 Nov 2024 12:51:09 +0100 Subject: [PATCH 184/188] fix: swap the ref parse (#935) Co-authored-by: Szymon Szyszkowski --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 377661299..4e6a7463f 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,10 @@ APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/" PACKAGE_VERSION ?= $$(poetry version --short) # NOTE: git 
rev-parse will always return the HEAD if it sits in the tag, # this way we can distinguish the tag vs branch name -ifeq ($(shell git rev-parse --abbrev-ref HEAD)),HEAD) - REF := $(shell git rev-parse --abbrev-ref HEAD) -else +ifeq ($(shell git rev-parse --abbrev-ref HEAD),HEAD) REF := $(shell git describe --exact-match --tags) +else + REF := $(shell git rev-parse --abbrev-ref HEAD) endif CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]') From e49608de994a62a3489feea52e9ee2f459550b6c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:42:33 +0000 Subject: [PATCH 185/188] build(deps-dev): bump ipython from 8.29.0 to 8.30.0 (#937) Bumps [ipython](https://github.com/ipython/ipython) from 8.29.0 to 8.30.0. - [Release notes](https://github.com/ipython/ipython/releases) - [Commits](https://github.com/ipython/ipython/compare/8.29.0...8.30.0) --- updated-dependencies: - dependency-name: ipython dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index aea09f8c9..434673775 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2080,13 +2080,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.29.0" +version = "8.30.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8"}, - {file = "ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb"}, + {file = "ipython-8.30.0-py3-none-any.whl", hash = "sha256:85ec56a7e20f6c38fce7727dcca699ae4ffc85985aa7b23635a8008f918ae321"}, + {file = "ipython-8.30.0.tar.gz", hash = "sha256:cb0a405a306d2995a5cbb9901894d240784a9f341394c6ba3f4fe8c6eb89ff6e"}, ] [package.dependencies] @@ -2096,16 +2096,16 @@ exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} jedi = ">=0.16" matplotlib-inline = "*" pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} -prompt-toolkit = ">=3.0.41,<3.1.0" +prompt_toolkit = ">=3.0.41,<3.1.0" pygments = ">=2.4.0" -stack-data = "*" +stack_data = "*" traitlets = ">=5.13.0" -typing-extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} +typing_extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} [package.extras] all = ["ipython[black,doc,kernel,matplotlib,nbconvert,nbformat,notebook,parallel,qtconsole]", "ipython[test,test-extra]"] black = ["black"] -doc = ["docrepr", "exceptiongroup", "intersphinx-registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", 
"sphinxcontrib-jquery", "tomli", "typing-extensions"] +doc = ["docrepr", "exceptiongroup", "intersphinx_registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "tomli", "typing_extensions"] kernel = ["ipykernel"] matplotlib = ["matplotlib"] nbconvert = ["nbconvert"] From a02f9c118f05adb71e873bf9739ad07914a514e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Dec 2024 11:50:39 +0000 Subject: [PATCH 186/188] build(deps-dev): bump ruff from 0.7.1 to 0.8.1 (#936) Bumps [ruff](https://github.com/astral-sh/ruff) from 0.7.1 to 0.8.1. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/ruff/compare/0.7.1...0.8.1) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 40 ++++++++++++++++++++-------------------- pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index 434673775..53d95babf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4905,29 +4905,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.7.1" +version = "0.8.1" description = "An extremely fast Python linter and code formatter, written in Rust." 
optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.7.1-py3-none-linux_armv6l.whl", hash = "sha256:cb1bc5ed9403daa7da05475d615739cc0212e861b7306f314379d958592aaa89"}, - {file = "ruff-0.7.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:27c1c52a8d199a257ff1e5582d078eab7145129aa02721815ca8fa4f9612dc35"}, - {file = "ruff-0.7.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:588a34e1ef2ea55b4ddfec26bbe76bc866e92523d8c6cdec5e8aceefeff02d99"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fc32f9cdf72dc75c451e5f072758b118ab8100727168a3df58502b43a599ca"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:985818742b833bffa543a84d1cc11b5e6871de1b4e0ac3060a59a2bae3969250"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32f1e8a192e261366c702c5fb2ece9f68d26625f198a25c408861c16dc2dea9c"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:699085bf05819588551b11751eff33e9ca58b1b86a6843e1b082a7de40da1565"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344cc2b0814047dc8c3a8ff2cd1f3d808bb23c6658db830d25147339d9bf9ea7"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4316bbf69d5a859cc937890c7ac7a6551252b6a01b1d2c97e8fc96e45a7c8b4a"}, - {file = "ruff-0.7.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d3af9dca4c56043e738a4d6dd1e9444b6d6c10598ac52d146e331eb155a8ad"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5c121b46abde94a505175524e51891f829414e093cd8326d6e741ecfc0a9112"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8422104078324ea250886954e48f1373a8fe7de59283d747c3a7eca050b4e378"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:56aad830af8a9db644e80098fe4984a948e2b6fc2e73891538f43bbe478461b8"}, - {file = "ruff-0.7.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:658304f02f68d3a83c998ad8bf91f9b4f53e93e5412b8f2388359d55869727fd"}, - {file = "ruff-0.7.1-py3-none-win32.whl", hash = "sha256:b517a2011333eb7ce2d402652ecaa0ac1a30c114fbbd55c6b8ee466a7f600ee9"}, - {file = "ruff-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f38c41fcde1728736b4eb2b18850f6d1e3eedd9678c914dede554a70d5241307"}, - {file = "ruff-0.7.1-py3-none-win_arm64.whl", hash = "sha256:19aa200ec824c0f36d0c9114c8ec0087082021732979a359d6f3c390a6ff2a37"}, - {file = "ruff-0.7.1.tar.gz", hash = "sha256:9d8a41d4aa2dad1575adb98a82870cf5db5f76b2938cf2206c22c940034a36f4"}, + {file = "ruff-0.8.1-py3-none-linux_armv6l.whl", hash = "sha256:fae0805bd514066f20309f6742f6ee7904a773eb9e6c17c45d6b1600ca65c9b5"}, + {file = "ruff-0.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8a4f7385c2285c30f34b200ca5511fcc865f17578383db154e098150ce0a087"}, + {file = "ruff-0.8.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cd054486da0c53e41e0086e1730eb77d1f698154f910e0cd9e0d64274979a209"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2029b8c22da147c50ae577e621a5bfbc5d1fed75d86af53643d7a7aee1d23871"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2666520828dee7dfc7e47ee4ea0d928f40de72056d929a7c5292d95071d881d1"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:333c57013ef8c97a53892aa56042831c372e0bb1785ab7026187b7abd0135ad5"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:288326162804f34088ac007139488dcb43de590a5ccfec3166396530b58fb89d"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b12c39b9448632284561cbf4191aa1b005882acbc81900ffa9f9f471c8ff7e26"}, + {file = 
"ruff-0.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:364e6674450cbac8e998f7b30639040c99d81dfb5bbc6dfad69bc7a8f916b3d1"}, + {file = "ruff-0.8.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b22346f845fec132aa39cd29acb94451d030c10874408dbf776af3aaeb53284c"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b2f2f7a7e7648a2bfe6ead4e0a16745db956da0e3a231ad443d2a66a105c04fa"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:adf314fc458374c25c5c4a4a9270c3e8a6a807b1bec018cfa2813d6546215540"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a885d68342a231b5ba4d30b8c6e1b1ee3a65cf37e3d29b3c74069cdf1ee1e3c9"}, + {file = "ruff-0.8.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d2c16e3508c8cc73e96aa5127d0df8913d2290098f776416a4b157657bee44c5"}, + {file = "ruff-0.8.1-py3-none-win32.whl", hash = "sha256:93335cd7c0eaedb44882d75a7acb7df4b77cd7cd0d2255c93b28791716e81790"}, + {file = "ruff-0.8.1-py3-none-win_amd64.whl", hash = "sha256:2954cdbe8dfd8ab359d4a30cd971b589d335a44d444b6ca2cb3d1da21b75e4b6"}, + {file = "ruff-0.8.1-py3-none-win_arm64.whl", hash = "sha256:55873cc1a473e5ac129d15eccb3c008c096b94809d693fc7053f588b67822737"}, + {file = "ruff-0.8.1.tar.gz", hash = "sha256:3583db9a6450364ed5ca3f3b4225958b24f78178908d5c4bc0f46251ccca898f"}, ] [[package]] @@ -5959,4 +5959,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "af70455b40ec31084130c90b9dc468a5c1198f80e6ae30d10bfb1b17d1706537" +content-hash = "b47b8a546db802a97c4656174b4417d844f3af383cf6617b7743c43f7e2a5381" diff --git a/pyproject.toml b/pyproject.toml index fd69201cb..ebdacef7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ pep8-naming = "^0.14.1" interrogate = "^1.7.0" isort = "^5.13.2" darglint = "^1.8.1" -ruff = "^0.7.0" +ruff = "^0.8.1" [tool.poetry.group.docs.dependencies] mkdocs = "^1.5.3" From 
43f047a9c1a34942a69b5230e98e4e85c87bcc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:35:54 +0000 Subject: [PATCH 187/188] fix(l2g_predictions): annotate based on list of features + filter out missing annotation (#925) * fix(prediction): do not annotate all features from matrix * fix(prediction): filter out features with 0 * chore: pre-commit auto fixes [...] --- src/gentropy/dataset/l2g_prediction.py | 29 ++++++++------------------ src/gentropy/l2g.py | 10 +++++---- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/src/gentropy/dataset/l2g_prediction.py b/src/gentropy/dataset/l2g_prediction.py index 2bc286a40..915b2b7dc 100644 --- a/src/gentropy/dataset/l2g_prediction.py +++ b/src/gentropy/dataset/l2g_prediction.py @@ -129,12 +129,13 @@ def to_disease_target_evidence( ) def add_locus_to_gene_features( - self: L2GPrediction, feature_matrix: L2GFeatureMatrix + self: L2GPrediction, feature_matrix: L2GFeatureMatrix, features_list: list[str] ) -> L2GPrediction: - """Add features to the L2G predictions. + """Add features used to extract the L2G predictions. 
Args: feature_matrix (L2GFeatureMatrix): Feature matrix dataset + features_list (list[str]): List of features used in the model Returns: L2GPrediction: L2G predictions with additional features @@ -143,38 +144,26 @@ def add_locus_to_gene_features( if "locusToGeneFeatures" in self.df.columns: self.df = self.df.drop("locusToGeneFeatures") - # Columns identifying a studyLocus/gene pair - prediction_id_columns = ["studyLocusId", "geneId"] - - # L2G matrix columns to build the map: - columns_to_map = [ - column - for column in feature_matrix._df.columns - if column not in prediction_id_columns - ] - # Aggregating all features into a single map column: aggregated_features = ( feature_matrix._df.withColumn( "locusToGeneFeatures", f.create_map( *sum( - [ - (f.lit(colname), f.col(colname)) - for colname in columns_to_map - ], + ((f.lit(feature), f.col(feature)) for feature in features_list), (), ) ), ) - # from the freshly created map, we filter out the null values .withColumn( "locusToGeneFeatures", - f.expr("map_filter(locusToGeneFeatures, (k, v) -> v is not null)"), + f.expr("map_filter(locusToGeneFeatures, (k, v) -> v != 0)"), ) - .drop(*columns_to_map) + .drop(*features_list) ) return L2GPrediction( - _df=self.df.join(aggregated_features, on=prediction_id_columns, how="left"), + _df=self.df.join( + aggregated_features, on=["studyLocusId", "geneId"], how="left" + ), _schema=self.get_schema(), ) diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index 16922ef78..ff0d47f58 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -7,7 +7,7 @@ import pyspark.sql.functions as f from sklearn.ensemble import GradientBoostingClassifier -from wandb import login as wandb_login +from wandb.sdk.wandb_login import login as wandb_login from gentropy.common.schemas import compare_struct_schemas from gentropy.common.session import Session @@ -285,9 +285,11 @@ def run_predict(self) -> None: ) predictions.filter( f.col("score") >= self.l2g_threshold - 
).add_locus_to_gene_features(self.feature_matrix).df.coalesce( - self.session.output_partitions - ).write.mode(self.session.write_mode).parquet(self.predictions_path) + ).add_locus_to_gene_features( + self.feature_matrix, self.features_list + ).df.coalesce(self.session.output_partitions).write.mode( + self.session.write_mode + ).parquet(self.predictions_path) self.session.logger.info("L2G predictions saved successfully.") def run_train(self) -> None: From 79f6fcc383bcc0f165f9c51e4a32675bf6f2f8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Irene=20L=C3=B3pez=20Santiago?= <45119610+ireneisdoomed@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:34:54 +0000 Subject: [PATCH 188/188] feat(l2g)!: implement new training strategy splitting between EFO/gene pairs and with cross validation (#938) * feat(gold_standard): add traitFromSourceMappedId to schema * chore: adapt tests * feat(feature_matrix): consider `traitFromSourceMappedId` a static column * feat(feature_matrix): consider `traitFromSourceMappedId` an optional column * feat: update l2g config with best hyperparams * feat(trainer): new train runs when cross_validate=False * chore(model): add default hyperparams based on best params * chore: debug sweep, one single run * feat(trainer): new train runs when cross_validate=True * feat(cross_validate): sweep runs are now together * chore: pre-commit auto fixes [...] 
* chore: improve error message --- src/gentropy/config.py | 10 +- src/gentropy/l2g.py | 15 +- src/gentropy/method/l2g/model.py | 15 +- src/gentropy/method/l2g/trainer.py | 398 ++++++++++++++++++++--------- 4 files changed, 309 insertions(+), 129 deletions(-) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 65fdb5897..9c454d41b 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -264,20 +264,24 @@ class LocusToGeneConfig(StepConfig): "geneCount500kb", "proteinGeneCount500kb", "credibleSetConfidence", - # "isProteinCoding", ] ) hyperparameters: dict[str, Any] = field( default_factory=lambda: { "n_estimators": 100, - "max_depth": 5, - "loss": "log_loss", + "max_depth": 10, + "ccp_alpha": 0, + "learning_rate": 0.1, + "min_samples_leaf": 5, + "min_samples_split": 5, + "subsample": 1, } ) wandb_run_name: str | None = None hf_hub_repo_id: str | None = "opentargets/locus_to_gene" hf_model_commit_message: str | None = "chore: update model" download_from_hub: bool = True + cross_validate: bool = True _target_: str = "gentropy.l2g.LocusToGeneStep" diff --git a/src/gentropy/l2g.py b/src/gentropy/l2g.py index ff0d47f58..3b73a377d 100644 --- a/src/gentropy/l2g.py +++ b/src/gentropy/l2g.py @@ -100,11 +100,12 @@ class LocusToGeneStep: def __init__( self, session: Session, - hyperparameters: dict[str, Any], *, run_mode: str, features_list: list[str], + hyperparameters: dict[str, Any], download_from_hub: bool, + cross_validate: bool, wandb_run_name: str, credible_set_path: str, feature_matrix_path: str, @@ -113,18 +114,19 @@ def __init__( variant_index_path: str | None = None, gene_interactions_path: str | None = None, predictions_path: str | None = None, - l2g_threshold: float | None, - hf_hub_repo_id: str | None, + l2g_threshold: float | None = None, + hf_hub_repo_id: str | None = None, hf_model_commit_message: str | None = "chore: update model", ) -> None: """Initialise the step and run the logic based on mode. 
Args: session (Session): Session object that contains the Spark session - hyperparameters (dict[str, Any]): Hyperparameters for the model run_mode (str): Run mode, either 'train' or 'predict' features_list (list[str]): List of features to use for the model + hyperparameters (dict[str, Any]): Hyperparameters for the model download_from_hub (bool): Whether to download the model from Hugging Face Hub + cross_validate (bool): Whether to run cross validation (5-fold by default) to train the model. wandb_run_name (str): Name of the run to track model training in Weights and Biases credible_set_path (str): Path to the credible set dataset necessary to build the feature matrix feature_matrix_path (str): Path to the L2G feature matrix input dataset @@ -152,6 +154,7 @@ def __init__( self.features_list = list(features_list) self.hyperparameters = dict(hyperparameters) self.wandb_run_name = wandb_run_name + self.cross_validate = cross_validate self.hf_hub_repo_id = hf_hub_repo_id self.download_from_hub = download_from_hub self.hf_model_commit_message = hf_model_commit_message @@ -300,7 +303,7 @@ def run_train(self) -> None: # Instantiate classifier and train model l2g_model = LocusToGeneModel( - model=GradientBoostingClassifier(random_state=42), + model=GradientBoostingClassifier(random_state=42, loss="log_loss"), hyperparameters=self.hyperparameters, ) @@ -310,7 +313,7 @@ def run_train(self) -> None: # Run the training trained_model = LocusToGeneTrainer( model=l2g_model, feature_matrix=feature_matrix - ).train(self.wandb_run_name) + ).train(self.wandb_run_name, cross_validate=self.cross_validate) # Export the model if trained_model.training_data and trained_model.model and self.model_path: diff --git a/src/gentropy/method/l2g/model.py b/src/gentropy/method/l2g/model.py index 336efeb7f..1f18f227f 100644 --- a/src/gentropy/method/l2g/model.py +++ b/src/gentropy/method/l2g/model.py @@ -27,7 +27,17 @@ class LocusToGeneModel: """Wrapper for the Locus to Gene classifier.""" model: 
Any = GradientBoostingClassifier(random_state=42) - hyperparameters: dict[str, Any] | None = None + hyperparameters: dict[str, Any] = field( + default_factory=lambda: { + "n_estimators": 100, + "max_depth": 10, + "ccp_alpha": 0, + "learning_rate": 0.1, + "min_samples_leaf": 5, + "min_samples_split": 5, + "subsample": 1, + } + ) training_data: L2GFeatureMatrix | None = None label_encoder: dict[str, int] = field( default_factory=lambda: { @@ -38,8 +48,7 @@ class LocusToGeneModel: def __post_init__(self: LocusToGeneModel) -> None: """Post-initialisation to fit the estimator with the provided params.""" - if self.hyperparameters: - self.model.set_params(**self.hyperparameters_dict) + self.model.set_params(**self.hyperparameters_dict) @classmethod def load_from_disk(cls: Type[LocusToGeneModel], path: str) -> LocusToGeneModel: diff --git a/src/gentropy/method/l2g/trainer.py b/src/gentropy/method/l2g/trainer.py index ab2a3fa7e..a43d6609d 100644 --- a/src/gentropy/method/l2g/trainer.py +++ b/src/gentropy/method/l2g/trainer.py @@ -4,23 +4,26 @@ import os from dataclasses import dataclass -from functools import partial from typing import TYPE_CHECKING, Any import matplotlib.pyplot as plt +import numpy as np import pandas as pd import shap +from sklearn.base import clone from sklearn.metrics import ( accuracy_score, + average_precision_score, f1_score, precision_score, recall_score, roc_auc_score, ) -from sklearn.model_selection import train_test_split +from sklearn.model_selection import GroupKFold, GroupShuffleSplit from wandb.data_types import Image, Table from wandb.errors.term import termlog as wandb_termlog from wandb.sdk.wandb_init import init as wandb_init +from wandb.sdk.wandb_setup import _setup from wandb.sdk.wandb_sweep import sweep as wandb_sweep from wandb.sklearn import plot_classifier from wandb.wandb_agent import agent as wandb_agent @@ -34,6 +37,21 @@ from wandb.sdk.wandb_run import Run +def reset_wandb_env() -> None: + """Reset Wandb environment variables 
except for project, entity and API key. + + This is necessary to log multiple runs in the same sweep without overwriting. More context here: https://github.com/wandb/wandb/issues/5119 + """ + exclude = { + "WANDB_PROJECT", + "WANDB_ENTITY", + "WANDB_API_KEY", + } + for key in list(os.environ.keys()): + if key.startswith("WANDB_") and key not in exclude: + del os.environ[key] + + @dataclass class LocusToGeneTrainer: """Modelling of what is the most likely causal gene associated with a given locus.""" @@ -44,10 +62,11 @@ class LocusToGeneTrainer: # Initialise vars features_list: list[str] | None = None label_col: str = "goldStandardSet" - x_train: pd.DataFrame | None = None - y_train: pd.Series | None = None - x_test: pd.DataFrame | None = None - y_test: pd.Series | None = None + x_train: np.ndarray | None = None + y_train: np.ndarray | None = None + x_test: np.ndarray | None = None + y_test: np.ndarray | None = None + groups_train: np.ndarray | None = None run: Run | None = None wandb_l2g_project_name: str = "gentropy-locus-to-gene" @@ -72,9 +91,9 @@ def fit( """ if self.x_train is not None and self.y_train is not None: assert ( - not self.x_train.empty and not self.y_train.empty + self.x_train.size != 0 and self.y_train.size != 0 ), "Train data not set, nothing to fit." - fitted_model = self.model.model.fit(X=self.x_train.values, y=self.y_train) + fitted_model = self.model.model.fit(X=self.x_train, y=self.y_train) self.model = LocusToGeneModel( model=fitted_model, hyperparameters=fitted_model.get_params(), @@ -100,7 +119,10 @@ def _get_shap_explanation( Exception: (ExplanationError) When the additivity check fails. 
""" if self.x_train is not None and self.x_test is not None: - training_data = pd.concat([self.x_train, self.x_test], ignore_index=True) + training_data = pd.DataFrame( + np.vstack((self.x_train, self.x_test)), + columns=self.features_list, + ) explainer = shap.TreeExplainer( model.model, data=training_data, @@ -152,151 +174,293 @@ def log_to_wandb( wandb_run_name (str): Name of the W&B run Raises: - ValueError: If dependencies are not available. + RuntimeError: If dependencies are not available. """ if ( - self.x_train is not None - and self.x_test is not None - and self.y_train is not None - and self.y_test is not None - and self.features_list is not None + self.x_train is None + or self.x_test is None + or self.y_train is None + or self.y_test is None + or self.features_list is None ): - assert ( - not self.x_train.empty and not self.y_train.empty - ), "Train data not set, nothing to evaluate." - fitted_classifier = self.model.model - y_predicted = fitted_classifier.predict(self.x_test.values) - y_probas = fitted_classifier.predict_proba(self.x_test.values) - self.run = wandb_init( - project=self.wandb_l2g_project_name, - name=wandb_run_name, - config=fitted_classifier.get_params(), - ) - # Track classification plots - plot_classifier( - self.model.model, - self.x_train.values, - self.x_test.values, - self.y_train, - self.y_test, - y_predicted, - y_probas, - labels=list(self.model.label_encoder.values()), - model_name="L2G-classifier", - feature_names=self.features_list, - is_binary=True, - ) - # Track evaluation metrics - self.run.log( - { - "areaUnderROC": roc_auc_score( - self.y_test, y_probas[:, 1], average="weighted" - ) - } - ) - self.run.log({"accuracy": accuracy_score(self.y_test, y_predicted)}) - self.run.log( - { - "weightedPrecision": precision_score( - self.y_test, y_predicted, average="weighted" - ) - } - ) - self.run.log( - { - "weightedRecall": recall_score( - self.y_test, y_predicted, average="weighted" - ) - } - ) - self.run.log({"f1": 
f1_score(self.y_test, y_predicted, average="weighted")}) - # Track gold standards and their features - self.run.log( - {"featureMatrix": Table(dataframe=self.feature_matrix._df.toPandas())} - ) - # Log feature missingness - self.run.log( - { - "missingnessRates": self.feature_matrix.calculate_feature_missingness_rate() - } - ) - # Plot marginal contribution of each feature - explanation = self._get_shap_explanation(self.model) - self.log_plot_image_to_wandb( - "Feature Contribution", - shap.plots.bar( - explanation, max_display=len(self.x_train.columns), show=False - ), - ) + raise RuntimeError("Train data not set, we cannot log to W&B.") + assert ( + self.x_train.size != 0 and self.y_train.size != 0 + ), "Train data not set, nothing to evaluate." + fitted_classifier = self.model.model + y_predicted = fitted_classifier.predict(self.x_test) + y_probas = fitted_classifier.predict_proba(self.x_test) + self.run = wandb_init( + project=self.wandb_l2g_project_name, + name=wandb_run_name, + config=fitted_classifier.get_params(), + ) + # Track classification plots + plot_classifier( + self.model.model, + self.x_train, + self.x_test, + self.y_train, + self.y_test, + y_predicted, + y_probas, + labels=list(self.model.label_encoder.values()), + model_name="L2G-classifier", + feature_names=self.features_list, + is_binary=True, + ) + # Track evaluation metrics + self.run.log( + { + "areaUnderROC": roc_auc_score( + self.y_test, y_probas[:, 1], average="weighted" + ) + } + ) + self.run.log({"accuracy": accuracy_score(self.y_test, y_predicted)}) + self.run.log( + { + "weightedPrecision": precision_score( + self.y_test, y_predicted, average="weighted" + ) + } + ) + self.run.log( + { + "averagePrecision": average_precision_score( + self.y_test, y_predicted, average="weighted" + ) + } + ) + self.run.log( + { + "weightedRecall": recall_score( + self.y_test, y_predicted, average="weighted" + ) + } + ) + self.run.log({"f1": f1_score(self.y_test, y_predicted, average="weighted")}) + # 
Track gold standards and their features + self.run.log( + {"featureMatrix": Table(dataframe=self.feature_matrix._df.toPandas())} + ) + # Log feature missingness + self.run.log( + { + "missingnessRates": self.feature_matrix.calculate_feature_missingness_rate() + } + ) + # Plot marginal contribution of each feature + explanation = self._get_shap_explanation(self.model) + self.log_plot_image_to_wandb( + "Feature Contribution", + shap.plots.bar( + explanation, max_display=len(self.features_list), show=False + ), + ) + self.log_plot_image_to_wandb( + "Beeswarm Plot", + shap.plots.beeswarm( + explanation, max_display=len(self.features_list), show=False + ), + ) + # Plot correlation between feature values and their importance + for feature in self.features_list: self.log_plot_image_to_wandb( - "Beeswarm Plot", - shap.plots.beeswarm( - explanation, max_display=len(self.x_train.columns), show=False + f"Effect of {feature} on the predictions", + shap.plots.scatter( + explanation[:, feature], + show=False, ), ) - # Plot correlation between feature values and their importance - for feature in self.features_list: - self.log_plot_image_to_wandb( - f"Effect of {feature} on the predictions", - shap.plots.scatter( - explanation[:, feature], - show=False, - ), - ) - wandb_termlog("Logged Shapley contributions.") - self.run.finish() - else: - raise ValueError("Something went wrong, couldn't log to W&B.") + wandb_termlog("Logged Shapley contributions.") + self.run.finish() def train( self: LocusToGeneTrainer, wandb_run_name: str, + cross_validate: bool = True, + n_splits: int = 5, + hyperparameter_grid: dict[str, Any] | None = None, ) -> LocusToGeneModel: """Train the Locus to Gene model. + If cross_validation is set to True, we implement the following strategy: + 1. Create held-out test set + 2. Perform cross-validation on training set + 3. Train final model on full training set + 4. Evaluate once on test set + Args: wandb_run_name (str): Name of the W&B run. 
Unless this is provided, the model will not be logged to W&B. + cross_validate (bool): Whether to run cross-validation. Defaults to True. + n_splits(int): Number of folds the data is splitted in. The model is trained and evaluated `k - 1` times. Defaults to 5. + hyperparameter_grid (dict[str, Any] | None): Hyperparameter grid to sweep over. Defaults to None. Returns: LocusToGeneModel: Fitted model """ - data_df = self.feature_matrix._df.drop("geneId", "studyLocusId").toPandas() + data_df = self.feature_matrix._df.toPandas() + # enforce that data_df is a Pandas DataFrame # Encode labels in `goldStandardSet` to a numeric value data_df[self.label_col] = data_df[self.label_col].map(self.model.label_encoder) - # Ensure all columns are numeric and split - data_df = data_df.apply(pd.to_numeric) - X = data_df[self.features_list].copy() - y = data_df[self.label_col].copy() - self.x_train, self.x_test, self.y_train, self.y_test = train_test_split( - X, y, test_size=0.2, random_state=42 - ) + X = data_df[self.features_list].apply(pd.to_numeric).values + y = data_df[self.label_col].apply(pd.to_numeric).values + gene_trait_groups = ( + data_df["traitFromSourceMappedId"].astype(str) + + "_" + + data_df["geneId"].astype(str) + ) # Group identifier has to be a single string + + # Create hold-out test set separating EFO/Gene pairs between train/test + train_test_split = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42) + for train_idx, test_idx in train_test_split.split(X, y, gene_trait_groups): + self.x_train, self.x_test = X[train_idx], X[test_idx] + self.y_train, self.y_test = y[train_idx], y[test_idx] + self.groups_train = gene_trait_groups[train_idx] + + # Cross-validation + if cross_validate: + self.cross_validate( + wandb_run_name=f"{wandb_run_name}-cv", + parameter_grid=hyperparameter_grid, + n_splits=n_splits, + ) - # Train - model = self.fit() + # Train final model on full training set + self.fit() - # Evaluate + # Evaluate once on hold out test set 
self.log_to_wandb( - wandb_run_name=wandb_run_name, + wandb_run_name=f"{wandb_run_name}-holdout", ) - return model + return self.model - def hyperparameter_tuning( - self: LocusToGeneTrainer, wandb_run_name: str, parameter_grid: dict[str, Any] + def cross_validate( + self: LocusToGeneTrainer, + wandb_run_name: str, + parameter_grid: dict[str, Any] | None = None, + n_splits: int = 5, ) -> None: - """Perform hyperparameter tuning on the model with W&B Sweeps. Metrics for every combination of hyperparameters will be logged to W&B for comparison. + """Log results of cross validation and hyperparameter tuning with W&B Sweeps. Metrics for every combination of hyperparameters will be logged to W&B for comparison. Args: wandb_run_name (str): Name of the W&B run - parameter_grid (dict[str, Any]): Dictionary containing the hyperparameters to sweep over. The keys are the hyperparameter names, and the values are dictionaries containing the values to sweep over. + parameter_grid (dict[str, Any] | None): Dictionary containing the hyperparameters to sweep over. The keys are the hyperparameter names, and the values are dictionaries containing the values to sweep over. + n_splits (int): Number of folds the data is splitted in. The model is trained and evaluated `k - 1` times. Defaults to 5. """ + + def cross_validate_single_fold( + fold_index: int, + sweep_id: str, + sweep_run_name: str, + config: dict[str, Any], + ) -> None: + """Run cross-validation for a single fold. 
+ + Args: + fold_index (int): Index of the fold to run + sweep_id (str): ID of the sweep + sweep_run_name (str): Name of the sweep run + config (dict[str, Any]): Configuration from the sweep + + Raises: + ValueError: If training data is not set + """ + reset_wandb_env() + train_idx, val_idx = cv_splits[fold_index] + + if ( + self.x_train is None + or self.y_train is None + or self.groups_train is None + ): + raise ValueError("Training data not set") + + # Initialize a new run for this fold + os.environ["WANDB_SWEEP_ID"] = sweep_id + run = wandb_init( + project=self.wandb_l2g_project_name, + name=sweep_run_name, + config=config, + group=sweep_run_name, + job_type="fold", + reinit=True, + ) + + x_fold_train, x_fold_val = ( + self.x_train[train_idx], + self.x_train[val_idx], + ) + y_fold_train, y_fold_val = ( + self.y_train[train_idx], + self.y_train[val_idx], + ) + + fold_model = clone(self.model.model) + fold_model.set_params(**config) + fold_model.fit(x_fold_train, y_fold_train) + y_pred_proba = fold_model.predict_proba(x_fold_val)[:, 1] + y_pred = (y_pred_proba >= 0.5).astype(int) + + # Log metrics + metrics = { + "weightedPrecision": precision_score(y_fold_val, y_pred), + "averagePrecision": average_precision_score(y_fold_val, y_pred_proba), + "areaUnderROC": roc_auc_score(y_fold_val, y_pred_proba), + "accuracy": accuracy_score(y_fold_val, y_pred), + "weightedRecall": recall_score(y_fold_val, y_pred, average="weighted"), + "f1": f1_score(y_fold_val, y_pred, average="weighted"), + } + + run.log(metrics) + wandb_termlog(f"Logged metrics for fold {fold_index + 1}.") + run.finish() + + # If no grid is provided, use default ones set in the model + parameter_grid = parameter_grid or { + param: {"values": [value]} + for param, value in self.model.hyperparameters.items() + } sweep_config = { "method": "grid", - "metric": {"name": "roc", "goal": "maximize"}, + "name": wandb_run_name, # Add name to sweep config + "metric": {"name": "areaUnderROC", "goal": "maximize"}, 
"parameters": parameter_grid, } sweep_id = wandb_sweep(sweep_config, project=self.wandb_l2g_project_name) - wandb_agent(sweep_id, partial(self.train, wandb_run_name=wandb_run_name)) + gkf = GroupKFold(n_splits=n_splits) + cv_splits = list(gkf.split(self.x_train, self.y_train, self.groups_train)) + + def run_all_folds() -> None: + """Run cross-validation for all folds within a sweep.""" + # Initialize the sweep run and get metadata + sweep_run = wandb_init(name=wandb_run_name) + sweep_id = sweep_run.sweep_id or "unknown" + sweep_url = sweep_run.get_sweep_url() + project_url = sweep_run.get_project_url() + sweep_group_url = f"{project_url}/groups/{sweep_id}" + sweep_run.notes = sweep_group_url + sweep_run.save() + config = dict(sweep_run.config) + + # Reset wandb setup to ensure clean state + _setup(_reset=True) + + # Run all folds + for fold_index in range(len(cv_splits)): + cross_validate_single_fold( + fold_index=fold_index, + sweep_id=sweep_id, + sweep_run_name=f"{wandb_run_name}-fold{fold_index+1}", + config=config, + ) + + wandb_termlog(f"Sweep URL: {sweep_url}") + wandb_termlog(f"Sweep Group URL: {sweep_group_url}") + + wandb_agent(sweep_id, run_all_folds)