From f92b8988f4b20f71025efd27771b76d72d17092c Mon Sep 17 00:00:00 2001
From: Мартынов Максим Сергеевич
Date: Mon, 15 Apr 2024 18:49:23 +0300
Subject: [PATCH 01/18] [DOP-15023] Bump version

---
 docs/conf.py           | 2 +-
 pyproject.toml         | 3 ++-
 syncmaster/__init__.py | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index fc19a551..a291cf33 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -36,7 +36,7 @@
 # The short X.Y version.

 # this value is updated automatically by `poetry version ...` and poetry-bumpversion plugin
-ver = Version.parse("0.1.4")
+ver = Version.parse("0.1.5")
 version = ver.base_version
 # The full version, including alpha/beta/rc tags.
 release = ver.public
diff --git a/pyproject.toml b/pyproject.toml
index 245180cf..bd4f5b7e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "data-syncmaster"
-version = "0.1.4"
+version = "0.1.5"
 license = "Apache-2.0"
 description = "Syncmaster REST API + Worker"
 authors = ["DataOps.ETL "]
@@ -201,6 +201,7 @@ exclude_lines = [



+
 [tool.poetry.group.docs.dependencies]
 autodoc-pydantic = {version = "^2.0.1", python = ">=3.8"}
 numpydoc = {version = "^1.6.0", python = ">=3.8"}
diff --git a/syncmaster/__init__.py b/syncmaster/__init__.py
index 6a8b0526..6f3ce8c3 100644
--- a/syncmaster/__init__.py
+++ b/syncmaster/__init__.py
@@ -1,6 +1,6 @@
 # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems)
 # SPDX-License-Identifier: Apache-2.0
-_raw_version = "0.1.4"
+_raw_version = "0.1.5"
 # version always contain only release number like 0.0.1
 __version__ = ".".join(_raw_version.split(".")[:3])  # noqa: WPS410


From 2695fdd87043d902db25c91c009bfdd9888a6741 Mon Sep 17 00:00:00 2001
From: Мартынов Максим Сергеевич
Date: Mon, 15 Apr 2024 18:50:18 +0300
Subject: [PATCH 02/18] [DOP-15023] Remove docs and test dependencies from prod images

---
 .github/workflows/backend_docker_image.yml |  1 +
 docker/Dockerfile.backend                  |  4 +-
 docker/Dockerfile.worker                   |  4 +-
 pyproject.toml                             | 47 ++++++++++------------
 4 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/backend_docker_image.yml b/.github/workflows/backend_docker_image.yml
index 8149b214..bd47cfee 100644
--- a/.github/workflows/backend_docker_image.yml
+++ b/.github/workflows/backend_docker_image.yml
@@ -48,6 +48,7 @@ jobs:
         with:
           tags: ${{ env.TAG }}
           context: .
+          target: prod
           file: docker/Dockerfile.backend
           pull: true
           push: true
diff --git a/docker/Dockerfile.backend b/docker/Dockerfile.backend
index 057180f2..b86f6932 100644
--- a/docker/Dockerfile.backend
+++ b/docker/Dockerfile.backend
@@ -18,7 +18,7 @@ WORKDIR /app
 COPY ./pyproject.toml ./poetry.lock* /app/

 RUN pip install --upgrade pip setuptools wheel packaging
-RUN poetry install --no-root --extras "backend"
+RUN poetry install --no-root --extras "backend" --without test,docs,dev

 COPY ./syncmaster/ /app/syncmaster/
 ENV PYTHONPATH=/app
@@ -29,5 +29,5 @@ ENTRYPOINT ["/app/entrypoint.sh"]

 FROM prod as test

-RUN poetry install --no-root --extras "backend" --with test
+RUN poetry install --no-root --extras "backend" --with test --without docs,dev
 RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
diff --git a/docker/Dockerfile.worker b/docker/Dockerfile.worker
index da880759..53fd72ff 100644
--- a/docker/Dockerfile.worker
+++ b/docker/Dockerfile.worker
@@ -26,7 +26,7 @@ WORKDIR /app
 COPY ./pyproject.toml ./poetry.lock* /app/

 RUN pip install --upgrade pip setuptools wheel packaging
-RUN poetry install --no-root --extras "worker"
+RUN poetry install --no-root --extras "worker" --without test,docs,dev

 COPY ./syncmaster/ /app/syncmaster/
 ENV PYTHONPATH=/app
@@ -41,5 +41,5 @@ FROM prod as test
 ENV CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session.get_worker_spark_session

 # CI runs tests in the worker container, so we need backend dependencies too
-RUN poetry install --no-root --extras "worker backend" --with test
+RUN poetry install --no-root --extras "worker backend" --with test --without docs,dev
 RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh
diff --git a/pyproject.toml b/pyproject.toml
index bd4f5b7e..1d2eba21 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,7 @@
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
 [tool.poetry]
 name = "data-syncmaster"
 version = "0.1.5"
@@ -95,9 +99,23 @@ platformdirs = "4.2.0"
 sqlalchemy = {extras = ["mypy"], version = "^2.0.18"}
 types-python-jose = "^3.3.4.7"

-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+[tool.poetry.group.docs.dependencies]
+autodoc-pydantic = {version = "^2.0.1", python = ">=3.8"}
+numpydoc = {version = "^1.6.0", python = ">=3.8"}
+sphinx = [
+    {version = "^7.1.2", python = ">=3.8"},
+    {version = "^7.2.6", python = ">=3.9"},
+]
+furo = {version = "^2024.1.29", python = ">=3.8"}
+sphinx-copybutton = {version = "^0.5.2", python = ">=3.8"}
+sphinxcontrib-towncrier = {version =
"^0.4.0a0", python = ">=3.8"} -towncrier = {version = "^23.11.0", python = ">=3.8"} -sphinx-issues = {version = ">=3.0.1,<5.0.0", python = ">=3.8"} -sphinx-design = {version = "^0.5.0", python = ">=3.8"} -sphinx-favicon = {version = "^1.0.1", python = ">=3.8"} -sphinx-argparse = {version = "^0.4.0", python = ">=3.8"} -# uncomment after https://github.com/zqmillet/sphinx-plantuml/pull/4 -# sphinx-plantuml = {version = "^1.0.0", python = ">=3.8"} - [tool.towncrier] name = "Syncmaster" package = "syncmaster" From 642dce558f5fe33221361726ac4db4eb3de6a050 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 15 Apr 2024 19:02:45 +0300 Subject: [PATCH 03/18] [DOP-15023] Install Kerberos libs to worker image --- poetry.lock | 359 ++++++++++++++++++++++++++++++++++--------------- pyproject.toml | 2 +- 2 files changed, 253 insertions(+), 108 deletions(-) diff --git a/poetry.lock b/poetry.lock index c4ecabd6..79602b38 100644 --- a/poetry.lock +++ b/poetry.lock @@ -779,6 +779,17 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + [[package]] name = "distlib" version = "0.3.8" @@ -813,13 +824,13 @@ files = [ [[package]] name = "ecdsa" -version = "0.18.0" +version = "0.19.0" description = "ECDSA cryptographic signature library (pure python)" optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.6" files = [ - {file = "ecdsa-0.18.0-py2.py3-none-any.whl", hash = "sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd"}, - {file = "ecdsa-0.18.0.tar.gz", hash = "sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49"}, + {file = "ecdsa-0.19.0-py2.py3-none-any.whl", hash = "sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a"}, + {file = "ecdsa-0.19.0.tar.gz", hash = "sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8"}, ] [package.dependencies] @@ -894,13 +905,13 @@ all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)" [[package]] name = "filelock" -version = "3.13.3" +version = "3.13.4" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.13.3-py3-none-any.whl", hash = "sha256:5ffa845303983e7a0b7ae17636509bc97997d58afeafa72fb141a17b152284cb"}, - {file = "filelock-3.13.3.tar.gz", hash = "sha256:a79895a25bbefdf55d1a2a0a80968f7dbb28edcd6d4234a0afb3f37ecde4b546"}, + {file = "filelock-3.13.4-py3-none-any.whl", hash = "sha256:404e5e9253aa60ad457cae1be07c0f0ca90a63931200a47d9b6a6af84fd7b45f"}, + {file = "filelock-3.13.4.tar.gz", hash = "sha256:d13f466618bfde72bd2c18255e269f72542c6e70e7bac83a0232d6b1cc5c8cf4"}, ] [package.extras] @@ -926,47 +937,47 @@ pyflakes = ">=3.2.0,<3.3.0" [[package]] name = "frozendict" -version = "2.4.0" +version = "2.4.2" description = "A simple immutable dictionary" optional = false python-versions = ">=3.6" files = [ - {file = "frozendict-2.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:475c65202a6f5421df8cacb8a2f29c5087134a0542b0540ae95fbf4db7af2ff9"}, - {file = "frozendict-2.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2607e82efdd2c277224a58bda3994d4cd48e49eff7fa31e404cf3066e8dbfeae"}, - {file = "frozendict-2.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fd4583194baabe100c135883017da76259a315d34e303eddf198541b7e02e44"}, - {file = "frozendict-2.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efca7281184b54f7abab6980cf25837b709f72ced62791f62dabcd7b184d958a"}, - {file = "frozendict-2.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fc4cba1ced988ce9020dfcaae6fe3f5521eebc00c5772b511aaf691b0be91e6"}, - {file = "frozendict-2.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8fab616e7c0fea2ac928f107c740bd9ba516fc083adfcd1c391d6bfc9164403d"}, - {file = "frozendict-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:09ba8ee37d260adde311b8eb4cd12bf27f64071242f736757ae6a11d331eb860"}, - {file = "frozendict-2.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:0615ed71570eec3cc96df063930ea6e563211efeeac86e3f3cc8bdfc9c9bfab7"}, - {file = "frozendict-2.4.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cc754117a7d60ba8e55b3c39abd67f37fbc05dd63cdcb03d1717a382fe0a3421"}, - {file = "frozendict-2.4.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2804ea4bd2179bb33b99483cc8d69246630cc00632b9affe2914e8666f1cc7e5"}, - {file = "frozendict-2.4.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd4700c3f0aebdc8f4375c35590135794b1dbf2aca132f4756b584fa9910af2d"}, - {file = "frozendict-2.4.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:da4406d95c340e0b1cc43a3858fac729f52689325bcf61a9182eb94aff7451dc"}, - {file = "frozendict-2.4.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1875e7b70a5724bf964354da8fd542240d2cead0d80053ac96bf4494ce3517fa"}, - {file = "frozendict-2.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:a60f353496637ca21396289a7d969af1eb4ec4d11a7c37a0e7f25fc1761a0c97"}, - {file = "frozendict-2.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b666f9c6c8a9e794d2713a944b10a65480ff459579d75b5f686c75031c2c2dfc"}, - {file = "frozendict-2.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9d81fb396ea81fcba3b3dde4a4b51adcb74ff31632014fbfd030f8acd5a7292"}, - {file = "frozendict-2.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4925c8e82d2bd23d45996cd0827668a52b9c51103897c98ce409a763d0c00c61"}, - {file = "frozendict-2.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:aa86325da6a6071284b4ed3d9d2cd9db068560aebad503b658d6a889a0575683"}, - {file = "frozendict-2.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5bb5b62d4e2bce12e91800496d94de41bec8f16e4d8a7b16e8f263676ae2031a"}, - {file = "frozendict-2.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:3909df909516cfd7bcefd9a3003948970a12a50c5648d8bbddafcef171f2117f"}, - {file = "frozendict-2.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:204f2c5c10fc018d1ba8ccc67758aa83fe769c782547bd26dc250317a7ccba71"}, - {file = "frozendict-2.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8d1d269874c94b1ed2b6667e5e43dcf4541838019b1caa4c48f848ac73634df"}, - {file = "frozendict-2.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:809f1cffb602cf06e5186c69c0e3b74bec7a3684593145331f9aa2a65b5ba3b7"}, - {file = "frozendict-2.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b017cba5f73869b04c2977139ad08e57a7480de1e384c34193939698119baa1d"}, - {file = "frozendict-2.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0b75e5e231621dedaef88334997e79fbd137dd89895543d3862fe0220fc3572c"}, - {file = "frozendict-2.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:df3819a5d48ab3aae1548e62093d0111ad7c3b62ff9392421b7bbf149c08b629"}, - {file = "frozendict-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:42a9b33ccf9d417b22146e59803c53d5c39d7d9151d2df8df59c235f6a1a5ed7"}, - {file = "frozendict-2.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3f51bfa64e0c4a6608e3f2878bab1211a6b3b197de6fa57151bbe73f1184457"}, - {file = "frozendict-2.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a1d232f092dc686e6ef23d436bde30f82c018f31cef1b89b31caef03814b1617"}, - {file = "frozendict-2.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9e530658134e88607ff8c2c8934a07b2bb5e9fffab5045f127746f6542c6c77e"}, - {file = "frozendict-2.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23a52bbea30c9e35b89291273944393770fb031e522a172e3aff19b62cc50047"}, - {file = "frozendict-2.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f91acaff475d0ef0d3436b805c9b91fc627a6a8a281771a24f7ab7f458a0b34f"}, - {file = "frozendict-2.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:08d9c7c1aa92b94538b3a79c43999f999012e174588435f197794d5e5a80e0f5"}, - {file = "frozendict-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:05c5a77957ecba4286c7ab33861a8f4f2badc7ea86fc82b834fb360d3aa4c108"}, - {file = "frozendict-2.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:c8af8a6a39e0050d3f3193cda56c42b43534a9b3995c44241bb9527e3c3fd451"}, - {file = "frozendict-2.4.0.tar.gz", hash = "sha256:c26758198e403337933a92b01f417a8240c954f553e1d4b5e0f8e39d9c8e3f0a"}, + {file = "frozendict-2.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:19743495b1e92a7e4db56fcd6a5d36ea1d1b0f550822d6fd780e44d58f0b8c18"}, + {file = "frozendict-2.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81efb4ea854a1c93d954a67389eaf78c508acb2d4768321a835cda2754ec5c01"}, + {file = "frozendict-2.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5f1a4d9662b854dce52b560b60f51349905dc871826b8c6be20141a13067a53"}, + {file = "frozendict-2.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1412aeb325e4a28cfe32106c66c046372bb7fd5a9af1748193549c5d01a9e9c1"}, + {file = "frozendict-2.4.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f7ce0535f02eba9746e4e2cf0abef0f0f2051d20fdccf4af31bc3d1adecf5a71"}, + {file 
= "frozendict-2.4.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:07153e6d2720fa1131bb180ce388c7042affb29561d8bcd1c0d6e683a8beaea2"}, + {file = "frozendict-2.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a90ea6d5248617a1222daef07d22fb146ff07635a36db327e1ce114bf3e304"}, + {file = "frozendict-2.4.2-cp310-cp310-win_arm64.whl", hash = "sha256:20a6f741c92fdeb3766924cde42b8ee445cf568e3be8aa983cb83e9fe5b61e63"}, + {file = "frozendict-2.4.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:146129502cd9d96de64e0c8f7dc4c66422da3d4bfccf891dd80a3821b358a926"}, + {file = "frozendict-2.4.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ac1f74ccf818977abbc1868090c06436b8f06534d306f808f15cffc304ae046"}, + {file = "frozendict-2.4.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d2ea4f10505ad15f53ce3742420682d916d0c4d566edb8e1019756e7cea30"}, + {file = "frozendict-2.4.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:4a5841681e70d2862ca153543f2912e0bab034bf29e2d3610e86ea42506121c2"}, + {file = "frozendict-2.4.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d4a10119f17552cbeab48d4ae830ba091c6d47616589618adc31f251184579a7"}, + {file = "frozendict-2.4.2-cp36-cp36m-win_amd64.whl", hash = "sha256:7d13ffe649e9db6f4bb5e107d9be7dfd23e13101bc69f97aa5fa6cbf6aecaadd"}, + {file = "frozendict-2.4.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19e64630e164a297f83e9a1c69f1cd36fa4b3d1196c1f9fc006a0385aa198ea4"}, + {file = "frozendict-2.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bedb0a6587bae53bd53727b92a87c4cf90ad7a7e0bd2db562d439beb6982712e"}, + {file = "frozendict-2.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83cc9d063131fd8adbeb18a473d222b5dc8301cac9505cfe578158f9a9bf55a9"}, + {file = "frozendict-2.4.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:92c46b155ea9eb9ecabc66ba2d9030f2634319f55c6448688965ece094f14b51"}, + {file = "frozendict-2.4.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f958d40637e0440bce2453019821c94fe86cfc5f3847ae11cd4f02c3548b1d1b"}, + {file = "frozendict-2.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:ac954be447a907face9b652207fbd943b9b552212890db959ba653e8f1dc3f56"}, + {file = "frozendict-2.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f7e0ff5e84742604a1b42c2de4f1e67630c0868cf52a5c585b54a99e06f6b453"}, + {file = "frozendict-2.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:84c36bfa819cd8442f6e0bdb86413c7678b2822a46b1a22cfa0f0dd30d9e5c45"}, + {file = "frozendict-2.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cead3bfe70c90c634a9b76807c9d7e75e6c5666ec96fa2cea8e7412ccf22a1f8"}, + {file = "frozendict-2.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fc6e3158107b5431255978b954758b1041cc70a3b8e7657373110512eb528e3"}, + {file = "frozendict-2.4.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4db1d6cc412bd865cab36723995208b82166a97bc6c724753bcd2b90cf24f164"}, + {file = "frozendict-2.4.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ff6fb5831539fffb09d71cc0cc0462b1f27c0160cb6c6fa2d1f4c1bc7fffe52a"}, + {file = "frozendict-2.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:79e1c94ad2a925ad5723d82a4134c6d851d5a7bc72b7e9da8b2087c42758a512"}, + {file = "frozendict-2.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:34704f9ffb21448d4b5c0f9239f8f058c0efab4bfdbe2956c5be978fef0b929c"}, + {file = "frozendict-2.4.2-cp39-cp39-macosx_11_0_arm64.whl", 
hash = "sha256:5280d685cd1659883a3010dec843afe3065416ae92e453498997d4474a898a39"}, + {file = "frozendict-2.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ca09a376114172e4d9918e6d576f58244c45e21f5af1245085699fd3a171c47"}, + {file = "frozendict-2.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55953aa2acf5bf183c664f3d0f540f8c8ac8f5fa97170f2098d413414318eb2b"}, + {file = "frozendict-2.4.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:476e4857e1d87b05c9102dd5409216ce4716cb7df619e6657429bc99279303cc"}, + {file = "frozendict-2.4.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4a8b298f39242d25770d029588ce9d4f524e9f4edc60d2d34b6178fb07c8a93e"}, + {file = "frozendict-2.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:c157b8a92743a7905b341edb0663044fecdc7780f96c59a2843d3da68d694b90"}, + {file = "frozendict-2.4.2-cp39-cp39-win_arm64.whl", hash = "sha256:cbab325c0a98b2f3ee291b36710623781b4977a3057f9103a7b0f11bcc23b177"}, + {file = "frozendict-2.4.2.tar.gz", hash = "sha256:741779e1d1a2e6bb2c623f78423bd5d14aad35dc0c57e6ccc89e54eaab5f1b8a"}, ] [[package]] @@ -1120,6 +1131,42 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "gssapi" +version = "1.8.3" +description = "Python GSSAPI Wrapper" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gssapi-1.8.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4e4a83e9b275fe69b5d40be6d5479889866b80333a12c51a9243f2712d4f0554"}, + {file = "gssapi-1.8.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8d57d67547e18f4e44a688bfb20abbf176d1b8df547da2b31c3f2df03cfdc269"}, + {file = "gssapi-1.8.3-cp310-cp310-win32.whl", hash = "sha256:3a3f63105f39c4af29ffc8f7b6542053d87fe9d63010c689dd9a9f5571facb8e"}, + {file = "gssapi-1.8.3-cp310-cp310-win_amd64.whl", hash = "sha256:b031c0f186ab4275186da385b2c7470dd47c9b27522cb3b753757c9ac4bebf11"}, + {file = "gssapi-1.8.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b03d6b30f1fcd66d9a688b45a97e302e4dd3f1386d5c333442731aec73cdb409"}, + {file = "gssapi-1.8.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca6ceb17fc15eda2a69f2e8c6cf10d11e2edb32832255e5d4c65b21b6db4680a"}, + {file = "gssapi-1.8.3-cp311-cp311-win32.whl", hash = "sha256:edc8ef3a9e397dbe18bb6016f8e2209969677b534316d20bb139da2865a38efe"}, + {file = "gssapi-1.8.3-cp311-cp311-win_amd64.whl", hash = "sha256:8fdb1ff130cee49bc865ec1624dee8cf445cd6c6e93b04bffef2c6f363a60cb9"}, + {file = "gssapi-1.8.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:19c373b3ba63ce19cd3163aa1495635e3d01b0de6cc4ff1126095eded1df6e01"}, + {file = "gssapi-1.8.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f1a8046d695f2c9b8d640a6e385780d3945c0741571ed6fee6f94c31e431dc"}, + {file = "gssapi-1.8.3-cp312-cp312-win32.whl", hash = "sha256:338db18612e3e6ed64e92b6d849242a535fdc98b365f21122992fb8cae737617"}, + {file = "gssapi-1.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:5731c5b40ecc3116cfe7fb7e1d1e128583ec8b3df1e68bf8cd12073160793acd"}, + {file = "gssapi-1.8.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e556878da197ad115a566d36e46a8082d0079731d9c24d1ace795132d725ff2a"}, + {file = "gssapi-1.8.3-cp37-cp37m-win32.whl", hash = "sha256:e2bb081f2db2111377effe7d40ba23f9a87359b9d2f4881552b731e9da88b36b"}, + {file = "gssapi-1.8.3-cp37-cp37m-win_amd64.whl", hash = "sha256:4d9ed83f2064cda60aad90e6840ae282096801b2c814b8cbd390bf0df4635aab"}, + {file = "gssapi-1.8.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:7d91fe6e2a5c89b32102ea8e374b8ae13b9031d43d7b55f3abc1f194ddce820d"}, + {file = "gssapi-1.8.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d5b28237afc0668046934792756dd4b6b7e957b0d95a608d02f296734a2819ad"}, + {file = "gssapi-1.8.3-cp38-cp38-win32.whl", hash = "sha256:791e44f7bea602b8e3da1ec56fbdb383b8ee3326fdeb736f904c2aa9af13a67d"}, + {file = "gssapi-1.8.3-cp38-cp38-win_amd64.whl", hash = "sha256:5b4bf84d0a6d7779a4bf11dacfd3db57ae02dd53562e2aeadac4219a68eaee07"}, + {file = "gssapi-1.8.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e40efc88ccefefd6142f8c47b8af498731938958b808bad49990442a91f45160"}, + {file = "gssapi-1.8.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee74b9211c977b9181ff4652d886d7712c9a221560752a35393b58e5ea07887a"}, + {file = "gssapi-1.8.3-cp39-cp39-win32.whl", hash = "sha256:465c6788f2ac6ef7c738394ba8fde1ede6004e5721766f386add63891d8c90af"}, + {file = "gssapi-1.8.3-cp39-cp39-win_amd64.whl", hash = "sha256:8fb8ee70458f47b51ed881a6881f30b187c987c02af16cc0fff0079255d4d465"}, + {file = "gssapi-1.8.3.tar.gz", hash = "sha256:aa3c8d0b1526f52559552bb2c9d2d6be013d76a8e5db00b39a1db5727e93b0b0"}, +] + +[package.dependencies] +decorator = "*" + [[package]] name = "h11" version = "0.14.0" @@ -1226,13 +1273,13 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.6" +version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" files = [ - {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, - {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] [[package]] @@ -1324,13 +1371,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "kombu" -version = "5.3.6" +version = "5.3.7" description = "Messaging library for Python." 
optional = false python-versions = ">=3.8" files = [ - {file = "kombu-5.3.6-py3-none-any.whl", hash = "sha256:49f1e62b12369045de2662f62cc584e7df83481a513db83b01f87b5b9785e378"}, - {file = "kombu-5.3.6.tar.gz", hash = "sha256:f3da5b570a147a5da8280180aa80b03807283d63ea5081fcdb510d18242431d9"}, + {file = "kombu-5.3.7-py3-none-any.whl", hash = "sha256:5634c511926309c7f9789f1433e9ed402616b56836ef9878f01bd59267b4c7a9"}, + {file = "kombu-5.3.7.tar.gz", hash = "sha256:011c4cd9a355c14a1de8d35d257314a1d2456d52b7140388561acac3cf1a97bf"}, ] [package.dependencies] @@ -1354,15 +1401,36 @@ sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] +[[package]] +name = "krb5" +version = "0.5.1" +description = "Kerberos API bindings for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "krb5-0.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e51c700cf148e63fef60bc4b2c453018218a3170dedbfe2840f122aee5a453e7"}, + {file = "krb5-0.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ca9dcb23dc0014f79af0f151bb501bfe4f371b3e54bde78e79ea73dad272eda"}, + {file = "krb5-0.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:83d0a7d44130681f6a8168fc3609d783c77868fe1ab4a9861da30ae8212d632a"}, + {file = "krb5-0.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2269fb6c0813cd7f58526a152d746aebb8e48026b92856093865414395c185e9"}, + {file = "krb5-0.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65fbbcf6de0fecee56a05370b6f65230c121a0cadad8e6a56f5a852bdeecaa6"}, + {file = "krb5-0.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e0412d84484bf37158f040baa86ac3c08604251f9d0afdf2e9659b237ce3cdfa"}, + {file = "krb5-0.5.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fb69fe96be7197f007b5b20172346728349d0b03a39b3343e8793fabb3d28626"}, + {file = "krb5-0.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:73badd6982e8af81e4cca82c4f1a6dbcc50257d700072b8df7c84ea003c1b5e4"}, + {file = "krb5-0.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a9b4109adf9f02d0885c96611aba1945970b5319a93cb427617049d6536921ac"}, + {file = "krb5-0.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:df15e3fe8b1d03cf715b5866215da6b131e1fd5ddd6e7f659e74bb79498033b2"}, + {file = "krb5-0.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3008124d01da50559ee7ac2ce0045ee069963f3086ec5c2460b07da6fae4fdda"}, + {file = "krb5-0.5.1.tar.gz", hash = "sha256:7125ee240dad951cc0a71e567c51b215238e490e87ad67b1af9a69dd90e63bca"}, +] + [[package]] name = "mako" -version = "1.3.2" +version = "1.3.3" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
optional = true python-versions = ">=3.8" files = [ - {file = "Mako-1.3.2-py3-none-any.whl", hash = "sha256:32a99d70754dfce237019d17ffe4a282d2d3351b9c476e90d8a60e63f133b80c"}, - {file = "Mako-1.3.2.tar.gz", hash = "sha256:2a0c8ad7f6274271b3bb7467dd37cf9cc6dab4bc19cb69a4ef10669402de698e"}, + {file = "Mako-1.3.3-py3-none-any.whl", hash = "sha256:5324b88089a8978bf76d1629774fcc2f1c07b82acdf00f4c5dd8ceadfffc4b40"}, + {file = "Mako-1.3.3.tar.gz", hash = "sha256:e16c01d9ab9c11f7290eef1cfefc093fb5a45ee4a3da09e2fec2e4d1bae54e73"}, ] [package.dependencies] @@ -1666,6 +1734,7 @@ platformdirs = "*" pydantic = ">=1.9.2,<1.10.2 || >1.10.2,<3" pyspark = {version = "*", optional = true, markers = "extra == \"spark\""} pyyaml = "*" +requests-kerberos = {version = ">=0.7.0", optional = true, markers = "extra == \"kerberos\""} typing-extensions = {version = ">=4.5.0", markers = "python_version > \"3.7\""} [package.extras] @@ -1949,13 +2018,13 @@ files = [ [[package]] name = "pycparser" -version = "2.21" +version = "2.22" description = "C parser in Python" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.8" files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] [[package]] @@ -2174,6 +2243,27 @@ mllib = ["numpy (>=1.15)"] pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] +[[package]] +name = "pyspnego" +version = "0.10.2" +description = "Windows Negotiate Authentication Client and Server" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyspnego-0.10.2-py3-none-any.whl", hash = "sha256:3d5c5c28dbd0cd6a679acf45219630254db3c0e5ad4a16de521caa0585b088c0"}, + {file = "pyspnego-0.10.2.tar.gz", hash = "sha256:9a22c23aeae7b4424fdb2482450d3f8302ac012e2644e1cfe735cf468fcd12ed"}, +] + +[package.dependencies] +cryptography = "*" +gssapi = {version = ">=1.6.0", optional = true, markers = "sys_platform != \"win32\" and extra == \"kerberos\""} +krb5 = {version = ">=0.3.0", optional = true, markers = "sys_platform != \"win32\" and extra == \"kerberos\""} +sspilib = {version = ">=0.1.0", markers = "sys_platform == \"win32\""} + +[package.extras] +kerberos = ["gssapi (>=1.6.0)", "krb5 (>=0.3.0)"] +yaml = ["ruamel.yaml"] + [[package]] name = "pytest" version = "8.1.1" @@ -2402,6 +2492,22 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-kerberos" +version = "0.14.0" +description = "A Kerberos authentication handler for python-requests" +optional = false +python-versions = ">=3.6" +files = [ + {file = "requests-kerberos-0.14.0.tar.gz", hash = "sha256:cda9d1240ae5392e081869881c8742d0e171fd6a893a7ac0875db2748e966fd1"}, + {file = "requests_kerberos-0.14.0-py2.py3-none-any.whl", hash = "sha256:da74ea478ccd8584de88092bdcd17a7c29d494374a340d1d8677189903c9ac6a"}, +] + +[package.dependencies] +cryptography = ">=1.3" +pyspnego = {version = "*", extras = ["kerberos"]} +requests = ">=1.1.0" + [[package]] name = "rich" version = "13.7.1" @@ 
-2436,18 +2542,18 @@ pyasn1 = ">=0.1.3" [[package]] name = "setuptools" -version = "69.2.0" +version = "69.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, - {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, + {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"}, + {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] @@ -2865,6 +2971,45 @@ test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3 timezone = ["python-dateutil"] url = ["furl (>=0.4.1)"] +[[package]] +name = "sspilib" +version = "0.1.0" +description = "SSPI API bindings for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sspilib-0.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e43f3e684e9d29c80324bd54f52dac65ac4b18d81a2dcd529dce3994369a14d"}, + {file = "sspilib-0.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1eb34eda5d362b6603707a55751f1eff81775709b821e51cb64d1d2fa2bb8b6e"}, + {file = "sspilib-0.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ffe123f056f78cbe18aaed6b15f06e252020061c3387a72615abd46699a0b24"}, + {file = "sspilib-0.1.0-cp310-cp310-win32.whl", hash = "sha256:a4151072e28ec3b7d785beac9548a3d6a4549c431eb5487a5b8a1de028e9fef0"}, + {file = 
"sspilib-0.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:2a19696c7b96b6bbef2b2ddf35df5a92f09b268476a348390a2f0da18cf29510"}, + {file = "sspilib-0.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:d2778e5e2881405b4d359a604e2802f5b7a7ed433ff62d6073d04c203af10eb1"}, + {file = "sspilib-0.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:09d7f72ad5e4bbf9a8f1acf0d5f0c3f9fbe500f44c4a45ac24a99ece84f5654f"}, + {file = "sspilib-0.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e5705e11aaa030a61d2b0a2ce09d2b8a1962dd950e55adc7a3c87dd463c6878"}, + {file = "sspilib-0.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dced8213d311c56f5f38044716ebff5412cc156f19678659e8ffa9bb6a642bd7"}, + {file = "sspilib-0.1.0-cp311-cp311-win32.whl", hash = "sha256:d30d38d52dbd857732224e86ae3627d003cc510451083c69fa481fc7de88a7b6"}, + {file = "sspilib-0.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:61c9067168cce962f7fead42c28804c3a39a164b9a7b660200b8cfe31e3af071"}, + {file = "sspilib-0.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:b526b8e5a236553f5137b951b89a2f108f56138ad05f31fd0a51b10f80b6c3cc"}, + {file = "sspilib-0.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3ff356d40cd34c900f94f1591eaabd458284042af611ebc1dbf609002066dba5"}, + {file = "sspilib-0.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b0fee3a52d0acef090f6c9b49953a8400fdc1c10aca7334319414a3038aa493"}, + {file = "sspilib-0.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab52d190dad1d578ec40d1fb417a8571954f4e32f35442a14cb709f57d3acbc9"}, + {file = "sspilib-0.1.0-cp312-cp312-win32.whl", hash = "sha256:b3cf819094383ec883e9a63c11b81d622618c815c18a6c9d761d9a14d9f028d1"}, + {file = "sspilib-0.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:b83825a2c43ff84ddff72d09b098057efaabf3841d3c42888078e154cf8e9595"}, + {file = "sspilib-0.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:9aa6ab4c3fc1057251cf1f3f199daf90b99599cdfafc9eade8fdf0c01526dec8"}, + {file = "sspilib-0.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:82bff5df178386027d0112458b6971bbd18c76eb9e7be53fd61dab33d7bf8417"}, + {file = "sspilib-0.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:18393a9e6e0447cb7f319d361b65e9a0eaa5484705f16787133ffc49ad364c28"}, + {file = "sspilib-0.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a423fbca206ba0ca811dc995d8c3af045402b7d330f033e938b24f3a1d93fc"}, + {file = "sspilib-0.1.0-cp38-cp38-win32.whl", hash = "sha256:86bd936b1ef0aa63c6d9623ad08473e74ceb15f342f6e92cbade15ed9574cd33"}, + {file = "sspilib-0.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:d4f688b94f0a64128444063e1d3d59152614175999222f6e2920681faea833f4"}, + {file = "sspilib-0.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2acef24e13e40d9dd8697eaae84ead9f417528ff741d087ec4eb4260518f4dc7"}, + {file = "sspilib-0.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4b625802d80144d856d5eb6e8f4412f186565758da4493c7ad1b88e3d6d353de"}, + {file = "sspilib-0.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c06ca1e34702bca1c750dcb5133b716f316b38dccb28d55a1a44d9842bc3f391"}, + {file = "sspilib-0.1.0-cp39-cp39-win32.whl", hash = "sha256:68496c9bd52b57a1b6d2e5529b43c30060249b8db901127b8343c4ad8cd93670"}, + {file = "sspilib-0.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:369727097f07a440099882580e284e137d9c27b7de354d63b65e327a454e7bee"}, + {file = "sspilib-0.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:87d8268c0517149c51a53b3888961ebf66826bb3dbb82c4e5cf10108f5456104"}, + 
{file = "sspilib-0.1.0.tar.gz", hash = "sha256:58b5291553cf6220549c0f855e0e6973f4977375d8236ce47bb581efb3e9b1cf"}, +] + [[package]] name = "starlette" version = "0.37.2" @@ -2931,13 +3076,13 @@ dev = ["furo", "packaging", "sphinx (>=5)", "twisted"] [[package]] name = "types-pyasn1" -version = "0.5.0.20240301" +version = "0.6.0.20240402" description = "Typing stubs for pyasn1" optional = false python-versions = ">=3.8" files = [ - {file = "types-pyasn1-0.5.0.20240301.tar.gz", hash = "sha256:da328f5771d54a2016863270b281047f9cc38e39f65a297ba9f987d5de3403f1"}, - {file = "types_pyasn1-0.5.0.20240301-py3-none-any.whl", hash = "sha256:d9989899184bbd6e2adf6f812c8f49c48197fceea251a6fb13666dae3203f80d"}, + {file = "types-pyasn1-0.6.0.20240402.tar.gz", hash = "sha256:5d54dcb33f69dd269071ca098e923ac20c5f03c814631fa7f3ed9ee035a5da3a"}, + {file = "types_pyasn1-0.6.0.20240402-py3-none-any.whl", hash = "sha256:848d01e7313c200acc035a8b3d377fe7b2aecbe77f2be49eb160a7f82835aaaf"}, ] [[package]] @@ -2967,13 +3112,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.10.0" +version = "4.11.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, - {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] [[package]] @@ -3099,54 +3244,54 @@ test = ["zope.testrunner"] [[package]] name = "zope-interface" -version = "6.2" +version = "6.3" description = "Interfaces for Python" optional = false python-versions = ">=3.7" files = [ - {file = "zope.interface-6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:506f5410b36e5ba494136d9fa04c548eaf1a0d9c442b0b0e7a0944db7620e0ab"}, - {file = "zope.interface-6.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b386b8b9d2b6a5e1e4eadd4e62335571244cb9193b7328c2b6e38b64cfda4f0e"}, - {file = "zope.interface-6.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abb0b3f2cb606981c7432f690db23506b1db5899620ad274e29dbbbdd740e797"}, - {file = "zope.interface-6.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de7916380abaef4bb4891740879b1afcba2045aee51799dfd6d6ca9bdc71f35f"}, - {file = "zope.interface-6.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b240883fb43160574f8f738e6d09ddbdbf8fa3e8cea051603d9edfd947d9328"}, - {file = "zope.interface-6.2-cp310-cp310-win_amd64.whl", hash = "sha256:8af82afc5998e1f307d5e72712526dba07403c73a9e287d906a8aa2b1f2e33dd"}, - {file = "zope.interface-6.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d45d2ba8195850e3e829f1f0016066a122bfa362cc9dc212527fc3d51369037"}, - {file = "zope.interface-6.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:76e0531d86523be7a46e15d379b0e975a9db84316617c0efe4af8338dc45b80c"}, - {file = "zope.interface-6.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59f7374769b326a217d0b2366f1c176a45a4ff21e8f7cebb3b4a3537077eff85"}, - {file = 
"zope.interface-6.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25e0af9663eeac6b61b231b43c52293c2cb7f0c232d914bdcbfd3e3bd5c182ad"}, - {file = "zope.interface-6.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e02a6fc1772b458ebb6be1c276528b362041217b9ca37e52ecea2cbdce9fac"}, - {file = "zope.interface-6.2-cp311-cp311-win_amd64.whl", hash = "sha256:02adbab560683c4eca3789cc0ac487dcc5f5a81cc48695ec247f00803cafe2fe"}, - {file = "zope.interface-6.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8f5d2c39f3283e461de3655e03faf10e4742bb87387113f787a7724f32db1e48"}, - {file = "zope.interface-6.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:75d2ec3d9b401df759b87bc9e19d1b24db73083147089b43ae748aefa63067ef"}, - {file = "zope.interface-6.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa994e8937e8ccc7e87395b7b35092818905cf27c651e3ff3e7f29729f5ce3ce"}, - {file = "zope.interface-6.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ede888382882f07b9e4cd942255921ffd9f2901684198b88e247c7eabd27a000"}, - {file = "zope.interface-6.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2606955a06c6852a6cff4abeca38346ed01e83f11e960caa9a821b3626a4467b"}, - {file = "zope.interface-6.2-cp312-cp312-win_amd64.whl", hash = "sha256:ac7c2046d907e3b4e2605a130d162b1b783c170292a11216479bb1deb7cadebe"}, - {file = "zope.interface-6.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:febceb04ee7dd2aef08c2ff3d6f8a07de3052fc90137c507b0ede3ea80c21440"}, - {file = "zope.interface-6.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fc711acc4a1c702ca931fdbf7bf7c86f2a27d564c85c4964772dadf0e3c52f5"}, - {file = "zope.interface-6.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:396f5c94654301819a7f3a702c5830f0ea7468d7b154d124ceac823e2419d000"}, - {file = "zope.interface-6.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd374927c00764fcd6fe1046bea243ebdf403fba97a937493ae4be2c8912c2b"}, - {file = "zope.interface-6.2-cp37-cp37m-win_amd64.whl", hash = "sha256:a3046e8ab29b590d723821d0785598e0b2e32b636a0272a38409be43e3ae0550"}, - {file = "zope.interface-6.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de125151a53ecdb39df3cb3deb9951ed834dd6a110a9e795d985b10bb6db4532"}, - {file = "zope.interface-6.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f444de0565db46d26c9fa931ca14f497900a295bd5eba480fc3fad25af8c763e"}, - {file = "zope.interface-6.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e2fefad268ff5c5b314794e27e359e48aeb9c8bb2cbb5748a071757a56f6bb8f"}, - {file = "zope.interface-6.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97785604824981ec8c81850dd25c8071d5ce04717a34296eeac771231fbdd5cd"}, - {file = "zope.interface-6.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7b2bed4eea047a949296e618552d3fed00632dc1b795ee430289bdd0e3717f3"}, - {file = "zope.interface-6.2-cp38-cp38-win_amd64.whl", hash = "sha256:d54f66c511ea01b9ef1d1a57420a93fbb9d48a08ec239f7d9c581092033156d0"}, - {file = "zope.interface-6.2-cp39-cp39-macosx_10_9_x86_64.whl", hash 
= "sha256:5ee9789a20b0081dc469f65ff6c5007e67a940d5541419ca03ef20c6213dd099"}, - {file = "zope.interface-6.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af27b3fe5b6bf9cd01b8e1c5ddea0a0d0a1b8c37dc1c7452f1e90bf817539c6d"}, - {file = "zope.interface-6.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4bce517b85f5debe07b186fc7102b332676760f2e0c92b7185dd49c138734b70"}, - {file = "zope.interface-6.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ae9793f114cee5c464cc0b821ae4d36e1eba961542c6086f391a61aee167b6f"}, - {file = "zope.interface-6.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e87698e2fea5ca2f0a99dff0a64ce8110ea857b640de536c76d92aaa2a91ff3a"}, - {file = "zope.interface-6.2-cp39-cp39-win_amd64.whl", hash = "sha256:b66335bbdbb4c004c25ae01cc4a54fd199afbc1fd164233813c6d3c2293bb7e1"}, - {file = "zope.interface-6.2.tar.gz", hash = "sha256:3b6c62813c63c543a06394a636978b22dffa8c5410affc9331ce6cdb5bfa8565"}, + {file = "zope.interface-6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f32010ffb87759c6a3ad1c65ed4d2e38e51f6b430a1ca11cee901ec2b42e021"}, + {file = "zope.interface-6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e78a183a3c2f555c2ad6aaa1ab572d1c435ba42f1dc3a7e8c82982306a19b785"}, + {file = "zope.interface-6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa0491a9f154cf8519a02026dc85a416192f4cb1efbbf32db4a173ba28b289a"}, + {file = "zope.interface-6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62e32f02b3f26204d9c02c3539c802afc3eefb19d601a0987836ed126efb1f21"}, + {file = "zope.interface-6.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40df4aea777be321b7e68facb901bc67317e94b65d9ab20fb96e0eb3c0b60a1"}, + {file = "zope.interface-6.3-cp310-cp310-win_amd64.whl", hash = "sha256:46034be614d1f75f06e7dcfefba21d609b16b38c21fc912b01a99cb29e58febb"}, + {file = "zope.interface-6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:600101f43a7582d5b9504a7c629a1185a849ce65e60fca0f6968dfc4b76b6d39"}, + {file = "zope.interface-6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d6b229f5e1a6375f206455cc0a63a8e502ed190fe7eb15e94a312dc69d40299"}, + {file = "zope.interface-6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10cde8dc6b2fd6a1d0b5ca4be820063e46ddba417ab82bcf55afe2227337b130"}, + {file = "zope.interface-6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40aa8c8e964d47d713b226c5baf5f13cdf3a3169c7a2653163b17ff2e2334d10"}, + {file = "zope.interface-6.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d165d7774d558ea971cb867739fb334faf68fc4756a784e689e11efa3becd59e"}, + {file = "zope.interface-6.3-cp311-cp311-win_amd64.whl", hash = "sha256:69dedb790530c7ca5345899a1b4cb837cc53ba669051ea51e8c18f82f9389061"}, + {file = "zope.interface-6.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8d407e0fd8015f6d5dfad481309638e1968d70e6644e0753f229154667dd6cd5"}, + {file = "zope.interface-6.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:72d5efecad16c619a97744a4f0b67ce1bcc88115aa82fcf1dc5be9bb403bcc0b"}, + {file = "zope.interface-6.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:567d54c06306f9c5b6826190628d66753b9f2b0422f4c02d7c6d2b97ebf0a24e"}, + {file = "zope.interface-6.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:483e118b1e075f1819b3c6ace082b9d7d3a6a5eb14b2b375f1b80a0868117920"}, + {file = "zope.interface-6.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb78c12c1ad3a20c0d981a043d133299117b6854f2e14893b156979ed4e1d2c"}, + {file = "zope.interface-6.3-cp312-cp312-win_amd64.whl", hash = "sha256:ad4524289d8dbd6fb5aa17aedb18f5643e7d48358f42c007a5ee51a2afc2a7c5"}, + {file = "zope.interface-6.3-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:a56fe1261230093bfeedc1c1a6cd6f3ec568f9b07f031c9a09f46b201f793a85"}, + {file = "zope.interface-6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:014bb94fe6bf1786da1aa044eadf65bc6437bcb81c451592987e5be91e70a91e"}, + {file = "zope.interface-6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22e8a218e8e2d87d4d9342aa973b7915297a08efbebea5b25900c73e78ed468e"}, + {file = "zope.interface-6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f95bebd0afe86b2adc074df29edb6848fc4d474ff24075e2c263d698774e108d"}, + {file = "zope.interface-6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:d0e7321557c702bd92dac3c66a2f22b963155fdb4600133b6b29597f62b71b12"}, + {file = "zope.interface-6.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:187f7900b63845dcdef1be320a523dbbdba94d89cae570edc2781eb55f8c2f86"}, + {file = "zope.interface-6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a058e6cf8d68a5a19cb5449f42a404f0d6c2778b897e6ce8fadda9cea308b1b0"}, + {file = "zope.interface-6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8fa0fb05083a1a4216b4b881fdefa71c5d9a106e9b094cd4399af6b52873e91"}, + {file = "zope.interface-6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:26c9a37fb395a703e39b11b00b9e921c48f82b6e32cc5851ad5d0618cd8876b5"}, + {file = "zope.interface-6.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b0c4c90e5eefca2c3e045d9f9ed9f1e2cdbe70eb906bff6b247e17119ad89a1"}, + {file = "zope.interface-6.3-cp38-cp38-win_amd64.whl", hash = "sha256:5683aa8f2639016fd2b421df44301f10820e28a9b96382a6e438e5c6427253af"}, + {file = "zope.interface-6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c3cfb272bcb83650e6695d49ae0d14dd06dc694789a3d929f23758557a23d92"}, + {file = "zope.interface-6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:01a0b3dd012f584afcf03ed814bce0fc40ed10e47396578621509ac031be98bf"}, + {file = "zope.interface-6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4137025731e824eee8d263b20682b28a0bdc0508de9c11d6c6be54163e5b7c83"}, + {file = "zope.interface-6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c8731596198198746f7ce2a4487a0edcbc9ea5e5918f0ab23c4859bce56055c"}, + {file = "zope.interface-6.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf34840e102d1d0b2d39b1465918d90b312b1119552cebb61a242c42079817b9"}, + {file = "zope.interface-6.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:a1adc14a2a9d5e95f76df625a9b39f4709267a483962a572e3f3001ef90ea6e6"}, + {file = "zope.interface-6.3.tar.gz", hash = "sha256:f83d6b4b22262d9a826c3bd4b2fbfafe1d0000f085ef8e44cd1328eea274ae6a"}, ] [package.dependencies] setuptools = "*" [package.extras] -docs = ["Sphinx", "repoze.sphinx.autointerface", "sphinx_rtd_theme"] +docs = ["Sphinx", "repoze.sphinx.autointerface", "sphinx-rtd-theme"] test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] @@ -3157,4 +3302,4 @@ worker = ["onetl", "psycopg2-binary"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "0a823a381af17d26a97485fd57425bdd141dcace9a047be6f1e77e77c2ae5408" +content-hash = "d7ccf6b7a56333be37973074706e0638929e349f2c80b384e847afe6f40984ff" diff --git a/pyproject.toml b/pyproject.toml index 1d2eba21..c9d47100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ pydantic = "^2.6.4" python-jose = {extras = ["cryptography"], version = "^3.3.0"} python-multipart = "^0.0.9" celery = "^5.3.3" -onetl = {version = "^0.10.2", extras = ["spark"]} +onetl = {version = "^0.10.2", extras = ["spark", "kerberos"]} psycopg2-binary = {version = "^2.9.7", optional = true } fastapi = {version = "^0.110.0", optional = true} uvicorn = {version = "^0.29.0", optional = true } From d7099769b20de245c132eacb9a63d9b60540ad01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 15 Apr 2024 19:09:20 +0300 Subject: [PATCH 04/18] [DOP-15023] Install Kerberos libs to worker image --- docker/Dockerfile.worker | 4 ++-- poetry.lock | 16 ++++++++-------- pyproject.toml | 7 ++++++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docker/Dockerfile.worker b/docker/Dockerfile.worker index 53fd72ff..d3a7e71b 100644 --- a/docker/Dockerfile.worker +++ b/docker/Dockerfile.worker @@ -26,7 +26,7 @@ WORKDIR /app COPY ./pyproject.toml ./poetry.lock* /app/ RUN pip install --upgrade pip setuptools wheel packaging -RUN poetry install --no-root --extras "worker" --without test,docs,dev +RUN poetry install --no-root --extras "worker kerberos" --without test,docs,dev COPY ./syncmaster/ /app/syncmaster/ ENV PYTHONPATH=/app @@ -41,5 +41,5 @@ FROM prod as test ENV CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session.get_worker_spark_session # CI runs tests in the worker container, so we need backend dependencies too -RUN poetry install --no-root --extras "worker backend" --with test --without docs,dev +RUN poetry install --no-root --extras "worker backend kerberos" --with test --without docs,dev RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh diff --git a/poetry.lock b/poetry.lock index 79602b38..2257160b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -783,7 +783,7 @@ test-randomorder = ["pytest-randomly"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = false +optional = true python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, @@ -1135,7 +1135,7 @@ test = ["objgraph", "psutil"] name = "gssapi" version = "1.8.3" description = "Python GSSAPI Wrapper" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "gssapi-1.8.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:4e4a83e9b275fe69b5d40be6d5479889866b80333a12c51a9243f2712d4f0554"}, @@ -1405,7 +1405,7 @@ zookeeper = ["kazoo (>=2.8.0)"] name = "krb5" version = "0.5.1" description = "Kerberos API bindings for Python" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "krb5-0.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e51c700cf148e63fef60bc4b2c453018218a3170dedbfe2840f122aee5a453e7"}, @@ -1734,7 +1734,6 @@ platformdirs = "*" pydantic = ">=1.9.2,<1.10.2 || >1.10.2,<3" pyspark = {version = "*", optional = true, markers = "extra == \"spark\""} pyyaml = "*" -requests-kerberos = {version = ">=0.7.0", optional = true, markers = "extra == \"kerberos\""} typing-extensions = {version = ">=4.5.0", markers = "python_version > \"3.7\""} [package.extras] @@ -2247,7 +2246,7 @@ sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] name = "pyspnego" version = "0.10.2" description = "Windows Negotiate Authentication Client and Server" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pyspnego-0.10.2-py3-none-any.whl", hash = "sha256:3d5c5c28dbd0cd6a679acf45219630254db3c0e5ad4a16de521caa0585b088c0"}, @@ -2496,7 +2495,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-kerberos" version = "0.14.0" description = "A Kerberos authentication handler for python-requests" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "requests-kerberos-0.14.0.tar.gz", hash = "sha256:cda9d1240ae5392e081869881c8742d0e171fd6a893a7ac0875db2748e966fd1"}, @@ -2975,7 +2974,7 @@ url = ["furl (>=0.4.1)"] name = "sspilib" version = "0.1.0" description = "SSPI API bindings for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "sspilib-0.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e43f3e684e9d29c80324bd54f52dac65ac4b18d81a2dcd529dce3994369a14d"}, @@ -3297,9 +3296,10 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [extras] backend = ["alembic", "asyncpg", "fastapi", "uvicorn"] +kerberos = ["requests-kerberos"] worker = ["onetl", "psycopg2-binary"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "d7ccf6b7a56333be37973074706e0638929e349f2c80b384e847afe6f40984ff" +content-hash = "7f3cb7e67bade09a3d4be6c42496eeab07574e334b6cab42e6ddef43c9f5018d" diff --git a/pyproject.toml b/pyproject.toml index c9d47100..5aec0186 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,8 @@ pydantic = "^2.6.4" python-jose = {extras = ["cryptography"], version = "^3.3.0"} python-multipart = "^0.0.9" celery = "^5.3.3" -onetl = {version = "^0.10.2", extras = ["spark", "kerberos"]} +onetl = {version = "^0.10.2", extras = ["spark"]} +requests-kerberos = {version = "^0.14.0", optional = true} psycopg2-binary = {version = "^2.9.7", optional = true } fastapi = {version = "^0.110.0", optional = true} uvicorn = {version = "^0.29.0", optional = true } @@ -74,6 +75,10 @@ worker = [ "psycopg2-binary", ] +kerberos = [ + "requests-kerberos", +] + [tool.poetry.group.test.dependencies] pandas-stubs = "^2.1.4.231227" pytest = "^8.1.1" From b2f8cd7b64b42b750a6a3686ce4bbb4da9de6960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 15 Apr 2024 19:11:26 +0300 Subject: [PATCH 05/18] [DOP-15023] Remove Kerberos libs from worker image --- docker/Dockerfile.worker | 6 +- poetry.lock | 147 
+-------------------------------------- pyproject.toml | 5 -- 3 files changed, 3 insertions(+), 155 deletions(-) diff --git a/docker/Dockerfile.worker b/docker/Dockerfile.worker index d3a7e71b..5cc28c70 100644 --- a/docker/Dockerfile.worker +++ b/docker/Dockerfile.worker @@ -2,8 +2,6 @@ ARG BASE_IMAGE=python:3.11-slim FROM $BASE_IMAGE AS prod RUN apt-get update && apt-get install -y \ - krb5-user \ - libkrb5-dev \ libsasl2-dev \ libsasl2-modules-gssapi-mit \ libsasl2-modules-ldap \ @@ -26,7 +24,7 @@ WORKDIR /app COPY ./pyproject.toml ./poetry.lock* /app/ RUN pip install --upgrade pip setuptools wheel packaging -RUN poetry install --no-root --extras "worker kerberos" --without test,docs,dev +RUN poetry install --no-root --extras "worker" --without test,docs,dev COPY ./syncmaster/ /app/syncmaster/ ENV PYTHONPATH=/app @@ -41,5 +39,5 @@ FROM prod as test ENV CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session.get_worker_spark_session # CI runs tests in the worker container, so we need backend dependencies too -RUN poetry install --no-root --extras "worker backend kerberos" --with test --without docs,dev +RUN poetry install --no-root --extras "worker backend" --with test --without docs,dev RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh diff --git a/poetry.lock b/poetry.lock index 2257160b..e5dc51de 100644 --- a/poetry.lock +++ b/poetry.lock @@ -779,17 +779,6 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] -[[package]] -name = "decorator" -version = "5.1.1" -description = "Decorators for Humans" -optional = true -python-versions = ">=3.5" -files = [ - {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, - {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, -] - [[package]] name = "distlib" version = "0.3.8" @@ -1131,42 +1120,6 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] -[[package]] -name = "gssapi" -version = "1.8.3" -description = "Python GSSAPI Wrapper" -optional = true -python-versions = ">=3.7" -files = [ - {file = "gssapi-1.8.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4e4a83e9b275fe69b5d40be6d5479889866b80333a12c51a9243f2712d4f0554"}, - {file = "gssapi-1.8.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8d57d67547e18f4e44a688bfb20abbf176d1b8df547da2b31c3f2df03cfdc269"}, - {file = "gssapi-1.8.3-cp310-cp310-win32.whl", hash = "sha256:3a3f63105f39c4af29ffc8f7b6542053d87fe9d63010c689dd9a9f5571facb8e"}, - {file = "gssapi-1.8.3-cp310-cp310-win_amd64.whl", hash = "sha256:b031c0f186ab4275186da385b2c7470dd47c9b27522cb3b753757c9ac4bebf11"}, - {file = "gssapi-1.8.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b03d6b30f1fcd66d9a688b45a97e302e4dd3f1386d5c333442731aec73cdb409"}, - {file = "gssapi-1.8.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca6ceb17fc15eda2a69f2e8c6cf10d11e2edb32832255e5d4c65b21b6db4680a"}, - {file = "gssapi-1.8.3-cp311-cp311-win32.whl", hash = "sha256:edc8ef3a9e397dbe18bb6016f8e2209969677b534316d20bb139da2865a38efe"}, - {file = "gssapi-1.8.3-cp311-cp311-win_amd64.whl", hash = "sha256:8fdb1ff130cee49bc865ec1624dee8cf445cd6c6e93b04bffef2c6f363a60cb9"}, - {file = "gssapi-1.8.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:19c373b3ba63ce19cd3163aa1495635e3d01b0de6cc4ff1126095eded1df6e01"}, - {file = 
"gssapi-1.8.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37f1a8046d695f2c9b8d640a6e385780d3945c0741571ed6fee6f94c31e431dc"}, - {file = "gssapi-1.8.3-cp312-cp312-win32.whl", hash = "sha256:338db18612e3e6ed64e92b6d849242a535fdc98b365f21122992fb8cae737617"}, - {file = "gssapi-1.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:5731c5b40ecc3116cfe7fb7e1d1e128583ec8b3df1e68bf8cd12073160793acd"}, - {file = "gssapi-1.8.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e556878da197ad115a566d36e46a8082d0079731d9c24d1ace795132d725ff2a"}, - {file = "gssapi-1.8.3-cp37-cp37m-win32.whl", hash = "sha256:e2bb081f2db2111377effe7d40ba23f9a87359b9d2f4881552b731e9da88b36b"}, - {file = "gssapi-1.8.3-cp37-cp37m-win_amd64.whl", hash = "sha256:4d9ed83f2064cda60aad90e6840ae282096801b2c814b8cbd390bf0df4635aab"}, - {file = "gssapi-1.8.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7d91fe6e2a5c89b32102ea8e374b8ae13b9031d43d7b55f3abc1f194ddce820d"}, - {file = "gssapi-1.8.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d5b28237afc0668046934792756dd4b6b7e957b0d95a608d02f296734a2819ad"}, - {file = "gssapi-1.8.3-cp38-cp38-win32.whl", hash = "sha256:791e44f7bea602b8e3da1ec56fbdb383b8ee3326fdeb736f904c2aa9af13a67d"}, - {file = "gssapi-1.8.3-cp38-cp38-win_amd64.whl", hash = "sha256:5b4bf84d0a6d7779a4bf11dacfd3db57ae02dd53562e2aeadac4219a68eaee07"}, - {file = "gssapi-1.8.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e40efc88ccefefd6142f8c47b8af498731938958b808bad49990442a91f45160"}, - {file = "gssapi-1.8.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee74b9211c977b9181ff4652d886d7712c9a221560752a35393b58e5ea07887a"}, - {file = "gssapi-1.8.3-cp39-cp39-win32.whl", hash = "sha256:465c6788f2ac6ef7c738394ba8fde1ede6004e5721766f386add63891d8c90af"}, - {file = "gssapi-1.8.3-cp39-cp39-win_amd64.whl", hash = "sha256:8fb8ee70458f47b51ed881a6881f30b187c987c02af16cc0fff0079255d4d465"}, - {file = "gssapi-1.8.3.tar.gz", hash = "sha256:aa3c8d0b1526f52559552bb2c9d2d6be013d76a8e5db00b39a1db5727e93b0b0"}, -] - -[package.dependencies] -decorator = "*" - [[package]] name = "h11" version = "0.14.0" @@ -1401,27 +1354,6 @@ sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] -[[package]] -name = "krb5" -version = "0.5.1" -description = "Kerberos API bindings for Python" -optional = true -python-versions = ">=3.7" -files = [ - {file = "krb5-0.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e51c700cf148e63fef60bc4b2c453018218a3170dedbfe2840f122aee5a453e7"}, - {file = "krb5-0.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ca9dcb23dc0014f79af0f151bb501bfe4f371b3e54bde78e79ea73dad272eda"}, - {file = "krb5-0.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:83d0a7d44130681f6a8168fc3609d783c77868fe1ab4a9861da30ae8212d632a"}, - {file = "krb5-0.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2269fb6c0813cd7f58526a152d746aebb8e48026b92856093865414395c185e9"}, - {file = "krb5-0.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65fbbcf6de0fecee56a05370b6f65230c121a0cadad8e6a56f5a852bdeecaa6"}, - {file = "krb5-0.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e0412d84484bf37158f040baa86ac3c08604251f9d0afdf2e9659b237ce3cdfa"}, - {file = "krb5-0.5.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fb69fe96be7197f007b5b20172346728349d0b03a39b3343e8793fabb3d28626"}, - {file = "krb5-0.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:73badd6982e8af81e4cca82c4f1a6dbcc50257d700072b8df7c84ea003c1b5e4"}, - {file = 
"krb5-0.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a9b4109adf9f02d0885c96611aba1945970b5319a93cb427617049d6536921ac"}, - {file = "krb5-0.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:df15e3fe8b1d03cf715b5866215da6b131e1fd5ddd6e7f659e74bb79498033b2"}, - {file = "krb5-0.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3008124d01da50559ee7ac2ce0045ee069963f3086ec5c2460b07da6fae4fdda"}, - {file = "krb5-0.5.1.tar.gz", hash = "sha256:7125ee240dad951cc0a71e567c51b215238e490e87ad67b1af9a69dd90e63bca"}, -] - [[package]] name = "mako" version = "1.3.3" @@ -2242,27 +2174,6 @@ mllib = ["numpy (>=1.15)"] pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] -[[package]] -name = "pyspnego" -version = "0.10.2" -description = "Windows Negotiate Authentication Client and Server" -optional = true -python-versions = ">=3.8" -files = [ - {file = "pyspnego-0.10.2-py3-none-any.whl", hash = "sha256:3d5c5c28dbd0cd6a679acf45219630254db3c0e5ad4a16de521caa0585b088c0"}, - {file = "pyspnego-0.10.2.tar.gz", hash = "sha256:9a22c23aeae7b4424fdb2482450d3f8302ac012e2644e1cfe735cf468fcd12ed"}, -] - -[package.dependencies] -cryptography = "*" -gssapi = {version = ">=1.6.0", optional = true, markers = "sys_platform != \"win32\" and extra == \"kerberos\""} -krb5 = {version = ">=0.3.0", optional = true, markers = "sys_platform != \"win32\" and extra == \"kerberos\""} -sspilib = {version = ">=0.1.0", markers = "sys_platform == \"win32\""} - -[package.extras] -kerberos = ["gssapi (>=1.6.0)", "krb5 (>=0.3.0)"] -yaml = ["ruamel.yaml"] - [[package]] name = "pytest" version = "8.1.1" @@ -2491,22 +2402,6 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] -[[package]] -name = "requests-kerberos" -version = "0.14.0" -description = "A Kerberos authentication handler for python-requests" -optional = true -python-versions = ">=3.6" -files = [ - {file = "requests-kerberos-0.14.0.tar.gz", hash = "sha256:cda9d1240ae5392e081869881c8742d0e171fd6a893a7ac0875db2748e966fd1"}, - {file = "requests_kerberos-0.14.0-py2.py3-none-any.whl", hash = "sha256:da74ea478ccd8584de88092bdcd17a7c29d494374a340d1d8677189903c9ac6a"}, -] - -[package.dependencies] -cryptography = ">=1.3" -pyspnego = {version = "*", extras = ["kerberos"]} -requests = ">=1.1.0" - [[package]] name = "rich" version = "13.7.1" @@ -2970,45 +2865,6 @@ test-all = ["Babel (>=1.3)", "Jinja2 (>=2.3)", "Pygments (>=1.2)", "arrow (>=0.3 timezone = ["python-dateutil"] url = ["furl (>=0.4.1)"] -[[package]] -name = "sspilib" -version = "0.1.0" -description = "SSPI API bindings for Python" -optional = true -python-versions = ">=3.8" -files = [ - {file = "sspilib-0.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e43f3e684e9d29c80324bd54f52dac65ac4b18d81a2dcd529dce3994369a14d"}, - {file = "sspilib-0.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1eb34eda5d362b6603707a55751f1eff81775709b821e51cb64d1d2fa2bb8b6e"}, - {file = "sspilib-0.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ffe123f056f78cbe18aaed6b15f06e252020061c3387a72615abd46699a0b24"}, - {file = "sspilib-0.1.0-cp310-cp310-win32.whl", hash = "sha256:a4151072e28ec3b7d785beac9548a3d6a4549c431eb5487a5b8a1de028e9fef0"}, - {file = "sspilib-0.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:2a19696c7b96b6bbef2b2ddf35df5a92f09b268476a348390a2f0da18cf29510"}, - {file = "sspilib-0.1.0-cp310-cp310-win_arm64.whl", hash = 
"sha256:d2778e5e2881405b4d359a604e2802f5b7a7ed433ff62d6073d04c203af10eb1"}, - {file = "sspilib-0.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:09d7f72ad5e4bbf9a8f1acf0d5f0c3f9fbe500f44c4a45ac24a99ece84f5654f"}, - {file = "sspilib-0.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e5705e11aaa030a61d2b0a2ce09d2b8a1962dd950e55adc7a3c87dd463c6878"}, - {file = "sspilib-0.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dced8213d311c56f5f38044716ebff5412cc156f19678659e8ffa9bb6a642bd7"}, - {file = "sspilib-0.1.0-cp311-cp311-win32.whl", hash = "sha256:d30d38d52dbd857732224e86ae3627d003cc510451083c69fa481fc7de88a7b6"}, - {file = "sspilib-0.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:61c9067168cce962f7fead42c28804c3a39a164b9a7b660200b8cfe31e3af071"}, - {file = "sspilib-0.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:b526b8e5a236553f5137b951b89a2f108f56138ad05f31fd0a51b10f80b6c3cc"}, - {file = "sspilib-0.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3ff356d40cd34c900f94f1591eaabd458284042af611ebc1dbf609002066dba5"}, - {file = "sspilib-0.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b0fee3a52d0acef090f6c9b49953a8400fdc1c10aca7334319414a3038aa493"}, - {file = "sspilib-0.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab52d190dad1d578ec40d1fb417a8571954f4e32f35442a14cb709f57d3acbc9"}, - {file = "sspilib-0.1.0-cp312-cp312-win32.whl", hash = "sha256:b3cf819094383ec883e9a63c11b81d622618c815c18a6c9d761d9a14d9f028d1"}, - {file = "sspilib-0.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:b83825a2c43ff84ddff72d09b098057efaabf3841d3c42888078e154cf8e9595"}, - {file = "sspilib-0.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:9aa6ab4c3fc1057251cf1f3f199daf90b99599cdfafc9eade8fdf0c01526dec8"}, - {file = "sspilib-0.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:82bff5df178386027d0112458b6971bbd18c76eb9e7be53fd61dab33d7bf8417"}, - {file = "sspilib-0.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:18393a9e6e0447cb7f319d361b65e9a0eaa5484705f16787133ffc49ad364c28"}, - {file = "sspilib-0.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a423fbca206ba0ca811dc995d8c3af045402b7d330f033e938b24f3a1d93fc"}, - {file = "sspilib-0.1.0-cp38-cp38-win32.whl", hash = "sha256:86bd936b1ef0aa63c6d9623ad08473e74ceb15f342f6e92cbade15ed9574cd33"}, - {file = "sspilib-0.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:d4f688b94f0a64128444063e1d3d59152614175999222f6e2920681faea833f4"}, - {file = "sspilib-0.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2acef24e13e40d9dd8697eaae84ead9f417528ff741d087ec4eb4260518f4dc7"}, - {file = "sspilib-0.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4b625802d80144d856d5eb6e8f4412f186565758da4493c7ad1b88e3d6d353de"}, - {file = "sspilib-0.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c06ca1e34702bca1c750dcb5133b716f316b38dccb28d55a1a44d9842bc3f391"}, - {file = "sspilib-0.1.0-cp39-cp39-win32.whl", hash = "sha256:68496c9bd52b57a1b6d2e5529b43c30060249b8db901127b8343c4ad8cd93670"}, - {file = "sspilib-0.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:369727097f07a440099882580e284e137d9c27b7de354d63b65e327a454e7bee"}, - {file = "sspilib-0.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:87d8268c0517149c51a53b3888961ebf66826bb3dbb82c4e5cf10108f5456104"}, - {file = "sspilib-0.1.0.tar.gz", hash = "sha256:58b5291553cf6220549c0f855e0e6973f4977375d8236ce47bb581efb3e9b1cf"}, -] - [[package]] name = "starlette" version = "0.37.2" @@ -3296,10 
+3152,9 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [extras] backend = ["alembic", "asyncpg", "fastapi", "uvicorn"] -kerberos = ["requests-kerberos"] worker = ["onetl", "psycopg2-binary"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "7f3cb7e67bade09a3d4be6c42496eeab07574e334b6cab42e6ddef43c9f5018d" +content-hash = "0a823a381af17d26a97485fd57425bdd141dcace9a047be6f1e77e77c2ae5408" diff --git a/pyproject.toml b/pyproject.toml index 5aec0186..1d2eba21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,6 @@ python-jose = {extras = ["cryptography"], version = "^3.3.0"} python-multipart = "^0.0.9" celery = "^5.3.3" onetl = {version = "^0.10.2", extras = ["spark"]} -requests-kerberos = {version = "^0.14.0", optional = true} psycopg2-binary = {version = "^2.9.7", optional = true } fastapi = {version = "^0.110.0", optional = true} uvicorn = {version = "^0.29.0", optional = true } @@ -75,10 +74,6 @@ worker = [ "psycopg2-binary", ] -kerberos = [ - "requests-kerberos", -] - [tool.poetry.group.test.dependencies] pandas-stubs = "^2.1.4.231227" pytest = "^8.1.1" From c4366f9619b1a4bae466f55cce838aeecbe61580 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 21:15:09 +0000 Subject: [PATCH 06/18] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black-pre-commit-mirror: 24.3.0 → 24.4.0](https://github.com/psf/black-pre-commit-mirror/compare/24.3.0...24.4.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 321fdb1b..ae14adc0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -73,7 +73,7 @@ repos: - --ignore-init-module-imports - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.3.0 + rev: 24.4.0 hooks: - id: black language_version: python3.11 From 3932c3c676aa95e7ca481e2f9d7594b6cfd6efc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 19 Apr 2024 21:12:50 +0300 Subject: [PATCH 07/18] [DOP-14025] Run HDFS tests in CI --- .github/workflows/hdfs-tests.yml | 14 ++++++++++++++ .github/workflows/hive-tests.yml | 14 ++++++++++++++ .github/workflows/oracle-tests.yml | 14 ++++++++++++++ .github/workflows/s3-tests.yml | 14 ++++++++++++++ .github/workflows/tests.yml | 4 ++++ 5 files changed, 60 insertions(+) diff --git a/.github/workflows/hdfs-tests.yml b/.github/workflows/hdfs-tests.yml index 921dc739..d34c078e 100644 --- a/.github/workflows/hdfs-tests.yml +++ b/.github/workflows/hdfs-tests.yml @@ -66,6 +66,13 @@ jobs: run: | docker compose -f ./docker-compose.test.yml exec -T worker coverage run -m pytest -vvv -s -m "worker and hdfs" + - name: Dump worker logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mtsrus/syncmaster-worker + dest: ./logs + - name: Shutdown if: always() run: | @@ -76,3 +83,10 @@ jobs: with: name: hdfs-tests path: reports/* + + - name: Upload worker logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: hdfs-worker-logs + path: logs/* diff --git a/.github/workflows/hive-tests.yml b/.github/workflows/hive-tests.yml index 346ab9ae..b2ca2305 100644 --- a/.github/workflows/hive-tests.yml +++ 
b/.github/workflows/hive-tests.yml @@ -66,6 +66,13 @@ jobs: run: | docker compose -f ./docker-compose.test.yml exec -T worker coverage run -m pytest -vvv -s -m "worker and hive" + - name: Dump worker logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mtsrus/syncmaster-worker + dest: ./logs + - name: Shutdown if: always() run: | @@ -76,3 +83,10 @@ jobs: with: name: hive-tests path: reports/* + + - name: Upload worker logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: hive-worker-logs + path: logs/* diff --git a/.github/workflows/oracle-tests.yml b/.github/workflows/oracle-tests.yml index 1b2a348b..73890528 100644 --- a/.github/workflows/oracle-tests.yml +++ b/.github/workflows/oracle-tests.yml @@ -66,6 +66,13 @@ jobs: run: | docker compose -f ./docker-compose.test.yml exec -T worker coverage run -m pytest -vvv -s -m "worker and oracle" + - name: Dump worker logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mtsrus/syncmaster-worker + dest: ./logs + - name: Shutdown if: always() run: | @@ -76,3 +83,10 @@ jobs: with: name: oracle-tests path: reports/* + + - name: Upload worker logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: oracle-worker-logs + path: logs/* diff --git a/.github/workflows/s3-tests.yml b/.github/workflows/s3-tests.yml index 46a0fd42..88341f3a 100644 --- a/.github/workflows/s3-tests.yml +++ b/.github/workflows/s3-tests.yml @@ -65,6 +65,13 @@ jobs: run: | docker compose -f ./docker-compose.test.yml exec -T worker coverage run -m pytest -vvv -s -m "worker and s3" + - name: Dump worker logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mtsrus/syncmaster-worker + dest: ./logs + # This is important, as coverage is exported after receiving SIGTERM - name: Shutdown if: always() @@ -76,3 +83,10 @@ jobs: with: name: s3-tests path: reports/* + + - name: Upload worker logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: s3-worker-logs + path: logs/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5d1392dc..f2ca6691 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,6 +20,10 @@ jobs: name: Oracle tests uses: ./.github/workflows/oracle-tests.yml + hdfs_tests: + name: HDFS tests + uses: ./.github/workflows/hdfs-tests.yml + hive_tests: name: Hive tests uses: ./.github/workflows/hive-tests.yml From 1395655c731097bd82df26753a1609398f578ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 19 Apr 2024 14:45:55 +0300 Subject: [PATCH 08/18] [DOP-14025] Remove pathlib.PurePosixPath from API schemas --- docs/changelog/next_release/41.bugfix.rst | 1 + syncmaster/backend/api/v1/router.py | 2 +- .../v1/{transfers/router.py => transfers.py} | 7 -- .../backend/api/v1/transfers/__init__.py | 2 - syncmaster/backend/api/v1/transfers/utils.py | 17 ---- syncmaster/schemas/v1/transfers/file/base.py | 24 +++--- .../test_transfers/test_create_transfer.py | 4 +- .../test_create_transfer.py | 78 +++++++++++++++++++ 8 files changed, 97 insertions(+), 38 deletions(-) create mode 100644 docs/changelog/next_release/41.bugfix.rst rename syncmaster/backend/api/v1/{transfers/router.py => transfers.py} (98%) delete mode 100644 syncmaster/backend/api/v1/transfers/__init__.py delete mode 100644 
syncmaster/backend/api/v1/transfers/utils.py diff --git a/docs/changelog/next_release/41.bugfix.rst b/docs/changelog/next_release/41.bugfix.rst new file mode 100644 index 00000000..5e4c696e --- /dev/null +++ b/docs/changelog/next_release/41.bugfix.rst @@ -0,0 +1 @@ +Fix 500 error while creating HDFS connection. diff --git a/syncmaster/backend/api/v1/router.py b/syncmaster/backend/api/v1/router.py index 28da8cc1..c4cbe0d3 100644 --- a/syncmaster/backend/api/v1/router.py +++ b/syncmaster/backend/api/v1/router.py @@ -6,7 +6,7 @@ from syncmaster.backend.api.v1.connections import router as connection_router from syncmaster.backend.api.v1.groups import router as group_router from syncmaster.backend.api.v1.queue import router as queue_router -from syncmaster.backend.api.v1.transfers.router import router as transfer_router +from syncmaster.backend.api.v1.transfers import router as transfer_router from syncmaster.backend.api.v1.users import router as user_router router = APIRouter(prefix="/v1") diff --git a/syncmaster/backend/api/v1/transfers/router.py b/syncmaster/backend/api/v1/transfers.py similarity index 98% rename from syncmaster/backend/api/v1/transfers/router.py rename to syncmaster/backend/api/v1/transfers.py index 4c795d07..3aca7302 100644 --- a/syncmaster/backend/api/v1/transfers/router.py +++ b/syncmaster/backend/api/v1/transfers.py @@ -6,9 +6,6 @@ from kombu.exceptions import KombuError from syncmaster.backend.api.deps import UnitOfWorkMarker -from syncmaster.backend.api.v1.transfers.utils import ( - process_file_transfer_directory_path, -) from syncmaster.backend.services import UnitOfWork, get_user from syncmaster.db.models import Status, User from syncmaster.db.utils import Permission @@ -115,8 +112,6 @@ async def create_transfer( if transfer_data.group_id != queue.group_id: raise DifferentTransferAndQueueGroupError - transfer_data = process_file_transfer_directory_path(transfer_data) # type: ignore - async with unit_of_work: transfer = await unit_of_work.transfer.create( group_id=transfer_data.group_id, @@ -316,8 +311,6 @@ async def update_transfer( params_type=transfer_data.source_params.type, ) - transfer_data = process_file_transfer_directory_path(transfer_data) # type: ignore - async with unit_of_work: transfer = await unit_of_work.transfer.update( transfer=transfer, diff --git a/syncmaster/backend/api/v1/transfers/__init__.py b/syncmaster/backend/api/v1/transfers/__init__.py deleted file mode 100644 index 104aecaf..00000000 --- a/syncmaster/backend/api/v1/transfers/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) -# SPDX-License-Identifier: Apache-2.0 diff --git a/syncmaster/backend/api/v1/transfers/utils.py b/syncmaster/backend/api/v1/transfers/utils.py deleted file mode 100644 index 22b8232a..00000000 --- a/syncmaster/backend/api/v1/transfers/utils.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) -# SPDX-License-Identifier: Apache-2.0 -from syncmaster.schemas.v1.transfers import CreateTransferSchema, UpdateTransferSchema - - -def process_file_transfer_directory_path( - transfer_data: UpdateTransferSchema | CreateTransferSchema, -) -> UpdateTransferSchema | CreateTransferSchema: - if transfer_data.source_params is not None: - if hasattr(transfer_data.source_params, "directory_path"): # s3 or hdfs connection - transfer_data.source_params.directory_path = str(transfer_data.source_params.directory_path) - - if transfer_data.target_params is not None: - if 
hasattr(transfer_data.source_params, "directory_path"): # s3 or hdfs connection - transfer_data.target_params.directory_path = str(transfer_data.target_params.directory_path) # type: ignore - - return transfer_data diff --git a/syncmaster/schemas/v1/transfers/file/base.py b/syncmaster/schemas/v1/transfers/file/base.py index d01041c3..f03217e6 100644 --- a/syncmaster/schemas/v1/transfers/file/base.py +++ b/syncmaster/schemas/v1/transfers/file/base.py @@ -4,15 +4,11 @@ from pathlib import PurePosixPath -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, Field, field_validator from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine -def validate_directory_path(path: str) -> PurePosixPath: - return PurePosixPath(path) - - # At the moment the ReadTransferSourceParams and ReadTransferTargetParams # classes are identical but may change in the future class ReadFileTransferSource(BaseModel): @@ -28,20 +24,30 @@ class ReadFileTransferTarget(BaseModel): # At the moment the CreateTransferSourceParams and CreateTransferTargetParams # classes are identical but may change in the future class CreateFileTransferSource(BaseModel): - directory_path: PurePosixPath + directory_path: str file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") class Config: arbitrary_types_allowed = True - _validate_dir_path = validator("directory_path", allow_reuse=True, pre=True)(validate_directory_path) + @field_validator("directory_path", mode="before") + @classmethod + def _directory_path_is_valid_path(cls, value): + if not PurePosixPath(value).is_absolute(): + raise ValueError("Directory path must be absolute") + return value class CreateFileTransferTarget(BaseModel): - directory_path: PurePosixPath + directory_path: str file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET ! 
class Config: arbitrary_types_allowed = True - _validate_dir_path = validator("directory_path", allow_reuse=True, pre=True)(validate_directory_path) + @field_validator("directory_path", mode="before") + @classmethod + def _directory_path_is_valid_path(cls, value): + if not PurePosixPath(value).is_absolute(): + raise ValueError("Directory path must be absolute") + return value diff --git a/tests/test_unit/test_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_create_transfer.py index f3af13df..866ed7f0 100644 --- a/tests/test_unit/test_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_create_transfer.py @@ -521,7 +521,7 @@ async def test_developer_plus_cannot_create_transfer_with_other_group_queue( } -async def test_developer_plus_can_not_create_transfer_with_target_s3_json( +async def test_developer_plus_can_not_create_transfer_with_target_format_json( client: AsyncClient, two_group_connections: tuple[MockConnection, MockConnection], session: AsyncSession, @@ -548,7 +548,7 @@ async def test_developer_plus_can_not_create_transfer_with_target_s3_json( "source_params": {"type": "postgres", "table_name": "source_table"}, "target_params": { "type": "s3", - "directory_path": "some/dir", + "directory_path": "/some/dir", "df_schema": {}, "options": {}, "file_format": { diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index f8762140..27277f60 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -173,3 +173,81 @@ async def test_developer_plus_can_create_hdfs_transfer( "strategy_params": transfer.strategy_params, "queue_id": transfer.queue_id, } + + +@pytest.mark.parametrize( + "create_connection_data", + [ + { + "type": "s3", + "host": "localhost", + "port": 443, + }, + ], + indirect=True, +) +@pytest.mark.parametrize( + "target_source_params", + [ + { + "type": "s3", + "directory_path": "some/path", + "file_format": { + "type": "csv", + }, + }, + ], +) +async def test_cannot_create_file_transfer_with_relative_path( + client: AsyncClient, + two_group_connections: tuple[MockConnection, MockConnection], + group_queue: Queue, + mock_group: MockGroup, + target_source_params: dict, + create_connection_data: dict, +): + # Arrange + first_connection, second_connection = two_group_connections + user = mock_group.get_member_of_role(UserTestRoles.Developer) + + # Act + result = await client.post( + "v1/transfers", + headers={"Authorization": f"Bearer {user.token}"}, + json={ + "group_id": mock_group.group.id, + "name": "new test transfer", + "description": "", + "is_scheduled": False, + "schedule": "", + "source_connection_id": first_connection.id, + "target_connection_id": second_connection.id, + "source_params": target_source_params, + "target_params": target_source_params, + "strategy_params": {"type": "full"}, + "queue_id": group_queue.id, + }, + ) + + # Assert + assert result.status_code == 422 + assert result.json() == { + "detail": [ + { + "ctx": {"error": {}}, + "input": "some/path", + "loc": ["body", "source_params", "s3", "directory_path"], + "msg": "Value error, Directory path must be absolute", + "type": "value_error", + "url": "https://errors.pydantic.dev/2.7/v/value_error", + }, + { + "ctx": {"error": {}}, + "input": "some/path", + "loc": ["body", "target_params", "s3", "directory_path"], + "msg": "Value error, 
Directory path must be absolute", + "type": "value_error", + "url": "https://errors.pydantic.dev/2.7/v/value_error", + }, + ], + } From 2a5d95ff24b6325a55461e03a349f30bcc201207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 19 Apr 2024 14:33:14 +0300 Subject: [PATCH 09/18] [DOP-14025] Remove asyncio.gather from SQLAlchemy requests --- docs/changelog/next_release/40.bugfix.rst | 1 + syncmaster/backend/api/v1/connections.py | 68 ++++++++----------- syncmaster/backend/api/v1/transfers.py | 57 +++++++--------- .../db/repositories/credentials_repository.py | 27 ++++---- 4 files changed, 64 insertions(+), 89 deletions(-) create mode 100644 docs/changelog/next_release/40.bugfix.rst diff --git a/docs/changelog/next_release/40.bugfix.rst b/docs/changelog/next_release/40.bugfix.rst new file mode 100644 index 00000000..f62ecd7c --- /dev/null +++ b/docs/changelog/next_release/40.bugfix.rst @@ -0,0 +1 @@ +Do not use ``asyncio.gather`` with SQLAlchemy requests. diff --git a/syncmaster/backend/api/v1/connections.py b/syncmaster/backend/api/v1/connections.py index acfe9a2a..b6965e70 100644 --- a/syncmaster/backend/api/v1/connections.py +++ b/syncmaster/backend/api/v1/connections.py @@ -1,6 +1,5 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 -import asyncio from typing import get_args from fastapi import APIRouter, Depends, Query, status @@ -59,19 +58,17 @@ async def read_connections( items: list[ReadConnectionSchema] = [] if pagination.items: - creds = await asyncio.gather( - *[unit_of_work.credentials.get_for_connection(connection_id=item.id) for item in pagination.items] - ) + credentials = await unit_of_work.credentials.read_bulk([item.id for item in pagination.items]) items = [ ReadConnectionSchema( id=item.id, group_id=item.group_id, name=item.name, description=item.description, - auth_data=creds[n_item], + auth_data=credentials.get(item.id, None), data=item.data, ) - for n_item, item in enumerate(pagination.items) + for item in pagination.items ] return ConnectionPageSchema( @@ -121,7 +118,7 @@ async def create_connection( data=data, ) - await unit_of_work.credentials.add_to_connection( + await unit_of_work.credentials.create( connection_id=connection.id, data=auth_data, ) @@ -155,12 +152,9 @@ async def read_connection( if resource_role == Permission.NONE: raise ConnectionNotFoundError - connection = await unit_of_work.connection.read_by_id(connection_id=connection_id) - + connection = await unit_of_work.connection.read_by_id(connection_id) try: - credentials = await unit_of_work.credentials.get_for_connection( - connection_id=connection.id, - ) + credentials = await unit_of_work.credentials.read(connection.id) except AuthDataNotFoundError: credentials = None @@ -206,7 +200,7 @@ async def update_connection( credential_data=connection_data.auth_data.dict(), ) - auth_data = await unit_of_work.credentials.get_for_connection(connection_id) + auth_data = await unit_of_work.credentials.read(connection_id) return ReadConnectionSchema( id=connection.id, group_id=connection.group_id, @@ -227,28 +221,26 @@ async def delete_connection( user=current_user, resource_id=connection_id, ) - if resource_role == Permission.NONE: raise ConnectionNotFoundError if resource_role < Permission.DELETE: raise ActionNotAllowedError - connection = await 
unit_of_work.connection.read_by_id(connection_id=connection_id) + connection = await unit_of_work.connection.read_by_id(connection_id) + transfers = await unit_of_work.transfer.list_by_connection_id(connection.id) + if transfers: + raise ConnectionDeleteError( + f"The connection has an associated transfers. Number of the connected transfers: {len(transfers)}", + ) - transfers = await unit_of_work.transfer.list_by_connection_id(conn_id=connection.id) async with unit_of_work: - if not transfers: - await unit_of_work.connection.delete(connection_id=connection_id) + await unit_of_work.connection.delete(connection_id) - return StatusResponseSchema( - ok=True, - status_code=status.HTTP_200_OK, - message="Connection was deleted", - ) - - raise ConnectionDeleteError( - f"The connection has an associated transfers. Number of the connected transfers: {len(transfers)}", + return StatusResponseSchema( + ok=True, + status_code=status.HTTP_200_OK, + message="Connection was deleted", ) @@ -259,24 +251,20 @@ async def copy_connection( current_user: User = Depends(get_user(is_active=True)), unit_of_work: UnitOfWork = Depends(UnitOfWorkMarker), ) -> StatusResponseSchema: - target_source_rules = await asyncio.gather( - unit_of_work.connection.get_resource_permission( - user=current_user, - resource_id=connection_id, - ), - unit_of_work.connection.get_group_permission( - user=current_user, - group_id=copy_connection_data.new_group_id, - ), + resource_role = await unit_of_work.connection.get_resource_permission( + user=current_user, + resource_id=connection_id, ) - resource_role, target_group_role = target_source_rules + if resource_role == Permission.NONE: + raise ConnectionNotFoundError if copy_connection_data.remove_source and resource_role < Permission.DELETE: raise ActionNotAllowedError - if resource_role == Permission.NONE: - raise ConnectionNotFoundError - + target_group_role = await unit_of_work.connection.get_group_permission( + user=current_user, + group_id=copy_connection_data.new_group_id, + ) if target_group_role == Permission.NONE: raise GroupNotFoundError @@ -291,7 +279,7 @@ async def copy_connection( ) if copy_connection_data.remove_source: - await unit_of_work.connection.delete(connection_id=connection_id) + await unit_of_work.connection.delete(connection_id) return StatusResponseSchema( ok=True, diff --git a/syncmaster/backend/api/v1/transfers.py b/syncmaster/backend/api/v1/transfers.py index 3aca7302..56fda049 100644 --- a/syncmaster/backend/api/v1/transfers.py +++ b/syncmaster/backend/api/v1/transfers.py @@ -1,6 +1,5 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 -import asyncio from fastapi import APIRouter, Depends, Query, status from kombu.exceptions import KombuError @@ -76,16 +75,11 @@ async def create_transfer( user=current_user, group_id=transfer_data.group_id, ) - if group_permission < Permission.WRITE: raise ActionNotAllowedError - target_connection = await unit_of_work.connection.read_by_id( - connection_id=transfer_data.target_connection_id, - ) - source_connection = await unit_of_work.connection.read_by_id( - connection_id=transfer_data.source_connection_id, - ) + target_connection = await unit_of_work.connection.read_by_id(transfer_data.target_connection_id) + source_connection = await unit_of_work.connection.read_by_id(transfer_data.source_connection_id) queue = await unit_of_work.queue.read_by_id(transfer_data.queue_id) if ( @@ -153,44 +147,39 @@ async def copy_transfer( current_user: User = 
Depends(get_user(is_active=True)), unit_of_work: UnitOfWork = Depends(UnitOfWorkMarker), ) -> StatusCopyTransferResponseSchema: - # Check: user can copy transfer - target_source_transfer_rules = await asyncio.gather( - unit_of_work.transfer.get_resource_permission( - user=current_user, - resource_id=transfer_id, - ), - unit_of_work.transfer.get_group_permission( - user=current_user, - group_id=transfer_data.new_group_id, - ), + resource_role = await unit_of_work.transfer.get_resource_permission( + user=current_user, + resource_id=transfer_id, ) - resource_role, target_group_role = target_source_transfer_rules - if resource_role == Permission.NONE: raise TransferNotFoundError - if target_group_role < Permission.WRITE: - raise ActionNotAllowedError - # Check: user can delete transfer if transfer_data.remove_source and resource_role < Permission.DELETE: raise ActionNotAllowedError + target_group_role = await unit_of_work.transfer.get_group_permission( + user=current_user, + group_id=transfer_data.new_group_id, + ) + if target_group_role < Permission.WRITE: + raise ActionNotAllowedError + transfer = await unit_of_work.transfer.read_by_id(transfer_id=transfer_id) + # Check: user can copy connection - target_source_connection_rules = await asyncio.gather( - unit_of_work.connection.get_resource_permission( - user=current_user, - resource_id=transfer.source_connection_id, - ), - unit_of_work.connection.get_resource_permission( - user=current_user, - resource_id=transfer.target_connection_id, - ), + source_connection_role = await unit_of_work.connection.get_resource_permission( + user=current_user, + resource_id=transfer.source_connection_id, ) - source_connection_role, target_connection_role = target_source_connection_rules + if source_connection_role == Permission.NONE: + raise ConnectionNotFoundError - if source_connection_role == Permission.NONE or target_connection_role == Permission.NONE: + target_connection_role = await unit_of_work.connection.get_resource_permission( + user=current_user, + resource_id=transfer.target_connection_id, + ) + if target_connection_role == Permission.NONE: raise ConnectionNotFoundError # Check: new queue exists diff --git a/syncmaster/db/repositories/credentials_repository.py b/syncmaster/db/repositories/credentials_repository.py index b16e94d5..e96b651a 100644 --- a/syncmaster/db/repositories/credentials_repository.py +++ b/syncmaster/db/repositories/credentials_repository.py @@ -4,7 +4,7 @@ from typing import NoReturn -from sqlalchemy import ScalarResult, delete, insert, select +from sqlalchemy import ScalarResult, insert, select from sqlalchemy.exc import DBAPIError, IntegrityError, NoResultFound from sqlalchemy.ext.asyncio import AsyncSession @@ -26,7 +26,7 @@ def __init__( super().__init__(model=model, session=session) self._settings = settings - async def get_for_connection( + async def read( self, connection_id: int, ) -> dict: @@ -37,7 +37,15 @@ async def get_for_connection( except NoResultFound as e: raise AuthDataNotFoundError(f"Connection id = {connection_id}") from e - async def add_to_connection(self, connection_id: int, data: dict) -> AuthData: + async def read_bulk( + self, + connection_ids: list[int], + ) -> dict[int, dict]: + query = select(AuthData).where(AuthData.connection_id.in_(connection_ids)) + result: ScalarResult[AuthData] = await self._session.scalars(query) + return {item.connection_id: decrypt_auth_data(item.value, settings=self._settings) for item in result} + + async def create(self, connection_id: int, data: dict) -> AuthData: query 
= ( insert(AuthData) .values( @@ -54,23 +62,12 @@ async def add_to_connection(self, connection_id: int, data: dict) -> AuthData: await self._session.flush() return result.one() - async def delete_from_connection(self, connection_id: int) -> AuthData: - query = delete(AuthData).where(AuthData.connection_id == connection_id).returning(AuthData) - - try: - result: ScalarResult[AuthData] = await self._session.scalars(query) - except IntegrityError as e: - self._raise_error(e) - else: - await self._session.flush() - return result.one() - async def update( self, connection_id: int, credential_data: dict, ) -> AuthData: - creds = await self.get_for_connection(connection_id) + creds = await self.read(connection_id) try: for key in creds: if key not in credential_data or credential_data[key] is None: From b229b92c7becc34dfaea0c5fde2c5d6f4b13793e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 19 Apr 2024 14:11:03 +0300 Subject: [PATCH 10/18] [DOP-14025] Fix patching connection with new auth_data.password value --- docs/changelog/next_release/39.bugfix.rst | 1 + syncmaster/backend/api/v1/connections.py | 31 ++++++---------- syncmaster/db/repositories/connection.py | 8 ++-- .../db/repositories/credentials_repository.py | 8 ++-- syncmaster/db/repositories/utils.py | 24 +++++++++--- .../test_update_connection.py | 37 ++++++++++++++++++- 6 files changed, 74 insertions(+), 35 deletions(-) create mode 100644 docs/changelog/next_release/39.bugfix.rst diff --git a/docs/changelog/next_release/39.bugfix.rst b/docs/changelog/next_release/39.bugfix.rst new file mode 100644 index 00000000..2f621f6f --- /dev/null +++ b/docs/changelog/next_release/39.bugfix.rst @@ -0,0 +1 @@ +Fix 500 error in case of ``PATCH v1/connections/:id`` request with passed ``auth_data.password`` field value diff --git a/syncmaster/backend/api/v1/connections.py b/syncmaster/backend/api/v1/connections.py index b6965e70..6f40651e 100644 --- a/syncmaster/backend/api/v1/connections.py +++ b/syncmaster/backend/api/v1/connections.py @@ -3,7 +3,6 @@ from typing import get_args from fastapi import APIRouter, Depends, Query, status -from pydantic import SecretStr from syncmaster.backend.api.deps import UnitOfWorkMarker from syncmaster.backend.services import UnitOfWork, get_user @@ -103,33 +102,27 @@ async def create_connection( if group_permission < Permission.WRITE: raise ActionNotAllowedError - data = connection_data.data.dict() - auth_data = connection_data.auth_data.dict() - - # Trick to serialize SecretStr to JSON - for k, v in auth_data.items(): - if isinstance(v, SecretStr): - auth_data[k] = v.get_secret_value() async with unit_of_work: connection = await unit_of_work.connection.create( name=connection_data.name, description=connection_data.description, group_id=connection_data.group_id, - data=data, + data=connection_data.data.dict(), ) await unit_of_work.credentials.create( connection_id=connection.id, - data=auth_data, + data=connection_data.auth_data.dict(), ) + credentials = await unit_of_work.credentials.read(connection.id) return ReadConnectionSchema( id=connection.id, group_id=connection.group_id, name=connection.name, description=connection.description, data=connection.data, - auth_data=auth_data, + auth_data=credentials, ) @@ -171,7 +164,7 @@ async def read_connection( @router.patch("/connections/{connection_id}") async def update_connection( connection_id: 
int, - connection_data: UpdateConnectionSchema, + changes: UpdateConnectionSchema, current_user: User = Depends(get_user(is_active=True)), unit_of_work: UnitOfWork = Depends(UnitOfWorkMarker), ) -> ReadConnectionSchema: @@ -189,25 +182,25 @@ async def update_connection( async with unit_of_work: connection = await unit_of_work.connection.update( connection_id=connection_id, - name=connection_data.name, - description=connection_data.description, - connection_data=connection_data.data.dict(exclude={"auth_data"}) if connection_data.data else {}, + name=changes.name, + description=changes.description, + data=changes.data.dict(exclude={"auth_data"}) if changes.data else {}, ) - if connection_data.auth_data: + if changes.auth_data: await unit_of_work.credentials.update( connection_id=connection_id, - credential_data=connection_data.auth_data.dict(), + data=changes.auth_data.dict(), ) - auth_data = await unit_of_work.credentials.read(connection_id) + credentials = await unit_of_work.credentials.read(connection_id) return ReadConnectionSchema( id=connection.id, group_id=connection.group_id, name=connection.name, description=connection.description, data=connection.data, - auth_data=auth_data, + auth_data=credentials, ) diff --git a/syncmaster/db/repositories/connection.py b/syncmaster/db/repositories/connection.py index 301a3180..d6b03ac0 100644 --- a/syncmaster/db/repositories/connection.py +++ b/syncmaster/db/repositories/connection.py @@ -81,19 +81,19 @@ async def update( connection_id: int, name: str | None, description: str | None, - connection_data: dict[str, Any], + data: dict[str, Any], ) -> Connection: try: connection = await self.read_by_id(connection_id=connection_id) for key in connection.data: - if key not in connection_data or connection_data[key] is None: - connection_data[key] = connection.data[key] + data[key] = data.get(key, None) or connection.data[key] + return await self._update( Connection.id == connection_id, Connection.is_deleted.is_(False), name=name or connection.name, description=description or connection.description, - data=connection_data, + data=data, ) except IntegrityError as e: self._raise_error(e) diff --git a/syncmaster/db/repositories/credentials_repository.py b/syncmaster/db/repositories/credentials_repository.py index e96b651a..03e5d298 100644 --- a/syncmaster/db/repositories/credentials_repository.py +++ b/syncmaster/db/repositories/credentials_repository.py @@ -65,17 +65,15 @@ async def create(self, connection_id: int, data: dict) -> AuthData: async def update( self, connection_id: int, - credential_data: dict, + data: dict, ) -> AuthData: creds = await self.read(connection_id) try: for key in creds: - if key not in credential_data or credential_data[key] is None: - credential_data[key] = creds[key] - + data[key] = data.get(key, None) or creds[key] return await self._update( AuthData.connection_id == connection_id, - value=encrypt_auth_data(value=credential_data, settings=self._settings), + value=encrypt_auth_data(value=data, settings=self._settings), ) except IntegrityError as e: self._raise_error(e) diff --git a/syncmaster/db/repositories/utils.py b/syncmaster/db/repositories/utils.py index c039acfa..2b2acdf3 100644 --- a/syncmaster/db/repositories/utils.py +++ b/syncmaster/db/repositories/utils.py @@ -3,6 +3,7 @@ import json from cryptography.fernet import Fernet +from pydantic import SecretStr from syncmaster.config import Settings @@ -11,15 +12,26 @@ def decrypt_auth_data( value: str, settings: Settings, ) -> dict: - f = Fernet(settings.CRYPTO_KEY) - 
return json.loads(f.decrypt(value)) + decryptor = Fernet(settings.CRYPTO_KEY) + decrypted = decryptor.decrypt(value) + return json.loads(decrypted) + + +def _json_default(value): + if isinstance(value, SecretStr): + return value.get_secret_value() def encrypt_auth_data( value: dict, settings: Settings, ) -> str: - key = str.encode(settings.CRYPTO_KEY) - f = Fernet(key) - token = f.encrypt(str.encode(json.dumps(value))) - return token.decode(encoding="utf-8") + encryptor = Fernet(settings.CRYPTO_KEY) + serialized = json.dumps( + value, + ensure_ascii=False, + sort_keys=True, + default=_json_default, + ) + encrypted = encryptor.encrypt(serialized.encode("utf-8")) + return encrypted.decode("utf-8") diff --git a/tests/test_unit/test_connections/test_update_connection.py b/tests/test_unit/test_connections/test_update_connection.py index 00bcc2f7..1135b62f 100644 --- a/tests/test_unit/test_connections/test_update_connection.py +++ b/tests/test_unit/test_connections/test_update_connection.py @@ -209,7 +209,42 @@ async def test_update_connection_data_fields( assert result.status_code == 200 -async def test_update_connection_auth_data_fields( +async def test_update_connection_auth_data_all_felds( + client: AsyncClient, + group_connection: MockConnection, + role_developer_plus: UserTestRoles, +): + # Arrange + user = group_connection.owner_group.get_member_of_role(role_developer_plus) + # Act + result = await client.patch( + f"v1/connections/{group_connection.id}", + headers={"Authorization": f"Bearer {user.token}"}, + json={"auth_data": {"type": "postgres", "user": "new_user", "password": "new_password"}}, + ) + + # Assert + assert result.json() == { + "id": group_connection.id, + "name": group_connection.name, + "description": group_connection.description, + "group_id": group_connection.group_id, + "connection_data": { + "type": group_connection.data["type"], + "host": "127.0.0.1", + "port": group_connection.data["port"], + "additional_params": group_connection.data["additional_params"], + "database_name": group_connection.data["database_name"], + }, + "auth_data": { + "type": group_connection.credentials.value["type"], + "user": "new_user", + }, + } + assert result.status_code == 200 + + +async def test_update_connection_auth_data_partial( client: AsyncClient, group_connection: MockConnection, role_developer_plus: UserTestRoles, From a1d4dba684ddf33de9056b3a040520e1b73216f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 19 Apr 2024 16:06:46 +0000 Subject: [PATCH 11/18] [DOP-14025] Split up fixtures to integration tests --- .github/workflows/hdfs-tests.yml | 13 - .github/workflows/hive-tests.yml | 13 - .github/workflows/oracle-tests.yml | 13 - .github/workflows/s3-tests.yml | 13 - docker-compose.test.yml | 16 +- docker-compose.yml | 6 +- tests/conftest.py | 10 +- .../test_run_transfer/conftest.py | 762 +++++++----------- .../test_run_transfer/test_hdfs.py | 227 ++++++ .../test_hdfs_to_postgres.py | 137 ---- .../test_hive_to_postgres.py | 46 -- .../test_run_transfer/test_hve.py | 239 ++++++ .../test_run_transfer/test_oracle.py | 243 ++++++ .../test_oracle_to_postgres.py | 46 -- .../test_postgres_to_hdfs.py | 106 --- .../test_postgres_to_hive.py | 46 -- .../test_postgres_to_oracle.py | 46 -- .../test_run_transfer/test_postgres_to_s3.py | 106 --- .../test_read_mixed_column_naming.py | 132 --- .../test_run_transfer/test_s3.py 
| 226 ++++++ .../test_run_transfer/test_s3_to_postgres.py | 137 ---- tests/utils.py | 15 +- 22 files changed, 1247 insertions(+), 1351 deletions(-) create mode 100644 tests/test_integration/test_run_transfer/test_hdfs.py delete mode 100644 tests/test_integration/test_run_transfer/test_hdfs_to_postgres.py delete mode 100644 tests/test_integration/test_run_transfer/test_hive_to_postgres.py create mode 100644 tests/test_integration/test_run_transfer/test_hve.py create mode 100644 tests/test_integration/test_run_transfer/test_oracle.py delete mode 100644 tests/test_integration/test_run_transfer/test_oracle_to_postgres.py delete mode 100644 tests/test_integration/test_run_transfer/test_postgres_to_hdfs.py delete mode 100644 tests/test_integration/test_run_transfer/test_postgres_to_hive.py delete mode 100644 tests/test_integration/test_run_transfer/test_postgres_to_oracle.py delete mode 100644 tests/test_integration/test_run_transfer/test_postgres_to_s3.py delete mode 100644 tests/test_integration/test_run_transfer/test_read_mixed_column_naming.py create mode 100644 tests/test_integration/test_run_transfer/test_s3.py delete mode 100644 tests/test_integration/test_run_transfer/test_s3_to_postgres.py diff --git a/.github/workflows/hdfs-tests.yml b/.github/workflows/hdfs-tests.yml index d34c078e..e93e59f6 100644 --- a/.github/workflows/hdfs-tests.yml +++ b/.github/workflows/hdfs-tests.yml @@ -20,19 +20,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - large-packages: true - docker-images: true - swap-storage: true - - name: Cache jars uses: actions/cache@v4 with: diff --git a/.github/workflows/hive-tests.yml b/.github/workflows/hive-tests.yml index b2ca2305..cb21a1bf 100644 --- a/.github/workflows/hive-tests.yml +++ b/.github/workflows/hive-tests.yml @@ -20,19 +20,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - large-packages: true - docker-images: true - swap-storage: true - - name: Cache jars uses: actions/cache@v4 with: diff --git a/.github/workflows/oracle-tests.yml b/.github/workflows/oracle-tests.yml index 73890528..f2ca4379 100644 --- a/.github/workflows/oracle-tests.yml +++ b/.github/workflows/oracle-tests.yml @@ -20,19 +20,6 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - large-packages: true - docker-images: true - swap-storage: true - - name: Cache jars uses: actions/cache@v4 with: diff --git a/.github/workflows/s3-tests.yml b/.github/workflows/s3-tests.yml index 88341f3a..815f4efe 100644 --- a/.github/workflows/s3-tests.yml +++ b/.github/workflows/s3-tests.yml @@ -20,19 +20,6 @@ jobs: - name: Set up Docker Buildx 
uses: docker/setup-buildx-action@v3 - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - large-packages: true - docker-images: true - swap-storage: true - - name: Cache jars uses: actions/cache@v4 with: diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 13d43ff1..d4608a0d 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -1,7 +1,7 @@ version: '3.8' services: db: - image: postgres:15 + image: postgres restart: unless-stopped env_file: .env.docker ports: @@ -26,6 +26,7 @@ services: command: --loglevel=info -Q test_queue env_file: .env.docker volumes: + - ./syncmaster:/app/syncmaster - ./cached_jars:/root/.ivy2 - ./reports:/app/reports - ./tests:/app/tests @@ -35,14 +36,6 @@ services: condition: service_healthy rabbitmq: condition: service_healthy - test-oracle: - condition: service_started # Oracle image does not have healthcheck - test-postgres: - condition: service_healthy - test-hive: - condition: service_healthy - test-s3: - condition: service_healthy backend: image: mtsrus/syncmaster-backend::${BACKEND_IMAGE_TAG:-test} @@ -56,6 +49,7 @@ services: ports: - 8000:8000 volumes: + - ./syncmaster:/app/syncmaster - ./cached_jars:/root/.ivy2 - ./reports:/app/reports - ./tests:/app/tests @@ -96,7 +90,7 @@ services: retries: 3 test-postgres: - image: postgres:15 + image: postgres restart: unless-stopped ports: - 5433:5432 @@ -121,7 +115,7 @@ services: APP_USER_PASSWORD: test_password metastore-hive: - image: postgres:15 + image: postgres restart: unless-stopped environment: POSTGRES_DB: metastore diff --git a/docker-compose.yml b/docker-compose.yml index b606bdc5..3e7262ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: '3.8' services: db: - image: postgres:15 + image: postgres restart: unless-stopped env_file: .env.docker ports: @@ -16,7 +16,7 @@ services: retries: 3 worker: - image: syncmaster_worker + image: mtsrus/syncmaster-worker restart: unless-stopped build: dockerfile: docker/Dockerfile.worker @@ -25,7 +25,7 @@ services: env_file: .env.docker backend: - image: syncmaster_back + image: mtsrus/syncmaster-backend restart: unless-stopped build: dockerfile: docker/Dockerfile.backend diff --git a/tests/conftest.py b/tests/conftest.py index 31293944..ef82ef93 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ import os import secrets from collections.abc import AsyncGenerator -from datetime import datetime from pathlib import Path import pytest @@ -105,15 +104,12 @@ async def session(sessionmaker: async_sessionmaker[AsyncSession]): @pytest_asyncio.fixture(scope="session") -async def client( - settings: Settings, - async_engine: AsyncEngine, -) -> AsyncGenerator: - logger.info("START CLIENT FIXTURE", datetime.now().isoformat()) +async def client(settings: Settings) -> AsyncGenerator: + logger.info("START CLIENT FIXTURE") app = application_factory(settings=settings) async with AsyncClient(app=app, base_url="http://testserver") as client: yield client - logger.info("END CLIENT FIXTURE", datetime.now().isoformat()) + logger.info("END CLIENT FIXTURE") @pytest_asyncio.fixture diff --git a/tests/test_integration/test_run_transfer/conftest.py b/tests/test_integration/test_run_transfer/conftest.py index 8d6bb74a..58f6fcf7 100644 --- 
a/tests/test_integration/test_run_transfer/conftest.py +++ b/tests/test_integration/test_run_transfer/conftest.py @@ -3,9 +3,7 @@ import os import secrets from collections import namedtuple -from itertools import permutations from pathlib import Path, PurePosixPath -from typing import Literal import pytest import pytest_asyncio @@ -22,10 +20,12 @@ StructType, TimestampType, ) +from pytest import FixtureRequest from sqlalchemy.ext.asyncio import AsyncSession from syncmaster.backend.api.v1.auth.utils import sign_jwt from syncmaster.config import Settings, TestSettings +from syncmaster.db.models import Group from syncmaster.dto.connections import ( HDFSConnectionDTO, HiveConnectionDTO, @@ -40,25 +40,12 @@ create_credentials, create_group, create_queue, - create_transfer, create_user, upload_files, ) logger = logging.getLogger(__name__) -df_schema = StructType( - [ - StructField("ID", IntegerType()), - StructField("PHONE_NUMBER", StringType()), - StructField("REGION", StringType()), - StructField("NUMBER", IntegerType()), - StructField("BIRTH_DATE", DateType()), - StructField("REGISTERED_AT", TimestampType()), - StructField("ACCOUNT_BALANCE", DoubleType()), - ], -) - @pytest.fixture(scope="session") def spark(settings: Settings) -> SparkSession: @@ -66,7 +53,7 @@ def spark(settings: Settings) -> SparkSession: def get_spark_session(connection_settings: Settings) -> SparkSession: - logger.info("START GET SPARK SESSION", datetime.datetime.now().isoformat()) + logger.info("START GET SPARK SESSION") maven_packages = [p for connection in (Postgres, Oracle) for p in connection.get_packages()] maven_s3_packages = [p for p in SparkS3.get_packages(spark_version="3.4.1")] maven_packages.extend(maven_s3_packages) @@ -105,7 +92,10 @@ def get_spark_session(connection_settings: Settings) -> SparkSession: return spark.getOrCreate() -@pytest.fixture +@pytest.fixture( + scope="session", + params=[pytest.param("hive", marks=[pytest.mark.hive])], +) def hive(test_settings: TestSettings) -> HiveConnectionDTO: return HiveConnectionDTO( type="hive", @@ -115,7 +105,10 @@ def hive(test_settings: TestSettings) -> HiveConnectionDTO: ) -@pytest.fixture +@pytest.fixture( + scope="session", + params=[pytest.param("hdfs", marks=[pytest.mark.hdfs])], +) def hdfs(test_settings: TestSettings) -> HDFSConnectionDTO: return HDFSConnectionDTO( type="hdfs", @@ -125,7 +118,10 @@ def hdfs(test_settings: TestSettings) -> HDFSConnectionDTO: ) -@pytest.fixture +@pytest.fixture( + scope="session", + params=[pytest.param("oracle", marks=[pytest.mark.oracle])], +) def oracle(test_settings: TestSettings) -> OracleConnectionDTO: return OracleConnectionDTO( type="oracle", @@ -139,7 +135,10 @@ def oracle(test_settings: TestSettings) -> OracleConnectionDTO: ) -@pytest.fixture +@pytest.fixture( + scope="session", + params=[pytest.param("postgres", marks=[pytest.mark.postgres])], +) def postgres(test_settings: TestSettings) -> PostgresConnectionDTO: return PostgresConnectionDTO( type="postgres", @@ -152,7 +151,10 @@ def postgres(test_settings: TestSettings) -> PostgresConnectionDTO: ) -@pytest.fixture(scope="session") +@pytest.fixture( + scope="session", + params=[pytest.param("s3", marks=[pytest.mark.s3])], +) def s3(test_settings: TestSettings) -> S3ConnectionDTO: return S3ConnectionDTO( type="s3", @@ -168,10 +170,20 @@ def s3(test_settings: TestSettings) -> S3ConnectionDTO: @pytest.fixture def init_df(spark: SparkSession) -> DataFrame: - logger.info("START INIT DF", datetime.datetime.now().isoformat()) - df = spark.createDataFrame(data, 
df_schema) - logger.info("END INIT DF", datetime.datetime.now().isoformat()) - + logger.info("START INIT DF") + df_schema = StructType( + [ + StructField("ID", IntegerType()), + StructField("PHONE_NUMBER", StringType()), + StructField("REGION", StringType()), + StructField("NUMBER", IntegerType()), + StructField("BIRTH_DATE", DateType()), + StructField("REGISTERED_AT", TimestampType()), + StructField("ACCOUNT_BALANCE", DoubleType()), + ], + ) + df = spark.createDataFrame(data, schema=df_schema) + logger.info("END INIT DF") return df @@ -179,10 +191,8 @@ def init_df(spark: SparkSession) -> DataFrame: def prepare_postgres( spark: SparkSession, postgres: PostgresConnectionDTO, - init_df: DataFrame, -) -> Postgres: - logger.info("START PREPARE POSTGRES", datetime.datetime.now().isoformat()) - postgres_connection = Postgres( +): + result = Postgres( host=postgres.host, port=postgres.port, user=postgres.user, @@ -190,21 +200,25 @@ def prepare_postgres( database=postgres.database_name, spark=spark, ).check() - postgres_connection.execute("DROP TABLE IF EXISTS public.source_table") - postgres_connection.execute("DROP TABLE IF EXISTS public.target_table") - db_writer = DBWriter( - connection=postgres_connection, - target="public.source_table", - options=Postgres.WriteOptions(if_exists="append"), - ) - db_writer.run(init_df) - logger.info("END PREPARE POSTGRES", datetime.datetime.now().isoformat()) - return postgres_connection + result.execute("DROP TABLE IF EXISTS public.source_table") + result.execute("DROP TABLE IF EXISTS public.target_table") + + def fill_with_data(df: DataFrame): + logger.info("START PREPARE POSTGRES") + db_writer = DBWriter( + connection=result, + target="public.source_table", + options=Postgres.WriteOptions(if_exists="append"), + ) + db_writer.run(df) + logger.info("END PREPARE POSTGRES") + yield result, fill_with_data + result.execute("DROP TABLE IF EXISTS public.source_table") + result.execute("DROP TABLE IF EXISTS public.target_table") -@pytest_asyncio.fixture( - scope="session", -) + +@pytest.fixture(scope="session") def s3_server(s3): S3Server = namedtuple("S3Server", ["host", "port", "bucket", "access_key", "secret_key", "protocol"]) @@ -218,9 +232,7 @@ def s3_server(s3): ) -@pytest_asyncio.fixture( - scope="session", -) +@pytest.fixture(scope="session") def s3_file_connection(s3_server): from onetl.connection import S3 @@ -239,7 +251,7 @@ def s3_file_connection(s3_server): return s3_connection -@pytest_asyncio.fixture(scope="session") +@pytest.fixture(scope="session") def s3_file_connection_with_path(request, s3_file_connection): connection = s3_file_connection source = PurePosixPath("/data") @@ -256,7 +268,7 @@ def finalizer(): return connection, source -@pytest_asyncio.fixture(scope="session") +@pytest.fixture(scope="session") def s3_file_df_connection_with_path(s3_file_connection_with_path, s3_file_df_connection): _, root = s3_file_connection_with_path return s3_file_df_connection, root @@ -269,9 +281,7 @@ def resource_path(): return path -@pytest.fixture( - scope="session", -) +@pytest.fixture(scope="session") def s3_file_df_connection(s3_file_connection, spark, s3_server): from onetl.connection import SparkS3 @@ -289,17 +299,16 @@ def s3_file_df_connection(s3_file_connection, spark, s3_server): ) -@pytest_asyncio.fixture(scope="session") +@pytest.fixture(scope="session") def prepare_s3(resource_path, s3_file_connection, s3_file_df_connection_with_path: tuple[SparkS3, PurePosixPath]): + logger.info("START PREPARE HDFS") connection, upload_to = 
s3_file_df_connection_with_path files = upload_files(resource_path, upload_to, s3_file_connection) - + logger.info("END PREPARE HDFS") return connection, upload_to, files -@pytest.fixture( - scope="session", -) +@pytest.fixture(scope="session") def hdfs_server(): HDFSServer = namedtuple("HDFSServer", ["host", "webhdfs_port", "ipc_port"]) return HDFSServer( @@ -309,9 +318,7 @@ def hdfs_server(): ) -@pytest.fixture( - scope="session", -) +@pytest.fixture(scope="session") def hdfs_file_df_connection(spark, hdfs_server): from onetl.connection import SparkHDFS @@ -323,9 +330,7 @@ def hdfs_file_df_connection(spark, hdfs_server): ) -@pytest.fixture( - scope="session", -) +@pytest.fixture(scope="session") def hdfs_file_connection(hdfs_server): from onetl.connection import HDFS @@ -363,10 +368,10 @@ def prepare_hdfs( hdfs_file_connection, resource_path, ): - logger.info("START PREPARE HDFS", datetime.datetime.now().isoformat()) + logger.info("START PREPARE HDFS") connection, upload_to = hdfs_file_df_connection_with_path files = upload_files(resource_path, upload_to, hdfs_file_connection) - logger.info("END PREPARE HDFS", datetime.datetime.now().isoformat()) + logger.info("END PREPARE HDFS") return connection, upload_to, files @@ -374,34 +379,37 @@ def prepare_hdfs( def prepare_hive( spark: SparkSession, hive: HiveConnectionDTO, - init_df: DataFrame, -) -> Hive: - logger.info("START PREPARE HIVE", datetime.datetime.now().isoformat()) - hive_connection = Hive( +): + result = Hive( cluster=hive.cluster, spark=spark, ).check() - hive_connection.execute("DROP TABLE IF EXISTS public.source_table") - hive_connection.execute("DROP TABLE IF EXISTS public.target_table") - hive_connection.execute("CREATE DATABASE IF NOT EXISTS public") - db_writer = DBWriter( - connection=hive_connection, - target="public.source_table", - ) - db_writer.run(init_df) - spark.catalog.refreshTable("public.source_table") - logger.info("END PREPARE HIVE", datetime.datetime.now().isoformat()) - return hive_connection + result.execute("DROP TABLE IF EXISTS default.source_table") + result.execute("DROP TABLE IF EXISTS default.target_table") + result.execute("CREATE DATABASE IF NOT EXISTS default") + + def fill_with_data(df: DataFrame): + logger.info("START PREPARE HIVE") + db_writer = DBWriter( + connection=result, + target="default.source_table", + ) + db_writer.run(df) + spark.catalog.refreshTable("default.source_table") + logger.info("END PREPARE HIVE") + + yield result, fill_with_data + + result.execute("DROP TABLE IF EXISTS default.source_table") + result.execute("DROP TABLE IF EXISTS default.target_table") @pytest.fixture def prepare_oracle( - init_df: DataFrame, oracle: OracleConnectionDTO, spark: SparkSession, -) -> Oracle: - logger.info("START PREPARE ORACLE", datetime.datetime.now().isoformat()) - oracle_connection = Oracle( +): + result = Oracle( host=oracle.host, port=oracle.port, user=oracle.user, @@ -411,101 +419,143 @@ def prepare_oracle( spark=spark, ).check() try: - oracle_connection.execute(f"DROP TABLE {oracle.user}.source_table") + result.execute(f"DROP TABLE {oracle.user}.source_table") except Exception: pass try: - oracle_connection.execute(f"DROP TABLE {oracle.user}.target_table") + result.execute(f"DROP TABLE {oracle.user}.target_table") except Exception: pass - db_writer = DBWriter( - connection=oracle_connection, - target=f"{oracle.user}.source_table", - options=Oracle.WriteOptions(if_exists="append"), - ) - db_writer.run(init_df) - logger.info("END PREPARE ORACLE", datetime.datetime.now().isoformat()) - 
return oracle_connection + def fill_with_data(df: DataFrame): + logger.info("START PREPARE ORACLE") + db_writer = DBWriter( + connection=result, + target=f"{oracle.user}.source_table", + options=Oracle.WriteOptions(if_exists="append"), + ) + db_writer.run(df) + logger.info("END PREPARE ORACLE") -@pytest_asyncio.fixture(params=["csv"]) -def choice_file_format(request): - file_format: Literal["csv", "jsonline"] = request.param - file_format_object = None - if file_format == "csv": - file_format_object = CSV( + yield result, fill_with_data + + try: + result.execute(f"DROP TABLE {oracle.user}.source_table") + except Exception: + pass + try: + result.execute(f"DROP TABLE {oracle.user}.target_table") + except Exception: + pass + + +@pytest.fixture(params=[("csv", {}), ("jsonline", {}), ("json", {})]) +def source_file_format(request: FixtureRequest): + name, params = request.param + if name == "csv": + return "csv", CSV( lineSep="\n", header=True, + **params, ) - if file_format == "jsonline": - file_format_object = JSONLine( + + if name == "jsonline": + return "jsonline", JSONLine( encoding="utf-8", lineSep="\n", + **params, ) - if file_format == "json": - file_format_object = JSON( + + if name == "json": + return "json", JSON( lineSep="\n", encoding="utf-8", + **params, ) - return file_format, file_format_object + raise ValueError(f"Unsupported file format: {name}") -@pytest_asyncio.fixture(params=[""]) -def choice_file_type(request): - return request.param + +@pytest.fixture(params=[("csv", {}), ("jsonline", {})]) +def target_file_format(request: FixtureRequest): + name, params = request.param + if name == "csv": + return "csv", CSV( + lineSep="\n", + header=True, + timestampFormat="yyyy-MM-dd'T'HH:mm:ss.SSSSSS+00:00", + **params, + ) + + if name == "jsonline": + return "jsonline", JSONLine( + encoding="utf-8", + lineSep="\n", + timestampFormat="yyyy-MM-dd'T'HH:mm:ss.SSSSSS+00:00", + **params, + ) + + raise ValueError(f"Unsupported file format: {name}") @pytest_asyncio.fixture -async def transfers( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_oracle, - prepare_hdfs, - prepare_hive, - prepare_s3, - postgres: PostgresConnectionDTO, +async def group_owner( settings: Settings, session: AsyncSession, - oracle: OracleConnectionDTO, - hive: HiveConnectionDTO, - hdfs: HDFSConnectionDTO, - s3: S3ConnectionDTO, ): - logger.info("START TRANSFERS FIXTURE", datetime.datetime.now().isoformat()) - s3_file_format, file_format_object = choice_file_format - _, source_path, _ = prepare_s3 - user = await create_user( session=session, - username=f"owner_group_{secrets.token_hex(5)}", + username=secrets.token_hex(5), is_active=True, ) - group = await create_group(session=session, name=f"connection_group_{secrets.token_hex(5)}", owner_id=user.id) - hive_connection = await create_connection( - session=session, - name=f"integration_hive_{secrets.token_hex(5)}", - data=dict( - type=hive.type, - cluster=hive.cluster, - ), - group_id=group.id, + + yield MockUser( + user=user, + auth_token=sign_jwt(user.id, settings), + role=UserTestRoles.Owner, ) - await create_credentials( + await session.delete(user) + await session.commit() + + +@pytest_asyncio.fixture +async def group( + session: AsyncSession, + group_owner: MockUser, +): + result = await create_group(session=session, name=secrets.token_hex(5), owner_id=group_owner.user.id) + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture(params=["test_queue"]) +async def queue( + request: FixtureRequest, + 
session: AsyncSession, + group: Group, +): + result = await create_queue( session=session, - settings=settings, - connection_id=hive_connection.id, - auth_data=dict( - type="hive", - user=hive.user, - password=hive.password, - ), + name=request.param, + group_id=group.id, ) + yield result + await session.delete(result) + await session.commit() - postgres_connection = await create_connection( + +@pytest_asyncio.fixture +async def postgres_connection( + postgres: PostgresConnectionDTO, + settings: Settings, + session: AsyncSession, + group: Group, +): + result = await create_connection( session=session, - name=f"integration_postgres_{secrets.token_hex(5)}", + name=secrets.token_hex(5), data=dict( type=postgres.type, host=postgres.host, @@ -519,7 +569,7 @@ async def transfers( await create_credentials( session=session, settings=settings, - connection_id=postgres_connection.id, + connection_id=result.id, auth_data=dict( type="postgres", user=postgres.user, @@ -527,16 +577,24 @@ async def transfers( ), ) - oracle_connection = await create_connection( + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture +async def hive_connection( + hive: HiveConnectionDTO, + settings: Settings, + session: AsyncSession, + group: Group, +): + result = await create_connection( session=session, - name=f"integration_oracle_{secrets.token_hex(5)}", + name=secrets.token_hex(5), data=dict( - type=oracle.type, - host=oracle.host, - port=oracle.port, - sid=oracle.sid, - service_name=oracle.service_name, - additional_params={}, + type=hive.type, + cluster=hive.cluster, ), group_id=group.id, ) @@ -544,26 +602,36 @@ async def transfers( await create_credentials( session=session, settings=settings, - connection_id=oracle_connection.id, + connection_id=result.id, auth_data=dict( - type="oracle", - user=oracle.user, - password=oracle.password, + type="hive", + user=hive.user, + password=hive.password, ), ) - s3_connection = await create_connection( + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture +async def oracle_connection( + oracle: OracleConnectionDTO, + settings: Settings, + session: AsyncSession, + group: Group, +): + result = await create_connection( session=session, - name=f"integration_s3_{secrets.token_hex(5)}", + name=secrets.token_hex(5), data=dict( - type=s3.type, - host=s3.host, - port=s3.port, - bucket=s3.bucket, - protocol=s3.protocol, - additional_params={ - "path.style.access": True, - }, + type=oracle.type, + host=oracle.host, + port=oracle.port, + sid=oracle.sid, + service_name=oracle.service_name, + additional_params={}, ), group_id=group.id, ) @@ -571,17 +639,29 @@ async def transfers( await create_credentials( session=session, settings=settings, - connection_id=s3_connection.id, + connection_id=result.id, auth_data=dict( - type="s3", - access_key=s3.access_key, - secret_key=s3.secret_key, + type="oracle", + user=oracle.user, + password=oracle.password, ), ) - hdfs_connection = await create_connection( + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture +async def hdfs_connection( + hdfs: HDFSConnectionDTO, + settings: Settings, + session: AsyncSession, + group: Group, +): + result = await create_connection( session=session, - name=f"integration_hdfs_{secrets.token_hex(5)}", + name=secrets.token_hex(5), data=dict( type=hdfs.type, cluster=hdfs.cluster, @@ -592,7 +672,7 @@ async def transfers( await create_credentials( session=session, settings=settings, - 
connection_id=hdfs_connection.id, + connection_id=result.id, auth_data=dict( type="hdfs", user=hdfs.user, @@ -600,91 +680,47 @@ async def transfers( ), ) - queue = await create_queue( + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture +async def s3_connection( + s3: S3ConnectionDTO, + settings: Settings, + session: AsyncSession, + group: Group, +): + result = await create_connection( session=session, - name="test_queue", + name=secrets.token_hex(5), + data=dict( + type=s3.type, + host=s3.host, + port=s3.port, + bucket=s3.bucket, + protocol=s3.protocol, + additional_params={ + "path.style.access": True, + }, + ), group_id=group.id, ) - transfers = {} - for source, target in permutations( - [ - hive_connection, - oracle_connection, - postgres_connection, - s3_connection, - hdfs_connection, - ], - 2, - ): - transfer_type = ("s3", "hdfs") - source_type = source.data["type"] - target_type = target.data["type"] - - file_format = {} - if (source_type in transfer_type) or (target_type in transfer_type): - file_format = file_format_object.dict() - file_format["type"] = s3_file_format - file_format["timestampFormat"] = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS+00:00" - - if source_type in transfer_type: - source_params = { - "type": source_type, - "directory_path": str(source_path / "file_df_connection" / s3_file_format / choice_file_type), - "file_format": file_format, - "df_schema": df_schema.json(), - "options": {}, - } - else: - source_params = { - "type": source_type, - "table_name": (oracle.user if source_type == "oracle" else "public") + ".source_table", - } - - if target_type in transfer_type: - target_params = { - "type": target_type, - "directory_path": f"/target/{s3_file_format}/{choice_file_type}", - "file_format": file_format, - "options": {}, - } - else: - target_params = { - "type": target_type, - "table_name": (oracle.user if target_type == "oracle" else "public") + ".target_table", - } - - transfer = await create_transfer( - session=session, - group_id=group.id, - name=f"integration_transfer_{source_type}_{target_type}", - source_connection_id=source.id, - target_connection_id=target.id, - source_params=source_params, - target_params=target_params, - queue_id=queue.id, - ) - transfers[f"{source_type}_{target_type}"] = transfer - - data = { - "group_owner": MockUser( - user=user, - auth_token=sign_jwt(user.id, settings), - role=UserTestRoles.Owner, + await create_credentials( + session=session, + settings=settings, + connection_id=result.id, + auth_data=dict( + type="s3", + access_key=s3.access_key, + secret_key=s3.secret_key, ), - } - data.update(transfers) - logger.info("END TRANSFERS FIXTURE", datetime.datetime.now().isoformat()) - yield data - for transfer in transfers.values(): - await session.delete(transfer) - await session.delete(postgres_connection) - await session.delete(oracle_connection) - await session.delete(hive_connection) - await session.delete(s3_connection) - await session.delete(hdfs_connection) - await session.delete(user) - await session.delete(queue) + ) + + yield result + await session.delete(result) await session.commit() @@ -714,223 +750,3 @@ def init_df_with_mixed_column_naming(spark: SparkSession) -> DataFrame: ], schema=df_schema, ) - - -@pytest.fixture -def prepare_postgres_with_mixed_column_naming( - spark: SparkSession, - postgres: PostgresConnectionDTO, - init_df_with_mixed_column_naming: DataFrame, -) -> Postgres: - postgres_connection = Postgres( - host=postgres.host, - port=postgres.port, - user=postgres.user, - 
password=postgres.password, - database=postgres.database_name, - spark=spark, - ).check() - postgres_connection.execute("DROP TABLE IF EXISTS public.source_table") - postgres_connection.execute("DROP TABLE IF EXISTS public.target_table") - db_writer = DBWriter( - connection=postgres_connection, - target="public.source_table", - options=Postgres.WriteOptions(if_exists="append"), - ) - db_writer.run(init_df_with_mixed_column_naming) - return postgres_connection - - -@pytest.fixture -def prepare_hive_with_mixed_column_naming( - spark: SparkSession, - hive: HiveConnectionDTO, - init_df_with_mixed_column_naming: DataFrame, -) -> Hive: - hive_connection = Hive( - cluster=hive.cluster, - spark=spark, - ).check() - hive_connection.execute("DROP TABLE IF EXISTS public.source_table") - hive_connection.execute("DROP TABLE IF EXISTS public.target_table") - hive_connection.execute("CREATE DATABASE IF NOT EXISTS public") - db_writer = DBWriter( - connection=hive_connection, - target="public.source_table", - ) - db_writer.run(init_df_with_mixed_column_naming) - spark.catalog.refreshTable("public.source_table") - return hive_connection - - -@pytest.fixture -def prepare_oracle_with_mixed_column_naming( - spark: SparkSession, - oracle: OracleConnectionDTO, - init_df_with_mixed_column_naming: DataFrame, -) -> Oracle: - oracle_connection = Oracle( - host=oracle.host, - port=oracle.port, - user=oracle.user, - password=oracle.password, - sid=oracle.sid, - service_name=oracle.service_name, - spark=spark, - ).check() - try: - oracle_connection.execute(f"DROP TABLE {oracle.user}.source_table") - except Exception: - pass - try: - oracle_connection.execute(f"DROP TABLE {oracle.user}.target_table") - except Exception: - pass - db_writer = DBWriter( - connection=oracle_connection, - target=f"{oracle.user}.source_table", - options=Oracle.WriteOptions(if_exists="append"), - ) - db_writer.run(init_df_with_mixed_column_naming) - return oracle_connection - - -@pytest_asyncio.fixture -async def transfers_with_mixed_column_naming( - prepare_postgres_with_mixed_column_naming, - prepare_oracle_with_mixed_column_naming, - prepare_hive_with_mixed_column_naming, - postgres: PostgresConnectionDTO, - oracle: OracleConnectionDTO, - hive: HiveConnectionDTO, - session: AsyncSession, - settings: Settings, -): - user = await create_user( - session=session, - username="owner_group", - is_active=True, - ) - group = await create_group(session=session, name="connection_group", owner_id=user.id) - hive_connection = await create_connection( - session=session, - name="integration_hive", - data=dict( - type=hive.type, - cluster=hive.cluster, - ), - group_id=group.id, - ) - - await create_credentials( - session=session, - settings=settings, - connection_id=hive_connection.id, - auth_data=dict( - type="hive", - user=hive.user, - password=hive.password, - ), - ) - - postgres_connection = await create_connection( - session=session, - name="integration_postgres", - data=dict( - type=postgres.type, - host=postgres.host, - port=postgres.port, - database_name=postgres.database_name, - additional_params={}, - ), - group_id=group.id, - ) - - await create_credentials( - session=session, - settings=settings, - connection_id=postgres_connection.id, - auth_data=dict( - type="postgres", - user=postgres.user, - password=postgres.password, - ), - ) - - oracle_connection = await create_connection( - session=session, - name="integration_oracle", - data=dict( - type=oracle.type, - host=oracle.host, - port=oracle.port, - sid=oracle.sid, - 
service_name=oracle.service_name, - additional_params={}, - ), - group_id=group.id, - ) - - await create_credentials( - session=session, - settings=settings, - connection_id=oracle_connection.id, - auth_data=dict( - type="oracle", - user=oracle.user, - password=oracle.password, - ), - ) - - queue = await create_queue( - session=session, - name="test_queue", - group_id=group.id, - ) - - transfers = {} - for source, target in permutations( - [ - hive_connection, - oracle_connection, - postgres_connection, - ], - 2, - ): - source_type = source.data["type"] - target_type = target.data["type"] - transfer = await create_transfer( - session=session, - group_id=group.id, - name=f"integration_transfer_{source_type}_{target_type}", - source_connection_id=source.id, - target_connection_id=target.id, - source_params={ - "type": source_type, - "table_name": (oracle.user if source_type == "oracle" else "public") + ".source_table", - }, - target_params={ - "type": target_type, - "table_name": (oracle.user if target_type == "oracle" else "public") + ".target_table", - }, - queue_id=queue.id, - ) - transfers[f"{source_type}_{target_type}"] = transfer - - data = { - "group_owner": MockUser( - user=user, - auth_token=sign_jwt(user.id, settings), - role=UserTestRoles.Owner, - ), - } - data.update(transfers) - yield data - for transfer in transfers.values(): - await session.delete(transfer) - await session.delete(postgres_connection) - await session.delete(oracle_connection) - await session.delete(hive_connection) - await session.delete(user) - await session.delete(queue) - await session.commit() diff --git a/tests/test_integration/test_run_transfer/test_hdfs.py b/tests/test_integration/test_run_transfer/test_hdfs.py new file mode 100644 index 00000000..99396318 --- /dev/null +++ b/tests/test_integration/test_run_transfer/test_hdfs.py @@ -0,0 +1,227 @@ +import os +import secrets + +import pytest +import pytest_asyncio +from httpx import AsyncClient +from onetl.connection import SparkHDFS +from onetl.db import DBReader +from onetl.file import FileDFReader +from pyspark.sql import DataFrame +from pytest import FixtureRequest +from sqlalchemy.ext.asyncio import AsyncSession + +from syncmaster.db.models import Connection, Group, Queue, Status +from tests.mocks import MockUser +from tests.test_unit.utils import create_transfer +from tests.utils import get_run_on_end + +pytestmark = [pytest.mark.asyncio, pytest.mark.worker] + + +@pytest.fixture(params=[""]) +def file_format_flavor(request: FixtureRequest): + return request.param + + +@pytest_asyncio.fixture +async def hdfs_to_postgres( + session: AsyncSession, + group: Group, + queue: Queue, + init_df: DataFrame, + hdfs_connection: Connection, + postgres_connection: Connection, + prepare_hdfs, + source_file_format, + file_format_flavor: str, +): + format_name, file_format = source_file_format + _, source_path, _ = prepare_hdfs + + result = await create_transfer( + session=session, + group_id=group.id, + name=f"hdfs2postgres_{secrets.token_hex(5)}", + source_connection_id=hdfs_connection.id, + target_connection_id=postgres_connection.id, + source_params={ + "type": "hdfs", + "directory_path": os.fspath(source_path / "file_df_connection" / format_name / file_format_flavor), + "file_format": { + "type": format_name, + **file_format.dict(), + }, + "df_schema": init_df.schema.json(), + "options": {}, + }, + target_params={ + "type": "postgres", + "table_name": "public.target_table", + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await 
session.commit() + + +@pytest_asyncio.fixture(params=[""]) +async def postgres_to_hdfs( + session: AsyncSession, + group: Group, + queue: Queue, + hdfs_connection: Connection, + postgres_connection: Connection, + target_file_format, + file_format_flavor: str, +): + format_name, file_format = target_file_format + result = await create_transfer( + session=session, + group_id=group.id, + name=f"postgres2hdfs_{secrets.token_hex(5)}", + source_connection_id=postgres_connection.id, + target_connection_id=hdfs_connection.id, + source_params={ + "type": "postgres", + "table_name": "public.source_table", + }, + target_params={ + "type": "hdfs", + "directory_path": f"/target/{format_name}/{file_format_flavor}", + "file_format": { + "type": format_name, + **file_format.dict(), + }, + "options": {}, + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +@pytest.mark.parametrize( + "source_file_format, file_format_flavor", + [ + pytest.param( + ("csv", {}), + "with_header", + id="csv", + ), + pytest.param( + ("json", {}), + "without_compression", + id="json", + ), + pytest.param( + ("jsonline", {}), + "without_compression", + id="jsonline", + ), + ], + indirect=["source_file_format", "file_format_flavor"], +) +async def test_run_transfer_hdfs_to_postgres( + prepare_postgres, + group_owner: MockUser, + init_df: DataFrame, + client: AsyncClient, + hdfs_to_postgres: Connection, + source_file_format, + file_format_flavor, +): + # Arrange + postgres, _ = prepare_postgres + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": hdfs_to_postgres.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = DBReader( + connection=postgres, + table="public.target_table", + ) + df = reader.run() + for field in init_df.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("id").collect() == init_df.sort("id").collect() + + +@pytest.mark.parametrize( + "target_file_format, file_format_flavor", + [ + pytest.param( + ("csv", {}), + "with_header", + id="csv", + ), + pytest.param( + ("jsonline", {}), + "without_compression", + id="jsonline", + ), + ], + indirect=["target_file_format", "file_format_flavor"], +) +async def test_run_transfer_postgres_to_hdfs( + group_owner: MockUser, + init_df: DataFrame, + client: AsyncClient, + prepare_postgres, + hdfs_file_df_connection: SparkHDFS, + postgres_to_hdfs: Connection, + hdfs_connection: SparkHDFS, + target_file_format, + file_format_flavor: str, +): + format_name, format = target_file_format + + # Arrange + _, fill_with_data = prepare_postgres + fill_with_data(init_df) + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": postgres_to_hdfs.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = FileDFReader( + connection=hdfs_file_df_connection, + format=format, + source_path=f"/target/{format_name}/{file_format_flavor}", + df_schema=init_df.schema, + options={}, + ) + df = reader.run() + + for field in init_df.schema: + df = df.withColumn(field.name, 
df[field.name].cast(field.dataType)) + + assert df.sort("id").collect() == init_df.sort("id").collect() diff --git a/tests/test_integration/test_run_transfer/test_hdfs_to_postgres.py b/tests/test_integration/test_run_transfer/test_hdfs_to_postgres.py deleted file mode 100644 index dfc435bf..00000000 --- a/tests/test_integration/test_run_transfer/test_hdfs_to_postgres.py +++ /dev/null @@ -1,137 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.test_integration.test_run_transfer.conftest import df_schema -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.hdfs, pytest.mark.postgres] - - -@pytest.mark.parametrize("choice_file_type", ["with_header"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["csv"], indirect=True) -async def test_run_hdfs_transfer_csv( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_hdfs, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["hdfs_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in df_schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("id").collect() == init_df.sort("id").collect() - - -@pytest.mark.parametrize("choice_file_type", ["without_compression"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["jsonline"], indirect=True) -async def test_run_hdfs_transfer_jsonline( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_hdfs, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["hdfs_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in df_schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("id").collect() == init_df.sort("id").collect() - - -@pytest.mark.parametrize("choice_file_type", ["without_compression"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["json"], indirect=True) -async def test_run_hdfs_transfer_json( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_hdfs, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["hdfs_postgres"] - - # Act - result = 
await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in df_schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/test_integration/test_run_transfer/test_hive_to_postgres.py b/tests/test_integration/test_run_transfer/test_hive_to_postgres.py deleted file mode 100644 index 81947cc7..00000000 --- a/tests/test_integration/test_run_transfer/test_hive_to_postgres.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.hive, pytest.mark.postgres] - - -async def test_run_simple_transfer( - client: AsyncClient, - transfers: dict[str, MockUser | Transfer], - prepare_postgres, - init_df: DataFrame, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["hive_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/test_integration/test_run_transfer/test_hve.py b/tests/test_integration/test_run_transfer/test_hve.py new file mode 100644 index 00000000..b042b6c9 --- /dev/null +++ b/tests/test_integration/test_run_transfer/test_hve.py @@ -0,0 +1,239 @@ +import secrets + +import pytest +import pytest_asyncio +from httpx import AsyncClient +from onetl.db import DBReader +from pyspark.sql import DataFrame +from sqlalchemy.ext.asyncio import AsyncSession + +from syncmaster.db.models import Connection, Group, Queue, Status, Transfer +from tests.mocks import MockUser +from tests.test_unit.utils import create_transfer +from tests.utils import get_run_on_end + +pytestmark = [pytest.mark.asyncio, pytest.mark.worker] + + +@pytest_asyncio.fixture +async def postgres_to_hive( + session: AsyncSession, + group: Group, + queue: Queue, + hive_connection: Connection, + postgres_connection: Connection, +): + result = await create_transfer( + session=session, + group_id=group.id, + name=f"postgres2hive_{secrets.token_hex(5)}", + source_connection_id=postgres_connection.id, + target_connection_id=hive_connection.id, + source_params={ + "type": "postgres", + "table_name": "public.source_table", + }, + target_params={ + "type": "hive", + "table_name": "default.target_table", + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture +async def 
hive_to_postgres( + session: AsyncSession, + group: Group, + queue: Queue, + hive_connection: Connection, + postgres_connection: Connection, +): + result = await create_transfer( + session=session, + group_id=group.id, + name=f"hive2postgres_{secrets.token_hex(5)}", + source_connection_id=hive_connection.id, + target_connection_id=postgres_connection.id, + source_params={ + "type": "hive", + "table_name": "default.source_table", + }, + target_params={ + "type": "postgres", + "table_name": "public.target_table", + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +async def test_run_transfer_postgres_to_hive( + client: AsyncClient, + group_owner: MockUser, + prepare_postgres, + prepare_hive, + init_df: DataFrame, + postgres_to_hive: Transfer, +): + # Arrange + _, fill_with_data = prepare_postgres + fill_with_data(init_df) + hive, _ = prepare_hive + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": postgres_to_hive.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = DBReader( + connection=hive, + table="default.target_table", + ) + df = reader.run() + for field in init_df.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("ID").collect() == init_df.sort("ID").collect() + + +async def test_run_transfer_postgres_to_hive_mixed_naming( + client: AsyncClient, + group_owner: MockUser, + prepare_postgres, + prepare_hive, + init_df_with_mixed_column_naming: DataFrame, + postgres_to_hive: Transfer, +): + # Arrange + _, fill_with_data = prepare_postgres + fill_with_data(init_df_with_mixed_column_naming) + hive, _ = prepare_hive + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": postgres_to_hive.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + reader = DBReader( + connection=hive, + table="default.target_table", + ) + df = reader.run() + + assert df.columns != init_df_with_mixed_column_naming.columns + assert df.columns == [column.lower() for column in init_df_with_mixed_column_naming.columns] + + for field in init_df_with_mixed_column_naming.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.collect() == init_df_with_mixed_column_naming.collect() + + +async def test_run_transfer_hive_to_postgres( + client: AsyncClient, + group_owner: MockUser, + prepare_hive, + prepare_postgres, + init_df: DataFrame, + hive_to_postgres: Transfer, +): + # Arrange + _, fill_with_data = prepare_hive + fill_with_data(init_df) + postgres, _ = prepare_postgres + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": hive_to_postgres.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + reader = DBReader( + connection=postgres, + table="public.target_table", + ) + df = reader.run() + for field in init_df.schema: + df = 
df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("ID").collect() == init_df.sort("ID").collect() + + +async def test_run_transfer_hive_to_postgres_mixes_naming( + client: AsyncClient, + group_owner: MockUser, + prepare_hive, + prepare_postgres, + init_df_with_mixed_column_naming: DataFrame, + hive_to_postgres: Transfer, +): + # Arrange + _, fill_with_data = prepare_hive + fill_with_data(init_df_with_mixed_column_naming) + postgres, _ = prepare_postgres + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": hive_to_postgres.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = DBReader( + connection=postgres, + table="public.target_table", + ) + df = reader.run() + + assert df.columns != init_df_with_mixed_column_naming.columns + assert df.columns == [column.lower() for column in init_df_with_mixed_column_naming.columns] + + for field in init_df_with_mixed_column_naming.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.collect() == init_df_with_mixed_column_naming.collect() diff --git a/tests/test_integration/test_run_transfer/test_oracle.py b/tests/test_integration/test_run_transfer/test_oracle.py new file mode 100644 index 00000000..21ebea5f --- /dev/null +++ b/tests/test_integration/test_run_transfer/test_oracle.py @@ -0,0 +1,243 @@ +import secrets + +import pytest +import pytest_asyncio +from httpx import AsyncClient +from onetl.connection import Oracle +from onetl.db import DBReader +from pyspark.sql import DataFrame +from sqlalchemy.ext.asyncio import AsyncSession + +from syncmaster.db.models import Connection, Group, Queue, Status, Transfer +from tests.mocks import MockUser +from tests.test_unit.utils import create_transfer +from tests.utils import get_run_on_end + +pytestmark = [pytest.mark.asyncio, pytest.mark.worker] + + +@pytest_asyncio.fixture +async def postgres_to_oracle( + session: AsyncSession, + group: Group, + queue: Queue, + oracle: Oracle, + oracle_connection: Connection, + postgres_connection: Connection, +): + result = await create_transfer( + session=session, + group_id=group.id, + name=f"postgres2oracle_{secrets.token_hex(5)}", + source_connection_id=postgres_connection.id, + target_connection_id=oracle_connection.id, + source_params={ + "type": "postgres", + "table_name": "public.source_table", + }, + target_params={ + "type": "oracle", + "table_name": f"{oracle.user}.target_table", + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture +async def oracle_to_postgres( + session: AsyncSession, + group: Group, + queue: Queue, + oracle: Oracle, + oracle_connection: Connection, + postgres_connection: Connection, +): + result = await create_transfer( + session=session, + group_id=group.id, + name=f"oracle2postgres_{secrets.token_hex(5)}", + source_connection_id=oracle_connection.id, + target_connection_id=postgres_connection.id, + source_params={ + "type": "oracle", + "table_name": f"{oracle.user}.source_table", + }, + target_params={ + "type": "postgres", + "table_name": "public.target_table", + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +async def test_run_transfer_postgres_to_oracle( + client: 
AsyncClient, + group_owner: MockUser, + prepare_postgres, + prepare_oracle, + init_df: DataFrame, + postgres_to_oracle: Transfer, +): + # Arrange + _, fill_with_data = prepare_postgres + fill_with_data(init_df) + oracle, _ = prepare_oracle + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": postgres_to_oracle.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + reader = DBReader( + connection=oracle, + table=f"{oracle.user}.target_table", + ) + df = reader.run() + for field in init_df.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("ID").collect() == init_df.sort("ID").collect() + + +async def test_run_transfer_postgres_to_oracle_mixed_naming( + client: AsyncClient, + group_owner: MockUser, + prepare_postgres, + prepare_oracle, + init_df_with_mixed_column_naming: DataFrame, + postgres_to_oracle: Transfer, +): + # Arrange + _, fill_with_data = prepare_postgres + fill_with_data(init_df_with_mixed_column_naming) + oracle, _ = prepare_oracle + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": postgres_to_oracle.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + reader = DBReader( + connection=oracle, + table=f"{oracle.user}.target_table", + ) + df = reader.run() + + assert df.columns != init_df_with_mixed_column_naming.columns + assert df.columns == [column.upper() for column in init_df_with_mixed_column_naming.columns] + + for field in init_df_with_mixed_column_naming.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.collect() == init_df_with_mixed_column_naming.collect() + + +async def test_run_transfer_oracle_to_postgres( + client: AsyncClient, + group_owner: MockUser, + prepare_oracle, + prepare_postgres, + init_df: DataFrame, + oracle_to_postgres: Transfer, +): + # Arrange + _, fill_with_data = prepare_oracle + fill_with_data(init_df) + postgres, _ = prepare_postgres + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": oracle_to_postgres.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = DBReader( + connection=postgres, + table="public.target_table", + ) + df = reader.run() + + for field in init_df.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("ID").collect() == init_df.sort("ID").collect() + + +async def test_run_transfer_oracle_to_postgres_mixed_naming( + client: AsyncClient, + group_owner: MockUser, + prepare_oracle, + prepare_postgres, + init_df_with_mixed_column_naming: DataFrame, + oracle_to_postgres: Transfer, +): + # Arrange + _, fill_with_data = prepare_oracle + fill_with_data(init_df_with_mixed_column_naming) + postgres, _ = prepare_postgres + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + 
json={"transfer_id": oracle_to_postgres.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = DBReader( + connection=postgres, + table="public.target_table", + ) + df = reader.run() + + assert df.columns != init_df_with_mixed_column_naming.columns + assert df.columns == [column.lower() for column in init_df_with_mixed_column_naming.columns] + + for field in init_df_with_mixed_column_naming.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.collect() == init_df_with_mixed_column_naming.collect() diff --git a/tests/test_integration/test_run_transfer/test_oracle_to_postgres.py b/tests/test_integration/test_run_transfer/test_oracle_to_postgres.py deleted file mode 100644 index 346d915f..00000000 --- a/tests/test_integration/test_run_transfer/test_oracle_to_postgres.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.oracle, pytest.mark.postgres] - - -async def test_run_simple_transfer( - client: AsyncClient, - transfers: dict[str, MockUser | Transfer], - prepare_postgres, - init_df: DataFrame, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["oracle_postgres"] - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/test_integration/test_run_transfer/test_postgres_to_hdfs.py b/tests/test_integration/test_run_transfer/test_postgres_to_hdfs.py deleted file mode 100644 index bd50e4fd..00000000 --- a/tests/test_integration/test_run_transfer/test_postgres_to_hdfs.py +++ /dev/null @@ -1,106 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.file import FileDFReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.hdfs, pytest.mark.postgres] - - -@pytest.mark.parametrize("choice_file_type", ["without_header"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["csv"], indirect=True) -async def test_run_pg_to_hdfs_transfer_csv( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_hdfs, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - hdfs_file_format, file_object = choice_file_format - hdfs_connection, _, _ = prepare_hdfs - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["postgres_hdfs"] - - # Act - result = await client.post( - "v1/runs", - 
headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - - reader = FileDFReader( - connection=hdfs_connection, - format=file_object, - source_path=f"/target/{hdfs_file_format}/{choice_file_type}", - options={}, - df_schema=init_df.schema, - ) - df = reader.run() - - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - assert df.sort("ID").collect() == init_df.sort("ID").collect() - - -@pytest.mark.parametrize("choice_file_type", ["without_compression"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["jsonline"], indirect=True) -async def test_run_pg_to_hdfs_transfer_jsonline( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_hdfs, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - hdfs_file_format, file_object = choice_file_format - hdfs_connection, _, _ = prepare_hdfs - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["postgres_hdfs"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - - reader = FileDFReader( - connection=hdfs_connection, - format=file_object, - source_path=f"/target/{hdfs_file_format}/{choice_file_type}", - options={}, - df_schema=init_df.schema, - ) - df = reader.run() - - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/test_integration/test_run_transfer/test_postgres_to_hive.py b/tests/test_integration/test_run_transfer/test_postgres_to_hive.py deleted file mode 100644 index f784cef3..00000000 --- a/tests/test_integration/test_run_transfer/test_postgres_to_hive.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.hive, pytest.mark.postgres] - - -async def test_run_simple_transfer( - client: AsyncClient, - transfers: dict[str, MockUser | Transfer], - prepare_hive, - init_df: DataFrame, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["postgres_hive"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_hive, - table="public.target_table", - ) - df = reader.run() - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git 
a/tests/test_integration/test_run_transfer/test_postgres_to_oracle.py b/tests/test_integration/test_run_transfer/test_postgres_to_oracle.py deleted file mode 100644 index 025dc5b2..00000000 --- a/tests/test_integration/test_run_transfer/test_postgres_to_oracle.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.oracle, pytest.mark.postgres] - - -async def test_run_simple_transfer( - client: AsyncClient, - transfers: dict[str, MockUser | Transfer], - prepare_oracle, - init_df: DataFrame, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["postgres_oracle"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_oracle, - table=f"{prepare_oracle.user}.target_table", - ) - df = reader.run() - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/test_integration/test_run_transfer/test_postgres_to_s3.py b/tests/test_integration/test_run_transfer/test_postgres_to_s3.py deleted file mode 100644 index b008eb22..00000000 --- a/tests/test_integration/test_run_transfer/test_postgres_to_s3.py +++ /dev/null @@ -1,106 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.file import FileDFReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.s3, pytest.mark.postgres] - - -@pytest.mark.parametrize("choice_file_type", ["without_header"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["csv"], indirect=True) -async def test_run_pg_to_s3_transfer_csv( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_s3, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - s3_file_format, file_object = choice_file_format - s3_connection, _, _ = prepare_s3 - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["postgres_s3"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - - reader = FileDFReader( - connection=s3_connection, - format=file_object, - source_path=f"/target/{s3_file_format}/{choice_file_type}", - options={}, - df_schema=init_df.schema, - ) - df = reader.run() - - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - assert df.sort("ID").collect() == init_df.sort("ID").collect() - - -@pytest.mark.parametrize("choice_file_type", ["without_compression"], 
indirect=True) -@pytest.mark.parametrize("choice_file_format", ["jsonline"], indirect=True) -async def test_run_pg_to_s3_transfer_jsonline( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_s3, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - s3_file_format, file_object = choice_file_format - s3_connection, _, _ = prepare_s3 - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["postgres_s3"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - - reader = FileDFReader( - connection=s3_connection, - format=file_object, - source_path=f"/target/{s3_file_format}/{choice_file_type}", - options={}, - df_schema=init_df.schema, - ) - df = reader.run() - - for field in init_df.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/test_integration/test_run_transfer/test_read_mixed_column_naming.py b/tests/test_integration/test_run_transfer/test_read_mixed_column_naming.py deleted file mode 100644 index b8d60d94..00000000 --- a/tests/test_integration/test_run_transfer/test_read_mixed_column_naming.py +++ /dev/null @@ -1,132 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.oracle, pytest.mark.postgres] - - -async def test_change_mixed_column_naming_to_oracle_default_case( - client: AsyncClient, - transfers_with_mixed_column_naming, - prepare_oracle_with_mixed_column_naming, - init_df_with_mixed_column_naming: DataFrame, -): - # Arrange - user: MockUser = transfers_with_mixed_column_naming["group_owner"] - transfer: Transfer = transfers_with_mixed_column_naming["postgres_oracle"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_oracle_with_mixed_column_naming, - table=f"{prepare_oracle_with_mixed_column_naming.user}.target_table", - ) - df = reader.run() - - assert df.columns != init_df_with_mixed_column_naming.columns - assert df.columns == [column.upper() for column in init_df_with_mixed_column_naming.columns] - - for field in init_df_with_mixed_column_naming.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.collect() == init_df_with_mixed_column_naming.collect() - - -async def test_change_mixed_column_naming_to_postgres_default_case( - client: AsyncClient, - transfers_with_mixed_column_naming, - prepare_postgres_with_mixed_column_naming, - init_df_with_mixed_column_naming: DataFrame, -): - # Arrange - user: MockUser = transfers_with_mixed_column_naming["group_owner"] - transfer: Transfer = 
transfers_with_mixed_column_naming["oracle_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - - reader = DBReader( - connection=prepare_postgres_with_mixed_column_naming, - table="public.target_table", - ) - df = reader.run() - - assert df.columns != init_df_with_mixed_column_naming.columns - assert df.columns == [column.lower() for column in init_df_with_mixed_column_naming.columns] - - for field in init_df_with_mixed_column_naming.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.collect() == init_df_with_mixed_column_naming.collect() - - -async def test_change_mixed_column_naming_to_hive_default_case( - client: AsyncClient, - transfers_with_mixed_column_naming, - prepare_hive_with_mixed_column_naming, - init_df_with_mixed_column_naming: DataFrame, - spark, -): - # Arrange - user: MockUser = transfers_with_mixed_column_naming["group_owner"] - transfer: Transfer = transfers_with_mixed_column_naming["postgres_hive"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_hive_with_mixed_column_naming, - table="public.target_table", - ) - df = reader.run() - - assert df.columns != init_df_with_mixed_column_naming.columns - assert df.columns == [column.lower() for column in init_df_with_mixed_column_naming.columns] - - for field in init_df_with_mixed_column_naming.schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.collect() == init_df_with_mixed_column_naming.collect() diff --git a/tests/test_integration/test_run_transfer/test_s3.py b/tests/test_integration/test_run_transfer/test_s3.py new file mode 100644 index 00000000..91cfe87f --- /dev/null +++ b/tests/test_integration/test_run_transfer/test_s3.py @@ -0,0 +1,226 @@ +import os +import secrets + +import pytest +import pytest_asyncio +from httpx import AsyncClient +from onetl.connection import SparkS3 +from onetl.db import DBReader +from onetl.file import FileDFReader +from pyspark.sql import DataFrame +from pytest import FixtureRequest +from sqlalchemy.ext.asyncio import AsyncSession + +from syncmaster.db.models import Connection, Group, Queue, Status +from tests.mocks import MockUser +from tests.test_unit.utils import create_transfer +from tests.utils import get_run_on_end + +pytestmark = [pytest.mark.asyncio, pytest.mark.worker] + + +@pytest.fixture(params=[""]) +def file_format_flavor(request: FixtureRequest): + return request.param + + +@pytest_asyncio.fixture +async def s3_to_postgres( + session: AsyncSession, + group: Group, + queue: Queue, + init_df: DataFrame, + s3_connection: Connection, + postgres_connection: Connection, + prepare_s3, + source_file_format, + file_format_flavor: str, +): + format_name, file_format = source_file_format + _, source_path, _ = prepare_s3 + + result = await create_transfer( + session=session, + group_id=group.id, + name=f"s32postgres_{secrets.token_hex(5)}", + 
source_connection_id=s3_connection.id, + target_connection_id=postgres_connection.id, + source_params={ + "type": "s3", + "directory_path": os.fspath(source_path / "file_df_connection" / format_name / file_format_flavor), + "file_format": { + "type": format_name, + **file_format.dict(), + }, + "df_schema": init_df.schema.json(), + "options": {}, + }, + target_params={ + "type": "postgres", + "table_name": "public.target_table", + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +@pytest_asyncio.fixture(params=[""]) +async def postgres_to_s3( + session: AsyncSession, + group: Group, + queue: Queue, + s3_connection: Connection, + postgres_connection: Connection, + target_file_format, + file_format_flavor: str, +): + format_name, file_format = target_file_format + result = await create_transfer( + session=session, + group_id=group.id, + name=f"postgres2s3_{secrets.token_hex(5)}", + source_connection_id=postgres_connection.id, + target_connection_id=s3_connection.id, + source_params={ + "type": "postgres", + "table_name": "public.source_table", + }, + target_params={ + "type": "s3", + "directory_path": f"/target/{format_name}/{file_format_flavor}", + "file_format": { + "type": format_name, + **file_format.dict(), + }, + "options": {}, + }, + queue_id=queue.id, + ) + yield result + await session.delete(result) + await session.commit() + + +@pytest.mark.parametrize( + "source_file_format, file_format_flavor", + [ + pytest.param( + ("csv", {}), + "with_header", + id="csv", + ), + pytest.param( + ("json", {}), + "without_compression", + id="json", + ), + pytest.param( + ("jsonline", {}), + "without_compression", + id="jsonline", + ), + ], + indirect=["source_file_format", "file_format_flavor"], +) +async def test_run_transfer_s3_to_postgres( + prepare_postgres, + group_owner: MockUser, + init_df: DataFrame, + client: AsyncClient, + s3_to_postgres: Connection, + source_file_format, + file_format_flavor, +): + # Arrange + postgres, _ = prepare_postgres + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": s3_to_postgres.id}, + ) + # Assert + assert result.status_code == 200 + + run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = DBReader( + connection=postgres, + table="public.target_table", + ) + df = reader.run() + for field in init_df.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("id").collect() == init_df.sort("id").collect() + + +@pytest.mark.parametrize( + "target_file_format, file_format_flavor", + [ + pytest.param( + ("csv", {}), + "with_header", + id="csv", + ), + pytest.param( + ("jsonline", {}), + "without_compression", + id="jsonline", + ), + ], + indirect=["target_file_format", "file_format_flavor"], +) +async def test_run_transfer_postgres_to_s3( + group_owner: MockUser, + init_df: DataFrame, + client: AsyncClient, + s3_file_df_connection: SparkS3, + prepare_postgres, + postgres_to_s3: Connection, + target_file_format, + file_format_flavor: str, +): + format_name, format = target_file_format + + # Arrange + _, fill_with_data = prepare_postgres + fill_with_data(init_df) + + # Act + result = await client.post( + "v1/runs", + headers={"Authorization": f"Bearer {group_owner.token}"}, + json={"transfer_id": postgres_to_s3.id}, + ) + # Assert + assert result.status_code == 200 + + 
run_data = await get_run_on_end( + client=client, + run_id=result.json()["id"], + token=group_owner.token, + ) + assert run_data["status"] == Status.FINISHED.value + + reader = FileDFReader( + connection=s3_file_df_connection, + format=format, + source_path=f"/target/{format_name}/{file_format_flavor}", + df_schema=init_df.schema, + options={}, + ) + df = reader.run() + + for field in init_df.schema: + df = df.withColumn(field.name, df[field.name].cast(field.dataType)) + + assert df.sort("id").collect() == init_df.sort("id").collect() diff --git a/tests/test_integration/test_run_transfer/test_s3_to_postgres.py b/tests/test_integration/test_run_transfer/test_s3_to_postgres.py deleted file mode 100644 index 6d3c8d15..00000000 --- a/tests/test_integration/test_run_transfer/test_s3_to_postgres.py +++ /dev/null @@ -1,137 +0,0 @@ -import pytest -from httpx import AsyncClient -from onetl.db import DBReader -from pyspark.sql import DataFrame - -from syncmaster.db.models import Status, Transfer -from tests.mocks import MockUser -from tests.test_integration.test_run_transfer.conftest import df_schema -from tests.utils import get_run_on_end - -pytestmark = [pytest.mark.asyncio, pytest.mark.worker, pytest.mark.s3, pytest.mark.postgres] - - -@pytest.mark.parametrize("choice_file_type", ["with_header"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["csv"], indirect=True) -async def test_run_s3_transfer_csv( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_s3, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["s3_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in df_schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("id").collect() == init_df.sort("id").collect() - - -@pytest.mark.parametrize("choice_file_type", ["without_compression"], indirect=True) -@pytest.mark.parametrize("choice_file_format", ["jsonline"], indirect=True) -async def test_run_s3_transfer_jsonline( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_s3, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["s3_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in df_schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("id").collect() == init_df.sort("id").collect() - - -@pytest.mark.parametrize("choice_file_type", ["without_compression"], indirect=True) 
-@pytest.mark.parametrize("choice_file_format", ["json"], indirect=True) -async def test_run_s3_transfer_json( - choice_file_format, - choice_file_type, - prepare_postgres, - prepare_s3, - transfers: dict[str, MockUser | Transfer], - init_df: DataFrame, - client: AsyncClient, - spark, -): - # Arrange - user: MockUser = transfers["group_owner"] - transfer: Transfer = transfers["s3_postgres"] - - # Act - result = await client.post( - "v1/runs", - headers={"Authorization": f"Bearer {user.token}"}, - json={"transfer_id": transfer.id}, - ) - # Assert - assert result.status_code == 200 - - run_data = await get_run_on_end( - client=client, - run_id=result.json()["id"], - token=user.token, - ) - assert run_data["status"] == Status.FINISHED.value - reader = DBReader( - connection=prepare_postgres, - table="public.target_table", - ) - df = reader.run() - for field in df_schema: - df = df.withColumn(field.name, df[field.name].cast(field.dataType)) - - assert df.sort("ID").collect() == init_df.sort("ID").collect() diff --git a/tests/utils.py b/tests/utils.py index c1fbf1dd..b4218f8b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -94,9 +94,15 @@ async def drop_database(connection: AsyncConnection, db_name: str) -> None: await connection.execute(text(query)) -async def get_run_on_end(client: AsyncClient, run_id: int, token: str) -> dict[str, Any]: +async def get_run_on_end( + client: AsyncClient, + run_id: int, + token: str, + timeout: int = 120, +) -> dict[str, Any]: + end_time = datetime.now().timestamp() + timeout while True: - logger.info("WAITING FOR THE RUN STATUS", datetime.now().isoformat()) + logger.info("Waiting for end of run") result = await client.get( f"v1/runs/{run_id}", headers={"Authorization": f"Bearer {token}"}, @@ -107,5 +113,8 @@ async def get_run_on_end(client: AsyncClient, run_id: int, token: str) -> dict[s data = result.json() if data["status"] in [Status.FINISHED, Status.FAILED]: return data - logger.info("%s Try get end of run", datetime.now().isoformat()) + + if datetime.now().timestamp() > end_time: + raise TimeoutError() + await asyncio.sleep(1) From c912125e229d56c7791618e201837f35b5dd4c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 22 Apr 2024 09:55:41 +0000 Subject: [PATCH 12/18] [DOP-14025] Fix missing options passed to Transfer params --- syncmaster/schemas/v1/transfers/file/base.py | 5 +++++ .../test_file_transfers/test_create_transfer.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/syncmaster/schemas/v1/transfers/file/base.py b/syncmaster/schemas/v1/transfers/file/base.py index f03217e6..769d8d46 100644 --- a/syncmaster/schemas/v1/transfers/file/base.py +++ b/syncmaster/schemas/v1/transfers/file/base.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import PurePosixPath +from typing import Any from pydantic import BaseModel, Field, field_validator @@ -14,11 +15,13 @@ class ReadFileTransferSource(BaseModel): directory_path: str file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") + options: dict[str, Any] class ReadFileTransferTarget(BaseModel): directory_path: str file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON format is not supported for writing + options: dict[str, Any] # At the moment the CreateTransferSourceParams and CreateTransferTargetParams @@ -26,6 +29,7 @@ class ReadFileTransferTarget(BaseModel): 
class CreateFileTransferSource(BaseModel): directory_path: str file_format: CSV | JSONLine | JSON = Field(..., discriminator="type") + options: dict[str, Any] = Field(default_factory=dict) class Config: arbitrary_types_allowed = True @@ -41,6 +45,7 @@ def _directory_path_is_valid_path(cls, value): class CreateFileTransferTarget(BaseModel): directory_path: str file_format: CSV | JSONLine = Field(..., discriminator="type") # JSON FORMAT IS NOT SUPPORTED AS A TARGET ! + options: dict[str, Any] = Field(default_factory=dict) class Config: arbitrary_types_allowed = True diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index 27277f60..b52cacc0 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -29,6 +29,9 @@ "file_format": { "type": "csv", }, + "options": { + "some": "option", + }, }, ], ) @@ -91,6 +94,12 @@ async def test_developer_plus_can_create_s3_transfer( "queue_id": transfer.queue_id, } + for params in (transfer.source_params, transfer.target_params): + assert params["type"] == "s3" + assert params["directory_path"] == "/some/pure/path" + assert params["file_format"]["type"] == "csv" + assert params["options"] == {"some": "option"} + @pytest.mark.parametrize( "create_connection_data", @@ -174,6 +183,12 @@ async def test_developer_plus_can_create_hdfs_transfer( "queue_id": transfer.queue_id, } + for params in (transfer.source_params, transfer.target_params): + assert params["type"] == "hdfs" + assert params["directory_path"] == "/some/pure/path" + assert params["file_format"]["type"] == "csv" + assert params["options"] == {} + @pytest.mark.parametrize( "create_connection_data", From fd725fc3af871cb4c04ebf146a843154b8038dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 22 Apr 2024 10:02:40 +0000 Subject: [PATCH 13/18] [DOP-14025] Fix missing options passed to Transfer params --- .../test_transfers/test_file_transfers/test_read_transfer.py | 1 + .../test_file_transfers/test_update_transfer.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py index 68d75335..ad68ec29 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_read_transfer.py @@ -21,6 +21,7 @@ "quote": '"', "type": "csv", }, + "options": {}, }, ], ) diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py index 2f7a32e9..1638cd39 100644 --- a/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_update_transfer.py @@ -21,6 +21,7 @@ "quote": '"', "type": "csv", }, + "options": {}, }, ], ) @@ -54,12 +55,13 @@ async def test_developer_plus_can_update_s3_transfer( "type": "s3", "directory_path": "/some/new/test/directory", "file_format": {"type": "jsonline"}, + "options": {"some": "option"}, } }, ) # Pre-Assert - source_params = group_transfer.source_params + source_params = 
group_transfer.source_params.copy() source_params.update( { "directory_path": "/some/new/test/directory", @@ -68,6 +70,7 @@ async def test_developer_plus_can_update_s3_transfer( "line_sep": "\n", "type": "jsonline", }, + "options": {"some": "option"}, } ) # Assert From 0073c0500288fe152baf24c8e6dcf6cc2dc3fc05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 22 Apr 2024 11:56:50 +0300 Subject: [PATCH 14/18] [DOP-14025] Reduce Docker image size --- docker/Dockerfile.backend | 13 +++--- docker/Dockerfile.worker | 13 +++--- .../changelog/next_release/44.improvement.rst | 1 + poetry.lock | 42 +++++++++---------- pyproject.toml | 41 +++++++++++------- 5 files changed, 64 insertions(+), 46 deletions(-) create mode 100644 docs/changelog/next_release/44.improvement.rst diff --git a/docker/Dockerfile.backend b/docker/Dockerfile.backend index b86f6932..463a0405 100644 --- a/docker/Dockerfile.backend +++ b/docker/Dockerfile.backend @@ -1,5 +1,5 @@ ARG BASE_IMAGE=python:3.11-slim -FROM $BASE_IMAGE AS prod +FROM $BASE_IMAGE AS base RUN apt-get update && apt-get install -y \ libssl-dev \ @@ -14,20 +14,23 @@ RUN pip install --no-cache-dir --timeout 3 --retries 3 poetry \ && poetry config virtualenvs.create false WORKDIR /app +ENV PYTHONPATH=/app COPY ./pyproject.toml ./poetry.lock* /app/ RUN pip install --upgrade pip setuptools wheel packaging RUN poetry install --no-root --extras "backend" --without test,docs,dev -COPY ./syncmaster/ /app/syncmaster/ -ENV PYTHONPATH=/app - COPY ./docker/entrypoint_backend.sh /app/entrypoint.sh ENTRYPOINT ["/app/entrypoint.sh"] -FROM prod as test +FROM base AS prod + +COPY ./syncmaster/ /app/syncmaster/ + + +FROM base as test RUN poetry install --no-root --extras "backend" --with test --without docs,dev RUN sed -i 's/python -m/coverage run -m/g' /app/entrypoint.sh diff --git a/docker/Dockerfile.worker b/docker/Dockerfile.worker index 5cc28c70..7b32bfa8 100644 --- a/docker/Dockerfile.worker +++ b/docker/Dockerfile.worker @@ -1,5 +1,5 @@ ARG BASE_IMAGE=python:3.11-slim -FROM $BASE_IMAGE AS prod +FROM $BASE_IMAGE AS base RUN apt-get update && apt-get install -y \ libsasl2-dev \ @@ -20,21 +20,24 @@ RUN apt-get update && apt-get install -y \ RUN pip install --no-cache-dir --timeout 3 --retries 3 poetry && poetry config virtualenvs.create false WORKDIR /app +ENV PYTHONPATH=/app COPY ./pyproject.toml ./poetry.lock* /app/ RUN pip install --upgrade pip setuptools wheel packaging RUN poetry install --no-root --extras "worker" --without test,docs,dev -COPY ./syncmaster/ /app/syncmaster/ -ENV PYTHONPATH=/app - COPY ./docker/entrypoint_worker.sh /app/entrypoint.sh ENTRYPOINT ["/app/entrypoint.sh"] CMD ["--loglevel=info"] -FROM prod as test +FROM base as prod + +COPY ./syncmaster/ /app/syncmaster/ + + +FROM base as test ENV CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session.get_worker_spark_session diff --git a/docs/changelog/next_release/44.improvement.rst b/docs/changelog/next_release/44.improvement.rst new file mode 100644 index 00000000..3ed90a96 --- /dev/null +++ b/docs/changelog/next_release/44.improvement.rst @@ -0,0 +1 @@ +Reduce Docker images size diff --git a/poetry.lock b/poetry.lock index e5dc51de..9c7599ab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -34,7 +34,7 @@ tz = ["backports.zoneinfo"] name = "amqp" version = "5.2.0" description = "Low-level AMQP client for Python (fork of 
amqplib)." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "amqp-5.2.0-py3-none-any.whl", hash = "sha256:827cb12fb0baa892aad844fd95258143bce4027fdac4fccddbc43330fd281637"}, @@ -296,7 +296,7 @@ files = [ name = "billiard" version = "4.2.0" description = "Python multiprocessing fork with improvements and bugfixes" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "billiard-4.2.0-py3-none-any.whl", hash = "sha256:07aa978b308f334ff8282bd4a746e681b3513db5c9a514cbdd810cbbdc19714d"}, @@ -351,7 +351,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "celery" version = "5.3.6" description = "Distributed Task Queue." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "celery-5.3.6-py3-none-any.whl", hash = "sha256:9da4ea0118d232ce97dff5ed4974587fb1c0ff5c10042eb15278487cdd27d1af"}, @@ -605,7 +605,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "click-didyoumean" version = "0.3.1" description = "Enables git-like *did-you-mean* feature in click" -optional = false +optional = true python-versions = ">=3.6.2" files = [ {file = "click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c"}, @@ -619,7 +619,7 @@ click = ">=7" name = "click-plugins" version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." -optional = false +optional = true python-versions = "*" files = [ {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, @@ -636,7 +636,7 @@ dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"] name = "click-repl" version = "0.3.0" description = "REPL plugin for Click" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9"}, @@ -729,7 +729,7 @@ toml = ["tomli"] name = "cryptography" version = "42.0.5" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "cryptography-42.0.5-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16"}, @@ -815,7 +815,7 @@ files = [ name = "ecdsa" version = "0.19.0" description = "ECDSA cryptographic signature library (pure python)" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.6" files = [ {file = "ecdsa-0.19.0-py2.py3-none-any.whl", hash = "sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a"}, @@ -1326,7 +1326,7 @@ i18n = ["Babel (>=2.7)"] name = "kombu" version = "5.3.7" description = "Messaging library for Python." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "kombu-5.3.7-py3-none-any.whl", hash = "sha256:5634c511926309c7f9789f1433e9ed402616b56836ef9878f01bd59267b4c7a9"}, @@ -1795,7 +1795,7 @@ virtualenv = ">=20.10.0" name = "prompt-toolkit" version = "3.0.43" description = "Library for building powerful interactive command lines in Python" -optional = false +optional = true python-versions = ">=3.7.0" files = [ {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, @@ -1929,7 +1929,7 @@ files = [ name = "pyasn1" version = "0.6.0" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pyasn1-0.6.0-py2.py3-none-any.whl", hash = "sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473"}, @@ -2289,7 +2289,7 @@ cli = ["click (>=5.0)"] name = "python-jose" version = "3.3.0" description = "JOSE implementation in Python" -optional = false +optional = true python-versions = "*" files = [ {file = "python-jose-3.3.0.tar.gz", hash = "sha256:55779b5e6ad599c6336191246e95eb2293a9ddebd555f796a65f838f07e5d78a"}, @@ -2311,7 +2311,7 @@ pycryptodome = ["pyasn1", "pycryptodome (>=3.3.1,<4.0.0)"] name = "python-multipart" version = "0.0.9" description = "A streaming multipart parser for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "python_multipart-0.0.9-py3-none-any.whl", hash = "sha256:97ca7b8ea7b05f977dc3849c3ba99d51689822fab725c3703af7c866a0c2b215"}, @@ -2424,7 +2424,7 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "rsa" version = "4.9" description = "Pure-Python RSA implementation" -optional = false +optional = true python-versions = ">=3.6,<4" files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, @@ -2841,7 +2841,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "sqlalchemy-utils" version = "0.41.2" description = "Various utility functions for SQLAlchemy." -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "SQLAlchemy-Utils-0.41.2.tar.gz", hash = "sha256:bc599c8c3b3319e53ce6c5c3c471120bd325d0071fb6f38a10e924e3d07b9990"}, @@ -2980,7 +2980,7 @@ files = [ name = "tzdata" version = "2024.1" description = "Provider of IANA time zone data" -optional = false +optional = true python-versions = ">=2" files = [ {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, @@ -3026,7 +3026,7 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", name = "vine" version = "5.1.0" description = "Python promises." 
-optional = false +optional = true python-versions = ">=3.6" files = [ {file = "vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc"}, @@ -3057,7 +3057,7 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = false +optional = true python-versions = "*" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, @@ -3151,10 +3151,10 @@ test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [extras] -backend = ["alembic", "asyncpg", "fastapi", "uvicorn"] -worker = ["onetl", "psycopg2-binary"] +backend = ["alembic", "asyncpg", "celery", "fastapi", "pydantic-settings", "python-jose", "python-multipart", "sqlalchemy", "sqlalchemy-utils", "uvicorn"] +worker = ["celery", "onetl", "psycopg2-binary", "pydantic-settings", "sqlalchemy", "sqlalchemy-utils"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "0a823a381af17d26a97485fd57425bdd141dcace9a047be6f1e77e77c2ae5408" +content-hash = "7599a5457f1c631b4b5b568af2b0a5b7dd761d175e12bc5e8cdd8f8c192d4694" diff --git a/pyproject.toml b/pyproject.toml index 1d2eba21..613c83da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,29 +47,40 @@ exclude = [ [tool.poetry.dependencies] python = "^3.11" -sqlalchemy = "^2.0.18" -sqlalchemy-utils = "^0.41.1" -pydantic = "^2.6.4" -python-jose = {extras = ["cryptography"], version = "^3.3.0"} -python-multipart = "^0.0.9" -celery = "^5.3.3" -onetl = {version = "^0.10.2", extras = ["spark"]} -psycopg2-binary = {version = "^2.9.7", optional = true } -fastapi = {version = "^0.110.0", optional = true} -uvicorn = {version = "^0.29.0", optional = true } -alembic = {version = "^1.11.1", optional = true } -asyncpg = {version = "^0.29.0", optional = true } -pydantic-settings = "^2.2.1" +pydantic = "^2.7.0" +pydantic-settings = { version = "^2.2.1", optional = true } +sqlalchemy = { version = "^2.0.18", optional = true } +sqlalchemy-utils = { version = "^0.41.1", optional = true } +fastapi = { version = "^0.110.0", optional = true} +uvicorn = { version = "^0.29.0", optional = true } +alembic = { version = "^1.11.1", optional = true } +asyncpg = { version = "^0.29.0", optional = true } +python-jose = { version = "^3.3.0", extras = ["cryptography"], optional = true } +python-multipart = { version = "^0.0.9", optional = true } +celery = { version = "^5.3.3", optional = true } +onetl = { version = "^0.10.2", extras = ["spark"], optional = true } +psycopg2-binary = { version = "^2.9.7", optional = true } [tool.poetry.extras] backend = [ - "alembic", - "asyncpg", + "pydantic-settings", + "sqlalchemy", + "sqlalchemy-utils", "fastapi", "uvicorn", + "alembic", + "asyncpg", + "python-multipart", + "python-jose", + # migrations only + "celery", ] worker = [ + "pydantic-settings", + "sqlalchemy", + "sqlalchemy-utils", + "celery", "onetl", "psycopg2-binary", ] From deada35601a0d0b3ca8b77777307ef25698cb07d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:01:58 +0000 Subject: [PATCH 15/18] Bump the python-packages group with 4 updates Bumps the python-packages group with 4 updates: [celery](https://github.com/celery/celery), [fastapi](https://github.com/tiangolo/fastapi), 
[faker](https://github.com/joke2k/faker) and [sphinx](https://github.com/sphinx-doc/sphinx). Updates `celery` from 5.3.6 to 5.4.0 - [Release notes](https://github.com/celery/celery/releases) - [Changelog](https://github.com/celery/celery/blob/main/Changelog.rst) - [Commits](https://github.com/celery/celery/compare/v5.3.6...v5.4.0) Updates `fastapi` from 0.110.1 to 0.110.2 - [Release notes](https://github.com/tiangolo/fastapi/releases) - [Commits](https://github.com/tiangolo/fastapi/compare/0.110.1...0.110.2) Updates `faker` from 24.9.0 to 24.11.0 - [Release notes](https://github.com/joke2k/faker/releases) - [Changelog](https://github.com/joke2k/faker/blob/master/CHANGELOG.md) - [Commits](https://github.com/joke2k/faker/compare/v24.9.0...v24.11.0) Updates `sphinx` from 7.2.6 to 7.3.7 - [Release notes](https://github.com/sphinx-doc/sphinx/releases) - [Changelog](https://github.com/sphinx-doc/sphinx/blob/master/CHANGES.rst) - [Commits](https://github.com/sphinx-doc/sphinx/compare/v7.2.6...v7.3.7) --- updated-dependencies: - dependency-name: celery dependency-type: direct:production update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: fastapi dependency-type: direct:production update-type: version-update:semver-patch dependency-group: python-packages - dependency-name: faker dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages - dependency-name: sphinx dependency-type: direct:development update-type: version-update:semver-minor dependency-group: python-packages ... Signed-off-by: dependabot[bot] --- poetry.lock | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9c7599ab..14a919ca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -349,13 +349,13 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "celery" -version = "5.3.6" +version = "5.4.0" description = "Distributed Task Queue." 
optional = true python-versions = ">=3.8" files = [ - {file = "celery-5.3.6-py3-none-any.whl", hash = "sha256:9da4ea0118d232ce97dff5ed4974587fb1c0ff5c10042eb15278487cdd27d1af"}, - {file = "celery-5.3.6.tar.gz", hash = "sha256:870cc71d737c0200c397290d730344cc991d13a057534353d124c9380267aab9"}, + {file = "celery-5.4.0-py3-none-any.whl", hash = "sha256:369631eb580cf8c51a82721ec538684994f8277637edde2dfc0dacd73ed97f64"}, + {file = "celery-5.4.0.tar.gz", hash = "sha256:504a19140e8d3029d5acad88330c541d4c3f64c789d85f94756762d8bca7e706"}, ] [package.dependencies] @@ -371,7 +371,7 @@ vine = ">=5.1.0,<6.0" [package.extras] arangodb = ["pyArango (>=2.0.2)"] -auth = ["cryptography (==41.0.5)"] +auth = ["cryptography (==42.0.5)"] azureblockblob = ["azure-storage-blob (>=12.15.0)"] brotli = ["brotli (>=1.0.0)", "brotlipy (>=0.7.0)"] cassandra = ["cassandra-driver (>=3.25.0,<4)"] @@ -381,22 +381,23 @@ couchbase = ["couchbase (>=3.0.0)"] couchdb = ["pycouchdb (==1.14.2)"] django = ["Django (>=2.2.28)"] dynamodb = ["boto3 (>=1.26.143)"] -elasticsearch = ["elastic-transport (<=8.10.0)", "elasticsearch (<=8.11.0)"] +elasticsearch = ["elastic-transport (<=8.13.0)", "elasticsearch (<=8.13.0)"] eventlet = ["eventlet (>=0.32.0)"] +gcs = ["google-cloud-storage (>=2.10.0)"] gevent = ["gevent (>=1.5.0)"] librabbitmq = ["librabbitmq (>=2.0.0)"] memcache = ["pylibmc (==1.6.3)"] mongodb = ["pymongo[srv] (>=4.0.2)"] -msgpack = ["msgpack (==1.0.7)"] -pymemcache = ["python-memcached (==1.59)"] +msgpack = ["msgpack (==1.0.8)"] +pymemcache = ["python-memcached (>=1.61)"] pyro = ["pyro4 (==4.82)"] -pytest = ["pytest-celery (==0.0.0)"] +pytest = ["pytest-celery[all] (>=1.0.0)"] redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"] s3 = ["boto3 (>=1.26.143)"] slmq = ["softlayer-messaging (>=1.0.3)"] solar = ["ephem (==4.1.5)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.0)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] +sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5)", "urllib3 (>=1.26.16)"] tblib = ["tblib (>=1.3.0)", "tblib (>=1.5.0)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=1.3.1)"] @@ -861,13 +862,13 @@ files = [ [[package]] name = "faker" -version = "24.9.0" +version = "24.11.0" description = "Faker is a Python package that generates fake data for you." 
optional = false python-versions = ">=3.8" files = [ - {file = "Faker-24.9.0-py3-none-any.whl", hash = "sha256:97c7874665e8eb7b517f97bf3b59f03bf3f07513fe2c159e98b6b9ea6b9f2b3d"}, - {file = "Faker-24.9.0.tar.gz", hash = "sha256:73b1e7967b0ceeac42fc99a8c973bb49e4499cc4044d20d17ab661d5cb7eda1d"}, + {file = "Faker-24.11.0-py3-none-any.whl", hash = "sha256:adb98e771073a06bdc5d2d6710d8af07ac5da64c8dc2ae3b17bb32319e66fd82"}, + {file = "Faker-24.11.0.tar.gz", hash = "sha256:34b947581c2bced340c39b35f89dbfac4f356932cfff8fe893bde854903f0e6e"}, ] [package.dependencies] @@ -875,13 +876,13 @@ python-dateutil = ">=2.4" [[package]] name = "fastapi" -version = "0.110.1" +version = "0.110.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.8" files = [ - {file = "fastapi-0.110.1-py3-none-any.whl", hash = "sha256:5df913203c482f820d31f48e635e022f8cbfe7350e4830ef05a3163925b1addc"}, - {file = "fastapi-0.110.1.tar.gz", hash = "sha256:6feac43ec359dfe4f45b2c18ec8c94edb8dc2dfc461d417d9e626590c071baad"}, + {file = "fastapi-0.110.2-py3-none-any.whl", hash = "sha256:239403f2c0a3dda07a9420f95157a7f014ddb2b770acdbc984f9bdf3ead7afdb"}, + {file = "fastapi-0.110.2.tar.gz", hash = "sha256:b53d673652da3b65e8cd787ad214ec0fe303cad00d2b529b86ce7db13f17518d"}, ] [package.dependencies] @@ -2496,20 +2497,20 @@ files = [ [[package]] name = "sphinx" -version = "7.2.6" +version = "7.3.7" description = "Python documentation generator" optional = false python-versions = ">=3.9" files = [ - {file = "sphinx-7.2.6-py3-none-any.whl", hash = "sha256:1e09160a40b956dc623c910118fa636da93bd3ca0b9876a7b3df90f07d691560"}, - {file = "sphinx-7.2.6.tar.gz", hash = "sha256:9a5160e1ea90688d5963ba09a2dcd8bdd526620edbb65c328728f1b2228d5ab5"}, + {file = "sphinx-7.3.7-py3-none-any.whl", hash = "sha256:413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3"}, + {file = "sphinx-7.3.7.tar.gz", hash = "sha256:a4a7db75ed37531c05002d56ed6948d4c42f473a36f46e1382b0bd76ca9627bc"}, ] [package.dependencies] -alabaster = ">=0.7,<0.8" +alabaster = ">=0.7.14,<0.8.0" babel = ">=2.9" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} -docutils = ">=0.18.1,<0.21" +docutils = ">=0.18.1,<0.22" imagesize = ">=1.3" Jinja2 = ">=3.0" packaging = ">=21.0" @@ -2525,8 +2526,8 @@ sphinxcontrib-serializinghtml = ">=1.1.9" [package.extras] docs = ["sphinxcontrib-websupport"] -lint = ["docutils-stubs", "flake8 (>=3.5.0)", "flake8-simplify", "isort", "mypy (>=0.990)", "ruff", "sphinx-lint", "types-requests"] -test = ["cython (>=3.0)", "filelock", "html5lib", "pytest (>=4.6)", "setuptools (>=67.0)"] +lint = ["flake8 (>=3.5.0)", "importlib_metadata", "mypy (==1.9.0)", "pytest (>=6.0)", "ruff (==0.3.7)", "sphinx-lint", "tomli", "types-docutils", "types-requests"] +test = ["cython (>=3.0)", "defusedxml (>=0.7.1)", "pytest (>=6.0)", "setuptools (>=67.0)"] [[package]] name = "sphinx-argparse" From 67a17b031db055790ad8c29b215b35c09f92ab6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 22 Apr 2024 11:10:02 +0000 Subject: [PATCH 16/18] Remove 'url' field from error responses --- tests/test_unit/test_connections/test_copy_connection.py | 1 - .../test_create_oracle_connection.py | 1 - tests/test_unit/test_connections/test_create_connection.py | 4 ---- 
tests/test_unit/test_connections/test_update_connection.py | 2 -- tests/test_unit/test_groups/test_add_user_to_group.py | 2 -- tests/test_unit/test_groups/test_update_group_by_id.py | 1 - tests/test_unit/test_groups/test_update_member_role.py | 1 - tests/test_unit/test_queue/test_create_queue.py | 6 ------ tests/test_unit/test_transfers/test_copy_transfer.py | 1 - tests/test_unit/test_transfers/test_create_transfer.py | 7 ------- .../test_file_transfers/test_create_transfer.py | 2 -- tests/test_unit/test_transfers/test_update_transfer.py | 1 - tests/test_unit/test_users.py | 1 - 13 files changed, 30 deletions(-) diff --git a/tests/test_unit/test_connections/test_copy_connection.py b/tests/test_unit/test_connections/test_copy_connection.py index e8cc013d..5d98af16 100644 --- a/tests/test_unit/test_connections/test_copy_connection.py +++ b/tests/test_unit/test_connections/test_copy_connection.py @@ -473,7 +473,6 @@ async def test_check_name_validation_copy_connection_with_new_connection_name( "loc": ["body", "new_name"], "msg": "String should have at least 1 character", "type": "string_too_short", - "url": "https://errors.pydantic.dev/2.7/v/string_too_short", } ] } diff --git a/tests/test_unit/test_connections/test_create_all_connection/test_create_oracle_connection.py b/tests/test_unit/test_connections/test_create_all_connection/test_create_oracle_connection.py index 9f473096..7078dcfd 100644 --- a/tests/test_unit/test_connections/test_create_all_connection/test_create_oracle_connection.py +++ b/tests/test_unit/test_connections/test_create_all_connection/test_create_oracle_connection.py @@ -199,7 +199,6 @@ async def test_developer_plus_create_oracle_connection_with_sid_and_service_name "loc": ["body", "connection_data", "oracle"], "msg": "Value error, You must specify either sid or service_name but not both", "type": "value_error", - "url": "https://errors.pydantic.dev/2.7/v/value_error", } ] } diff --git a/tests/test_unit/test_connections/test_create_connection.py b/tests/test_unit/test_connections/test_create_connection.py index 4ee964ee..71602ff2 100644 --- a/tests/test_unit/test_connections/test_create_connection.py +++ b/tests/test_unit/test_connections/test_create_connection.py @@ -165,7 +165,6 @@ async def test_check_fields_validation_on_create_connection( "loc": ["body", "name"], "msg": "String should have at least 1 character", "type": "string_too_short", - "url": "https://errors.pydantic.dev/2.7/v/string_too_short", } ] } @@ -201,7 +200,6 @@ async def test_check_fields_validation_on_create_connection( "loc": ["body", "name"], "msg": "Input should be a valid string", "type": "string_type", - "url": "https://errors.pydantic.dev/2.7/v/string_type", } ] } @@ -234,7 +232,6 @@ async def test_check_fields_validation_on_create_connection( "loc": ["body", "description"], "msg": "Input should be a valid string", "type": "string_type", - "url": "https://errors.pydantic.dev/2.7/v/string_type", } ] } @@ -280,7 +277,6 @@ async def test_check_fields_validation_on_create_connection( "msg": "Input tag 'POSTGRESQL' found using 'type' does not match " f"any of the expected tags: {ALLOWED_SOURCES}", "type": "union_tag_invalid", - "url": "https://errors.pydantic.dev/2.7/v/union_tag_invalid", } ] } diff --git a/tests/test_unit/test_connections/test_update_connection.py b/tests/test_unit/test_connections/test_update_connection.py index 1135b62f..bad6fd59 100644 --- a/tests/test_unit/test_connections/test_update_connection.py +++ b/tests/test_unit/test_connections/test_update_connection.py @@ -87,7 +87,6 
@@ async def test_check_name_field_validation_on_update_connection( "loc": ["body", "name"], "msg": "String should have at least 1 character", "type": "string_too_short", - "url": "https://errors.pydantic.dev/2.7/v/string_too_short", } ] } @@ -175,7 +174,6 @@ async def test_update_connection_data_fields( "loc": ["body", "connection_data"], "msg": "Unable to extract tag using discriminator 'type'", "type": "union_tag_not_found", - "url": "https://errors.pydantic.dev/2.7/v/union_tag_not_found", } ] } diff --git a/tests/test_unit/test_groups/test_add_user_to_group.py b/tests/test_unit/test_groups/test_add_user_to_group.py index b7ff3d5f..3ee7e7ee 100644 --- a/tests/test_unit/test_groups/test_add_user_to_group.py +++ b/tests/test_unit/test_groups/test_add_user_to_group.py @@ -130,7 +130,6 @@ async def test_owner_cannot_add_user_to_group_with_wrong_role( "loc": ["body", "role"], "msg": "Input should be 'Maintainer', 'Developer' or 'Guest'", "type": "enum", - "url": "https://errors.pydantic.dev/2.7/v/enum", } ] } @@ -159,7 +158,6 @@ async def test_owner_cannot_add_user_to_group_without_role( "loc": ["body"], "msg": "Field required", "type": "missing", - "url": "https://errors.pydantic.dev/2.7/v/missing", } ] } diff --git a/tests/test_unit/test_groups/test_update_group_by_id.py b/tests/test_unit/test_groups/test_update_group_by_id.py index 44156fe0..bb62724b 100644 --- a/tests/test_unit/test_groups/test_update_group_by_id.py +++ b/tests/test_unit/test_groups/test_update_group_by_id.py @@ -128,7 +128,6 @@ async def test_validation_on_update_group( "loc": ["body"], "msg": "Field required", "type": "missing", - "url": "https://errors.pydantic.dev/2.7/v/missing", } ] } diff --git a/tests/test_unit/test_groups/test_update_member_role.py b/tests/test_unit/test_groups/test_update_member_role.py index 82ab297d..4c6cd9cb 100644 --- a/tests/test_unit/test_groups/test_update_member_role.py +++ b/tests/test_unit/test_groups/test_update_member_role.py @@ -78,7 +78,6 @@ async def test_owner_of_group_can_not_update_user_role_with_wrong_role( "loc": ["body", "role"], "msg": "Input should be 'Maintainer', 'Developer' or 'Guest'", "type": "enum", - "url": "https://errors.pydantic.dev/2.7/v/enum", } ] } diff --git a/tests/test_unit/test_queue/test_create_queue.py b/tests/test_unit/test_queue/test_create_queue.py index dcbe3f14..bee5bdc4 100644 --- a/tests/test_unit/test_queue/test_create_queue.py +++ b/tests/test_unit/test_queue/test_create_queue.py @@ -229,7 +229,6 @@ async def test_superuser_cannot_create_queue_with_unknown_group_error( "loc": ["body", "name"], "msg": "String should match pattern '^[-_a-zA-Z0-9]+$'", "type": "string_pattern_mismatch", - "url": "https://errors.pydantic.dev/2.7/v/string_pattern_mismatch", } ] }, @@ -244,7 +243,6 @@ async def test_superuser_cannot_create_queue_with_unknown_group_error( "loc": ["body", "name"], "msg": "String should match pattern '^[-_a-zA-Z0-9]+$'", "type": "string_pattern_mismatch", - "url": "https://errors.pydantic.dev/2.7/v/string_pattern_mismatch", } ] }, @@ -259,7 +257,6 @@ async def test_superuser_cannot_create_queue_with_unknown_group_error( "loc": ["body", "name"], "msg": "String should match pattern '^[-_a-zA-Z0-9]+$'", "type": "string_pattern_mismatch", - "url": "https://errors.pydantic.dev/2.7/v/string_pattern_mismatch", } ] }, @@ -274,7 +271,6 @@ async def test_superuser_cannot_create_queue_with_unknown_group_error( "msg": "String should have at most 128 characters", "input": 129 * "q", "ctx": {"max_length": 128}, - "url": 
"https://errors.pydantic.dev/2.7/v/string_too_long", } ] }, @@ -289,7 +285,6 @@ async def test_superuser_cannot_create_queue_with_unknown_group_error( "loc": ["body", "name"], "msg": "String should match pattern '^[-_a-zA-Z0-9]+$'", "type": "string_pattern_mismatch", - "url": "https://errors.pydantic.dev/2.7/v/string_pattern_mismatch", } ] }, @@ -303,7 +298,6 @@ async def test_superuser_cannot_create_queue_with_unknown_group_error( "loc": ["body", "name"], "msg": "Input should be a valid string", "type": "string_type", - "url": "https://errors.pydantic.dev/2.7/v/string_type", } ] }, diff --git a/tests/test_unit/test_transfers/test_copy_transfer.py b/tests/test_unit/test_transfers/test_copy_transfer.py index 6f987e0f..0162f803 100644 --- a/tests/test_unit/test_transfers/test_copy_transfer.py +++ b/tests/test_unit/test_transfers/test_copy_transfer.py @@ -392,7 +392,6 @@ async def test_check_validate_copy_transfer_parameter_new_name( "loc": ["body", "new_name"], "msg": "String should have at least 1 character", "type": "string_too_short", - "url": "https://errors.pydantic.dev/2.7/v/string_too_short", } ] } diff --git a/tests/test_unit/test_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_create_transfer.py index 866ed7f0..8121fe76 100644 --- a/tests/test_unit/test_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_create_transfer.py @@ -245,7 +245,6 @@ async def test_superuser_can_create_transfer( "loc": ["body", "name"], "msg": "String should have at least 1 character", "type": "string_too_short", - "url": "https://errors.pydantic.dev/2.7/v/string_too_short", }, ), ( @@ -255,7 +254,6 @@ async def test_superuser_can_create_transfer( "loc": ["body", "name"], "msg": "Input should be a valid string", "type": "string_type", - "url": "https://errors.pydantic.dev/2.7/v/string_type", }, ), ( @@ -265,7 +263,6 @@ async def test_superuser_can_create_transfer( "loc": ["body", "is_scheduled"], "msg": "Input should be a valid boolean, unable to interpret input", "type": "bool_parsing", - "url": "https://errors.pydantic.dev/2.7/v/bool_parsing", }, ), ( @@ -288,7 +285,6 @@ async def test_superuser_can_create_transfer( "loc": ["body"], "msg": "Value error, If transfer must be scheduled than set schedule param", "type": "value_error", - "url": "https://errors.pydantic.dev/2.7/v/value_error", }, ), ( @@ -307,7 +303,6 @@ async def test_superuser_can_create_transfer( "does not match any of the expected tags: 'full', " "'incremental'", "type": "union_tag_invalid", - "url": "https://errors.pydantic.dev/2.7/v/union_tag_invalid", }, ), ( @@ -329,7 +324,6 @@ async def test_superuser_can_create_transfer( "does not match any of the expected tags: 'postgres', " "'hdfs', 'hive', 'oracle', 's3'", "type": "union_tag_invalid", - "url": "https://errors.pydantic.dev/2.7/v/union_tag_invalid", }, ), ), @@ -572,7 +566,6 @@ async def test_developer_plus_can_not_create_transfer_with_target_format_json( "msg": "Input tag 'json' found using 'type' does not match any of the expected tags: 'csv', 'jsonline'", "input": {"type": "json", "lineSep": "\n", "encoding": "utf-8"}, "ctx": {"discriminator": "'type'", "tag": "json", "expected_tags": "'csv', 'jsonline'"}, - "url": "https://errors.pydantic.dev/2.7/v/union_tag_invalid", } ] } diff --git a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py index b52cacc0..9d4fc945 100644 --- 
a/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py +++ b/tests/test_unit/test_transfers/test_file_transfers/test_create_transfer.py @@ -254,7 +254,6 @@ async def test_cannot_create_file_transfer_with_relative_path( "loc": ["body", "source_params", "s3", "directory_path"], "msg": "Value error, Directory path must be absolute", "type": "value_error", - "url": "https://errors.pydantic.dev/2.7/v/value_error", }, { "ctx": {"error": {}}, @@ -262,7 +261,6 @@ async def test_cannot_create_file_transfer_with_relative_path( "loc": ["body", "target_params", "s3", "directory_path"], "msg": "Value error, Directory path must be absolute", "type": "value_error", - "url": "https://errors.pydantic.dev/2.7/v/value_error", }, ], } diff --git a/tests/test_unit/test_transfers/test_update_transfer.py b/tests/test_unit/test_transfers/test_update_transfer.py index 61e6a317..034b9f39 100644 --- a/tests/test_unit/test_transfers/test_update_transfer.py +++ b/tests/test_unit/test_transfers/test_update_transfer.py @@ -140,7 +140,6 @@ async def test_check_name_field_validation_on_update_transfer( "loc": ["body", "name"], "msg": "String should have at least 1 character", "type": "string_too_short", - "url": "https://errors.pydantic.dev/2.7/v/string_too_short", } ] } diff --git a/tests/test_unit/test_users.py b/tests/test_unit/test_users.py index 6d0cdc5f..07c24ff4 100644 --- a/tests/test_unit/test_users.py +++ b/tests/test_unit/test_users.py @@ -152,7 +152,6 @@ async def test_update_user( "loc": ["body", "username"], "msg": "String should match pattern '^[_a-z0-9]+$'", "type": "string_pattern_mismatch", - "url": "https://errors.pydantic.dev/2.7/v/string_pattern_mismatch", } ] } From e27ad39ebd9df8b482ae78fe652de6c6f8891790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 16 Apr 2024 13:50:36 +0300 Subject: [PATCH 17/18] [DOP-15023] Pass Run to CREATE_SPARK_SESSION_FUNCTION --- docker/Dockerfile.worker | 2 +- docs/changelog/next_release/38.breaking.rst | 2 + syncmaster/dto/connections.py | 13 ++-- syncmaster/dto/transfers.py | 64 +++++++++------ syncmaster/worker/controller.py | 50 ++++++------ syncmaster/worker/handlers/base.py | 46 ++++------- syncmaster/worker/handlers/db/__init__.py | 2 + syncmaster/worker/handlers/db/base.py | 38 +++++++++ syncmaster/worker/handlers/db/hive.py | 37 +++++++++ syncmaster/worker/handlers/{ => db}/oracle.py | 37 ++++----- .../worker/handlers/{ => db}/postgres.py | 37 ++++----- syncmaster/worker/handlers/file/base.py | 49 +++++------- syncmaster/worker/handlers/file/hdfs.py | 14 +++- syncmaster/worker/handlers/file/s3.py | 15 +++- syncmaster/worker/handlers/hive.py | 41 ---------- syncmaster/worker/spark.py | 21 +++-- syncmaster/worker/transfer.py | 30 +++---- tests/spark/__init__.py | 55 +++++++++++++ tests/spark/get_worker_spark_session.py | 78 ------------------- .../test_run_transfer/conftest.py | 5 -- .../test_run_transfer/test_oracle.py | 2 +- 21 files changed, 321 insertions(+), 317 deletions(-) create mode 100644 docs/changelog/next_release/38.breaking.rst create mode 100644 syncmaster/worker/handlers/db/__init__.py create mode 100644 syncmaster/worker/handlers/db/base.py create mode 100644 syncmaster/worker/handlers/db/hive.py rename syncmaster/worker/handlers/{ => db}/oracle.py (52%) rename syncmaster/worker/handlers/{ => db}/postgres.py (51%) delete mode 100644 
syncmaster/worker/handlers/hive.py delete mode 100644 tests/spark/get_worker_spark_session.py diff --git a/docker/Dockerfile.worker b/docker/Dockerfile.worker index 7b32bfa8..84830b65 100644 --- a/docker/Dockerfile.worker +++ b/docker/Dockerfile.worker @@ -39,7 +39,7 @@ COPY ./syncmaster/ /app/syncmaster/ FROM base as test -ENV CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session.get_worker_spark_session +ENV CREATE_SPARK_SESSION_FUNCTION=tests.spark.get_worker_spark_session # CI runs tests in the worker container, so we need backend dependencies too RUN poetry install --no-root --extras "worker backend" --with test --without docs,dev diff --git a/docs/changelog/next_release/38.breaking.rst b/docs/changelog/next_release/38.breaking.rst new file mode 100644 index 00000000..ecbf00c7 --- /dev/null +++ b/docs/changelog/next_release/38.breaking.rst @@ -0,0 +1,2 @@ +Pass current ``Run`` to ``CREATE_SPARK_SESSION_FUNCTION``. This allows using run/transfer/group information for Spark session options, +like ``appName`` or custom ones. diff --git a/syncmaster/dto/connections.py b/syncmaster/dto/connections.py index 91877325..46f369b8 100644 --- a/syncmaster/dto/connections.py +++ b/syncmaster/dto/connections.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 from dataclasses import dataclass +from typing import ClassVar @dataclass class ConnectionDTO: - pass + type: ClassVar[str] @dataclass @@ -16,7 +17,7 @@ class PostgresConnectionDTO(ConnectionDTO): password: str additional_params: dict database_name: str - type: str = "postgres" + type: ClassVar[str] = "postgres" @dataclass @@ -28,7 +29,7 @@ class OracleConnectionDTO(ConnectionDTO): additional_params: dict sid: str | None = None service_name: str | None = None - type: str = "oracle" + type: ClassVar[str] = "oracle" @dataclass @@ -36,7 +37,7 @@ class HiveConnectionDTO(ConnectionDTO): user: str password: str cluster: str - type: str = "hive" + type: ClassVar[str] = "hive" @dataclass @@ -44,7 +45,7 @@ class HDFSConnectionDTO(ConnectionDTO): user: str password: str cluster: str - type: str = "hdfs" + type: ClassVar[str] = "hdfs" @dataclass @@ -57,4 +58,4 @@ class S3ConnectionDTO(ConnectionDTO): additional_params: dict region: str | None = None protocol: str = "https" - type: str = "s3" + type: ClassVar[str] = "s3" diff --git a/syncmaster/dto/transfers.py b/syncmaster/dto/transfers.py index 44824811..50de56f0 100644 --- a/syncmaster/dto/transfers.py +++ b/syncmaster/dto/transfers.py @@ -1,46 +1,66 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 +import json from dataclasses import dataclass +from typing import ClassVar -from syncmaster.schemas.v1.transfers.file_format import CSV, JSON, JSONLine +from onetl.file.format import CSV, JSON, JSONLine @dataclass class TransferDTO: - pass + type: ClassVar[str] @dataclass -class PostgresTransferDTO(TransferDTO): +class DBTransferDTO(TransferDTO): table_name: str - type: str = "postgres" @dataclass -class OracleTransferDTO(TransferDTO): - table_name: str - type: str = "oracle" +class FileTransferDTO(TransferDTO): + directory_path: str + file_format: CSV | JSONLine | JSON + options: dict + df_schema: dict | None = None + + def __post_init__(self): + if isinstance(self.file_format, dict): + self.file_format = self._get_format(self.file_format.copy()) + if isinstance(self.df_schema, str): + self.df_schema = json.loads(self.df_schema) + + def _get_format(self, file_format: dict): 
+ file_type = file_format.pop("type", None) + if file_type == "csv": + return CSV.parse_obj(file_format) + if file_type == "jsonline": + return JSONLine.parse_obj(file_format) + if file_type == "json": + return JSON.parse_obj(file_format) + raise ValueError("Unknown file type") @dataclass -class HiveTransferDTO(TransferDTO): - table_name: str - type: str = "hive" +class PostgresTransferDTO(DBTransferDTO): + type: ClassVar[str] = "postgres" @dataclass -class S3TransferDTO(TransferDTO): - directory_path: str - file_format: CSV | JSONLine | JSON - options: dict - df_schema: dict | None = None - type: str = "s3" +class OracleTransferDTO(DBTransferDTO): + type: ClassVar[str] = "oracle" @dataclass -class HDFSTransferDTO(TransferDTO): - directory_path: str - file_format: CSV | JSONLine | JSON - options: dict - df_schema: dict | None = None - type: str = "hdfs" +class HiveTransferDTO(DBTransferDTO): + type: ClassVar[str] = "hive" + + +@dataclass +class S3TransferDTO(FileTransferDTO): + type: ClassVar[str] = "s3" + + +@dataclass +class HDFSTransferDTO(FileTransferDTO): + type: ClassVar[str] = "hdfs" diff --git a/syncmaster/worker/controller.py b/syncmaster/worker/controller.py index 26408e63..72eca1b9 100644 --- a/syncmaster/worker/controller.py +++ b/syncmaster/worker/controller.py @@ -4,7 +4,7 @@ from typing import Any from syncmaster.config import Settings -from syncmaster.db.models import Connection, Transfer +from syncmaster.db.models import Connection, Run from syncmaster.dto.connections import ( HDFSConnectionDTO, HiveConnectionDTO, @@ -21,11 +21,11 @@ ) from syncmaster.exceptions.connection import ConnectionTypeNotRecognizedError from syncmaster.worker.handlers.base import Handler +from syncmaster.worker.handlers.db.hive import HiveHandler +from syncmaster.worker.handlers.db.oracle import OracleHandler +from syncmaster.worker.handlers.db.postgres import PostgresHandler from syncmaster.worker.handlers.file.hdfs import HDFSHandler from syncmaster.worker.handlers.file.s3 import S3Handler -from syncmaster.worker.handlers.hive import HiveHandler -from syncmaster.worker.handlers.oracle import OracleHandler -from syncmaster.worker.handlers.postgres import PostgresHandler logger = logging.getLogger(__name__) @@ -65,47 +65,40 @@ class TransferController: def __init__( self, - transfer: Transfer, + run: Run, source_connection: Connection, source_auth_data: dict, target_connection: Connection, target_auth_data: dict, settings: Settings, ): + self.run = run + self.settings = settings self.source_handler = self.get_handler( connection_data=source_connection.data, - transfer_params=transfer.source_params, + transfer_params=run.transfer.source_params, connection_auth_data=source_auth_data, ) self.target_handler = self.get_handler( connection_data=target_connection.data, - transfer_params=transfer.target_params, + transfer_params=run.transfer.target_params, connection_auth_data=target_auth_data, ) - spark = settings.CREATE_SPARK_SESSION_FUNCTION( - settings, - target=self.target_handler.connection_dto, + + def perform_transfer(self) -> None: + spark = self.settings.CREATE_SPARK_SESSION_FUNCTION( + settings=self.settings, + run=self.run, source=self.source_handler.connection_dto, + target=self.target_handler.connection_dto, ) - self.source_handler.set_spark(spark) - self.target_handler.set_spark(spark) - logger.info("source connection = %s", self.source_handler) - logger.info("target connection = %s", self.target_handler) - - def start_transfer(self) -> None: - self.source_handler.init_connection() - 
self.source_handler.init_reader() - - self.target_handler.init_connection() - self.target_handler.init_writer() - logger.info("Source and target were initialized") - - df = self.target_handler.normalize_column_name(self.source_handler.read()) - logger.info("Data has been read") + with spark: + self.source_handler.connect(spark) + self.target_handler.connect(spark) - self.target_handler.write(df) - logger.info("Data has been inserted") + df = self.source_handler.read() + self.target_handler.write(df) def get_handler( self, @@ -114,7 +107,8 @@ def get_handler( transfer_params: dict[str, Any], ) -> Handler: connection_data.update(connection_auth_data) - handler_type = connection_data["type"] + handler_type = connection_data.pop("type") + transfer_params.pop("type", None) if connection_handler_proxy.get(handler_type, None) is None: raise ConnectionTypeNotRecognizedError diff --git a/syncmaster/worker/handlers/base.py b/syncmaster/worker/handlers/base.py index e6a4fe98..0d20a607 100644 --- a/syncmaster/worker/handlers/base.py +++ b/syncmaster/worker/handlers/base.py @@ -1,49 +1,33 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 -from abc import ABC -from onetl.db import DBReader, DBWriter -from pyspark.sql import SparkSession -from pyspark.sql.dataframe import DataFrame +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING from syncmaster.dto.connections import ConnectionDTO from syncmaster.dto.transfers import TransferDTO +if TYPE_CHECKING: + from pyspark.sql import SparkSession + from pyspark.sql.dataframe import DataFrame + class Handler(ABC): def __init__( self, connection_dto: ConnectionDTO, transfer_dto: TransferDTO, - spark: SparkSession | None = None, - ) -> None: - self.spark = spark - self.reader: DBReader | None = None - self.writer: DBWriter | None = None + ): self.connection_dto = connection_dto self.transfer_dto = transfer_dto - def init_connection(self): ... - - def set_spark(self, spark: SparkSession): - self.spark = spark - - def init_reader(self): - if self.connection_dto is None: - raise ValueError("At first you need to initialize connection. Run `init_connection") - - def init_writer(self): - if self.connection_dto is None: - raise ValueError("At first you need to initialize connection. Run `init_connection") - - def read(self) -> DataFrame: - if self.reader is None: - raise ValueError("Reader is not initialized") - return self.reader.run() + @abstractmethod + def connect(self, spark: SparkSession) -> None: ... - def write(self, df: DataFrame) -> None: - if self.writer is None: - raise ValueError("Writer is not initialized") - return self.writer.run(df=df) + @abstractmethod + def read(self) -> DataFrame: ... - def normalize_column_name(self, df: DataFrame) -> DataFrame: ... + @abstractmethod + def write(self, df: DataFrame) -> None: ... 
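The refactored ``Handler`` contract above reduces each handler to three methods (``connect(spark)``, ``read()``, ``write(df)``), which ``TransferController.perform_transfer`` drives inside a single ``with spark:`` block, while the ``DBHandler`` helper introduced by this patch keeps the shared ``DBReader``/``DBWriter`` plumbing. Below is a minimal sketch of what a further database handler could look like under this contract; Clickhouse support, the DTO names and the ``"clickhouse"`` type string are illustrative assumptions and not part of this patch, only the ``DBHandler`` base and the ``PostgresHandler`` pattern being mirrored are real:

.. code-block:: python

    # A sketch only: Clickhouse support is not part of this patch, and these DTOs are hypothetical.
    from __future__ import annotations

    from dataclasses import dataclass
    from typing import TYPE_CHECKING, ClassVar

    from onetl.connection import Clickhouse

    from syncmaster.dto.connections import ConnectionDTO
    from syncmaster.dto.transfers import DBTransferDTO
    from syncmaster.worker.handlers.db.base import DBHandler

    if TYPE_CHECKING:
        from pyspark.sql import SparkSession
        from pyspark.sql.dataframe import DataFrame


    @dataclass
    class ClickhouseConnectionDTO(ConnectionDTO):
        # hypothetical DTO, mirroring PostgresConnectionDTO
        host: str
        port: int
        user: str
        password: str
        additional_params: dict
        database_name: str
        type: ClassVar[str] = "clickhouse"


    @dataclass
    class ClickhouseTransferDTO(DBTransferDTO):
        # table_name is inherited from DBTransferDTO
        type: ClassVar[str] = "clickhouse"


    class ClickhouseHandler(DBHandler):
        connection: Clickhouse
        connection_dto: ClickhouseConnectionDTO
        transfer_dto: ClickhouseTransferDTO

        def connect(self, spark: SparkSession):
            # same shape as PostgresHandler.connect(); assumes onetl's Clickhouse
            # connection accepts the Postgres-like keyword arguments used here
            self.connection = Clickhouse(
                host=self.connection_dto.host,
                port=self.connection_dto.port,
                user=self.connection_dto.user,
                password=self.connection_dto.password,
                database=self.connection_dto.database_name,
                extra=self.connection_dto.additional_params,
                spark=spark,
            ).check()

        def normalize_column_names(self, df: DataFrame) -> DataFrame:
            # read()/write() come from DBHandler; only column normalization is handler-specific
            for column_name in df.columns:
                df = df.withColumnRenamed(column_name, column_name.lower())
            return df

Such a handler would also have to be registered in the ``connection_handler_proxy`` mapping consulted by ``TransferController.get_handler``; that wiring is intentionally left out of the sketch.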
diff --git a/syncmaster/worker/handlers/db/__init__.py b/syncmaster/worker/handlers/db/__init__.py new file mode 100644 index 00000000..104aecaf --- /dev/null +++ b/syncmaster/worker/handlers/db/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 diff --git a/syncmaster/worker/handlers/db/base.py b/syncmaster/worker/handlers/db/base.py new file mode 100644 index 00000000..240c665c --- /dev/null +++ b/syncmaster/worker/handlers/db/base.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from abc import abstractmethod +from typing import TYPE_CHECKING + +from onetl.base import BaseDBConnection +from onetl.db import DBReader, DBWriter + +from syncmaster.dto.transfers import DBTransferDTO +from syncmaster.worker.handlers.base import Handler + +if TYPE_CHECKING: + from pyspark.sql.dataframe import DataFrame + + +class DBHandler(Handler): + connection: BaseDBConnection + transfer_dto: DBTransferDTO + + def read(self) -> DataFrame: + reader = DBReader( + connection=self.connection, + table=self.transfer_dto.table_name, + ) + return reader.run() + + def write(self, df: DataFrame) -> None: + writer = DBWriter( + connection=self.connection, + table=self.transfer_dto.table_name, + ) + return writer.run(df=self.normalize_column_names(df)) + + @abstractmethod + def normalize_column_names(self, df: DataFrame) -> DataFrame: ... diff --git a/syncmaster/worker/handlers/db/hive.py b/syncmaster/worker/handlers/db/hive.py new file mode 100644 index 00000000..3c1291a7 --- /dev/null +++ b/syncmaster/worker/handlers/db/hive.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from onetl.connection import Hive + +from syncmaster.dto.connections import HiveConnectionDTO +from syncmaster.dto.transfers import HiveTransferDTO +from syncmaster.worker.handlers.db.base import DBHandler + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + from pyspark.sql.dataframe import DataFrame + + +class HiveHandler(DBHandler): + connection: Hive + connection_dto: HiveConnectionDTO + transfer_dto: HiveTransferDTO + + def connect(self, spark: SparkSession): + self.connection = Hive( + cluster=self.connection_dto.cluster, + spark=spark, + ).check() + + def read(self) -> DataFrame: + self.connection.spark.catalog.refreshTable(self.transfer_dto.table_name) + return super().read() + + def normalize_column_names(self, df: DataFrame) -> DataFrame: + for column_name in df.columns: + df = df.withColumnRenamed(column_name, column_name.lower()) + return df diff --git a/syncmaster/worker/handlers/oracle.py b/syncmaster/worker/handlers/db/oracle.py similarity index 52% rename from syncmaster/worker/handlers/oracle.py rename to syncmaster/worker/handlers/db/oracle.py index f8a79ff9..70fc81c1 100644 --- a/syncmaster/worker/handlers/oracle.py +++ b/syncmaster/worker/handlers/db/oracle.py @@ -1,20 +1,27 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import TYPE_CHECKING + from onetl.connection import Oracle -from onetl.db import DBReader, DBWriter -from pyspark.sql.dataframe import DataFrame from syncmaster.dto.connections import OracleConnectionDTO from syncmaster.dto.transfers import OracleTransferDTO -from 
syncmaster.worker.handlers.base import Handler +from syncmaster.worker.handlers.db.base import DBHandler + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + from pyspark.sql.dataframe import DataFrame -class OracleHandler(Handler): +class OracleHandler(DBHandler): connection: Oracle connection_dto: OracleConnectionDTO transfer_dto: OracleTransferDTO - def init_connection(self): + def connect(self, spark: SparkSession): self.connection = Oracle( host=self.connection_dto.host, port=self.connection_dto.port, @@ -23,26 +30,10 @@ def init_connection(self): sid=self.connection_dto.sid, service_name=self.connection_dto.service_name, extra=self.connection_dto.additional_params, - spark=self.spark, + spark=spark, ).check() - def init_reader(self): - super().init_reader() - df = self.connection.get_df_schema(self.transfer_dto.table_name) - self.reader = DBReader( - connection=self.connection, - table=self.transfer_dto.table_name, - columns=[f'"{f}"' for f in df.fieldNames()], - ) - - def init_writer(self): - super().init_writer() - self.writer = DBWriter( - connection=self.connection, - table=self.transfer_dto.table_name, - ) - - def normalize_column_name(self, df: DataFrame) -> DataFrame: + def normalize_column_names(self, df: DataFrame) -> DataFrame: for column_name in df.columns: df = df.withColumnRenamed(column_name, column_name.upper()) return df diff --git a/syncmaster/worker/handlers/postgres.py b/syncmaster/worker/handlers/db/postgres.py similarity index 51% rename from syncmaster/worker/handlers/postgres.py rename to syncmaster/worker/handlers/db/postgres.py index 25ddf337..1180337a 100644 --- a/syncmaster/worker/handlers/postgres.py +++ b/syncmaster/worker/handlers/db/postgres.py @@ -1,20 +1,27 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import TYPE_CHECKING + from onetl.connection import Postgres -from onetl.db import DBReader, DBWriter -from pyspark.sql.dataframe import DataFrame from syncmaster.dto.connections import PostgresConnectionDTO from syncmaster.dto.transfers import PostgresTransferDTO -from syncmaster.worker.handlers.base import Handler +from syncmaster.worker.handlers.db.base import DBHandler + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + from pyspark.sql.dataframe import DataFrame -class PostgresHandler(Handler): +class PostgresHandler(DBHandler): connection: Postgres connection_dto: PostgresConnectionDTO transfer_dto: PostgresTransferDTO - def init_connection(self): + def connect(self, spark: SparkSession): self.connection = Postgres( host=self.connection_dto.host, user=self.connection_dto.user, @@ -22,26 +29,10 @@ def init_connection(self): port=self.connection_dto.port, database=self.connection_dto.database_name, extra=self.connection_dto.additional_params, - spark=self.spark, + spark=spark, ).check() - def init_reader(self): - super().init_reader() - df = self.connection.get_df_schema(self.transfer_dto.table_name) - self.reader = DBReader( - connection=self.connection, - table=self.transfer_dto.table_name, - columns=[f'"{f}"' for f in df.fieldNames()], - ) - - def init_writer(self): - super().init_writer() - self.writer = DBWriter( - connection=self.connection, - table=self.transfer_dto.table_name, - ) - - def normalize_column_name(self, df: DataFrame) -> DataFrame: + def normalize_column_names(self, df: DataFrame) -> DataFrame: for column_name in df.columns: df = df.withColumnRenamed(column_name, column_name.lower()) return df diff 
--git a/syncmaster/worker/handlers/file/base.py b/syncmaster/worker/handlers/file/base.py index d2656d0c..7b32443a 100644 --- a/syncmaster/worker/handlers/file/base.py +++ b/syncmaster/worker/handlers/file/base.py @@ -1,56 +1,45 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 -import json + +from __future__ import annotations + +from typing import TYPE_CHECKING from onetl.base.base_file_df_connection import BaseFileDFConnection from onetl.file import FileDFReader, FileDFWriter -from onetl.file.format import CSV, JSON, JSONLine -from pyspark.sql.dataframe import DataFrame -from pyspark.sql.types import StructType from syncmaster.dto.connections import ConnectionDTO -from syncmaster.dto.transfers import TransferDTO +from syncmaster.dto.transfers import FileTransferDTO from syncmaster.worker.handlers.base import Handler +if TYPE_CHECKING: + from pyspark.sql.dataframe import DataFrame + class FileHandler(Handler): connection: BaseFileDFConnection connection_dto: ConnectionDTO - transfer_dto: TransferDTO - - def init_connection(self): ... + transfer_dto: FileTransferDTO - def init_reader(self): - super().init_reader() + def read(self) -> DataFrame: + from pyspark.sql.types import StructType - self.reader = FileDFReader( + reader = FileDFReader( connection=self.connection, - format=self._get_format(), + format=self.transfer_dto.file_format, source_path=self.transfer_dto.directory_path, - df_schema=StructType.fromJson(json.loads(self.transfer_dto.df_schema)), + df_schema=StructType.fromJson(self.transfer_dto.df_schema) if self.transfer_dto.df_schema else None, options=self.transfer_dto.options, ) - def init_writer(self): - super().init_writer() + return reader.run() - self.writer = FileDFWriter( + def write(self, df: DataFrame): + writer = FileDFWriter( connection=self.connection, - format=self._get_format(), + format=self.transfer_dto.file_format, target_path=self.transfer_dto.directory_path, options=self.transfer_dto.options, ) - def normalize_column_name(self, df: DataFrame) -> DataFrame: - return df - - def _get_format(self): - file_type = self.transfer_dto.file_format["type"] - if file_type == "csv": - return CSV.parse_obj(self.transfer_dto.file_format) - elif file_type == "jsonline": - return JSONLine.parse_obj(self.transfer_dto.file_format) - elif file_type == "json": - return JSON.parse_obj(self.transfer_dto.file_format) - else: - raise ValueError("Unknown file type") + return writer.run(df=df) diff --git a/syncmaster/worker/handlers/file/hdfs.py b/syncmaster/worker/handlers/file/hdfs.py index ce0a7441..a80949da 100644 --- a/syncmaster/worker/handlers/file/hdfs.py +++ b/syncmaster/worker/handlers/file/hdfs.py @@ -1,14 +1,24 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from typing import TYPE_CHECKING + from onetl.connection import SparkHDFS +from syncmaster.dto.connections import HDFSConnectionDTO from syncmaster.worker.handlers.file.base import FileHandler +if TYPE_CHECKING: + from pyspark.sql import SparkSession + class HDFSHandler(FileHandler): - def init_connection(self): + connection_dto: HDFSConnectionDTO + + def connect(self, spark: SparkSession): self.connection = SparkHDFS( cluster=self.connection_dto.cluster, - spark=self.spark, + spark=spark, ).check() diff --git a/syncmaster/worker/handlers/file/s3.py b/syncmaster/worker/handlers/file/s3.py index 69082541..b3c085e1 100644 --- a/syncmaster/worker/handlers/file/s3.py +++ 
b/syncmaster/worker/handlers/file/s3.py @@ -1,12 +1,23 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import TYPE_CHECKING + from onetl.connection import SparkS3 +from syncmaster.dto.connections import S3ConnectionDTO from syncmaster.worker.handlers.file.base import FileHandler +if TYPE_CHECKING: + from pyspark.sql import SparkSession + class S3Handler(FileHandler): - def init_connection(self): + connection_dto: S3ConnectionDTO + + def connect(self, spark: SparkSession): self.connection = SparkS3( host=self.connection_dto.host, port=self.connection_dto.port, @@ -16,5 +27,5 @@ def init_connection(self): protocol=self.connection_dto.protocol, region=self.connection_dto.region, extra=self.connection_dto.additional_params, - spark=self.spark, + spark=spark, ).check() diff --git a/syncmaster/worker/handlers/hive.py b/syncmaster/worker/handlers/hive.py deleted file mode 100644 index 3646e1c2..00000000 --- a/syncmaster/worker/handlers/hive.py +++ /dev/null @@ -1,41 +0,0 @@ -# SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) -# SPDX-License-Identifier: Apache-2.0 -from onetl.connection import Hive -from onetl.db import DBReader, DBWriter -from pyspark.sql.dataframe import DataFrame - -from syncmaster.dto.connections import HiveConnectionDTO -from syncmaster.dto.transfers import HiveTransferDTO -from syncmaster.worker.handlers.base import Handler - - -class HiveHandler(Handler): - connection: Hive - connection_dto: HiveConnectionDTO - transfer_dto: HiveTransferDTO - - def init_connection(self): - self.connection = Hive( - cluster=self.connection_dto.cluster, - spark=self.spark, - ).check() - - def init_reader(self): - super().init_reader() - self.spark.catalog.refreshTable(self.transfer_dto.table_name) - self.reader = DBReader( - connection=self.connection, - table=self.transfer_dto.table_name, - ) - - def init_writer(self): - super().init_writer() - self.writer = DBWriter( - connection=self.connection, - table=self.transfer_dto.table_name, - ) - - def normalize_column_name(self, df: DataFrame) -> DataFrame: - for column_name in df.columns: - df = df.withColumnRenamed(column_name, column_name.lower()) - return df diff --git a/syncmaster/worker/spark.py b/syncmaster/worker/spark.py index ff9688dc..b4882f57 100644 --- a/syncmaster/worker/spark.py +++ b/syncmaster/worker/spark.py @@ -1,24 +1,33 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + import logging +from typing import TYPE_CHECKING -import pyspark from onetl.connection import Oracle, Postgres, SparkS3 -from pyspark.sql import SparkSession from syncmaster.config import Settings +from syncmaster.db.models import Run from syncmaster.dto.connections import ConnectionDTO +if TYPE_CHECKING: + from pyspark.sql import SparkSession + log = logging.getLogger(__name__) def get_worker_spark_session( - settings: Settings, # used in test spark session definition + settings: Settings, + run: Run, source: ConnectionDTO, target: ConnectionDTO, ) -> SparkSession: - """Through the source and target parameters you can get credentials for authorization at the source""" - spark_builder = SparkSession.builder.appName("celery_worker") + """Construct Spark Session using run parameters and application settings""" + from pyspark.sql import SparkSession + + name = run.transfer.group.name + "_" + run.transfer.name + spark_builder = 
SparkSession.builder.appName(f"syncmaster_{name}") for k, v in get_spark_session_conf(source, target).items(): spark_builder = spark_builder.config(k, v) @@ -36,6 +45,8 @@ def get_packages(db_type: str) -> list[str]: if db_type == "oracle": return Oracle.get_packages() if db_type == "s3": + import pyspark + spark_version = pyspark.__version__ return SparkS3.get_packages(spark_version=spark_version) diff --git a/syncmaster/worker/transfer.py b/syncmaster/worker/transfer.py index e243a2ae..25c8e667 100644 --- a/syncmaster/worker/transfer.py +++ b/syncmaster/worker/transfer.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023-2024 MTS (Mobile Telesystems) # SPDX-License-Identifier: Apache-2.0 import logging -from datetime import datetime +from datetime import datetime, timezone import onetl from sqlalchemy import select @@ -21,9 +21,6 @@ @celery.task(name="run_transfer_task", bind=True, track_started=True) def run_transfer_task(self: WorkerTask, run_id: int) -> None: onetl.log.setup_logging(level=logging.INFO) - """Task for make transfer data""" - logger.info("Before spark initializing") - logger.info("Spark initialized") with Session(self.engine) as session: run_transfer( session=session, @@ -33,24 +30,25 @@ def run_transfer_task(self: WorkerTask, run_id: int) -> None: def run_transfer(session: Session, run_id: int, settings: Settings): - logger.info("Start transfering") + logger.info("Start transfer") run = session.get( Run, run_id, options=( + selectinload(Run.transfer), + selectinload(Run.transfer).selectinload(Transfer.group), selectinload(Run.transfer).selectinload(Transfer.source_connection), selectinload(Run.transfer).selectinload(Transfer.target_connection), ), ) if run is None: raise RunNotFoundError + run.status = Status.STARTED - run.started_at = datetime.utcnow() + run.started_at = datetime.now(tz=timezone.utc) session.add(run) session.commit() - controller = None - q_source_auth_data = select(AuthData).where(AuthData.connection_id == run.transfer.source_connection.id) q_target_auth_data = select(AuthData).where(AuthData.connection_id == run.transfer.target_connection.id) @@ -59,27 +57,21 @@ def run_transfer(session: Session, run_id: int, settings: Settings): try: controller = TransferController( - transfer=run.transfer, + run=run, source_connection=run.transfer.source_connection, target_connection=run.transfer.target_connection, source_auth_data=source_auth_data, target_auth_data=target_auth_data, settings=settings, ) - controller.start_transfer() + controller.perform_transfer() except Exception: run.status = Status.FAILED - logger.exception("Run `%s` was failed", run.id) + logger.exception("Run %r was failed", run.id) else: run.status = Status.FINISHED - logger.warning("Run `%s` was successful", run.id) - finally: - # Both the source and the receiver use the same spark session, - # so it is enough to stop the session at the source. 
- if controller is not None and controller.source_handler.spark is not None: - controller.source_handler.spark.sparkContext.stop() - controller.source_handler.spark.stop() + logger.warning("Run %r was successful", run.id) - run.ended_at = datetime.utcnow() + run.ended_at = datetime.now(tz=timezone.utc) session.add(run) session.commit() diff --git a/tests/spark/__init__.py b/tests/spark/__init__.py index e69de29b..293b7131 100644 --- a/tests/spark/__init__.py +++ b/tests/spark/__init__.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from celery.signals import worker_process_init, worker_process_shutdown +from coverage import Coverage +from onetl.connection import SparkHDFS +from onetl.hooks import hook + +from syncmaster.worker.spark import get_worker_spark_session + +# this is just to automatically import hooks +get_worker_spark_session = get_worker_spark_session + + +@SparkHDFS.Slots.get_cluster_namenodes.bind +@hook +def get_cluster_namenodes(cluster: str) -> set[str] | None: + if cluster == "test-hive": + return {"test-hive"} + return None + + +@SparkHDFS.Slots.is_namenode_active.bind +@hook +def is_namenode_active(host: str, cluster: str) -> bool: + if cluster == "test-hive": + return True + return False + + +@SparkHDFS.Slots.get_ipc_port.bind +@hook +def get_ipc_port(cluster: str) -> int | None: + if cluster == "test-hive": + return 9820 + return None + + +# Needed to collect code coverage by tests in the worker +# https://github.com/nedbat/coveragepy/issues/689#issuecomment-656706935 +COV = None + + +@worker_process_init.connect +def start_coverage(**kwargs): + global COV + + COV = Coverage(data_suffix=True) + COV.start() + + +@worker_process_shutdown.connect +def save_coverage(**kwargs): + if COV is not None: + COV.stop() + COV.save() diff --git a/tests/spark/get_worker_spark_session.py b/tests/spark/get_worker_spark_session.py deleted file mode 100644 index 8c40d97a..00000000 --- a/tests/spark/get_worker_spark_session.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import annotations - -import logging - -from celery.signals import worker_process_init, worker_process_shutdown -from coverage import Coverage -from onetl.connection import SparkHDFS -from onetl.hooks import hook -from pyspark.sql import SparkSession - -from syncmaster.config import Settings -from syncmaster.dto.connections import ConnectionDTO -from syncmaster.worker.spark import get_spark_session_conf - -log = logging.getLogger(__name__) - - -@SparkHDFS.Slots.get_cluster_namenodes.bind -@hook -def get_cluster_namenodes(cluster: str) -> set[str] | None: - if cluster == "test-hive": - return {"test-hive"} - return None - - -@SparkHDFS.Slots.is_namenode_active.bind -@hook -def is_namenode_active(host: str, cluster: str) -> bool: - if cluster == "test-hive": - return True - return False - - -@SparkHDFS.Slots.get_ipc_port.bind -@hook -def get_ipc_port(cluster: str) -> int | None: - if cluster == "test-hive": - return 9820 - return None - - -def get_worker_spark_session( - settings: Settings, - source: ConnectionDTO, - target: ConnectionDTO, -) -> SparkSession: - spark_builder = SparkSession.builder.appName("celery_worker") - - for k, v in get_spark_session_conf(source, target).items(): - spark_builder = spark_builder.config(k, v) - - if source.type == "hive" or target.type == "hive": - log.debug("Enabling Hive support") - spark_builder = spark_builder.enableHiveSupport() - - return spark_builder.getOrCreate() - - -# Needed to collect code coverage by tests in the worker -# 
https://github.com/nedbat/coveragepy/issues/689#issuecomment-656706935 - - -COV = None - - -@worker_process_init.connect -def start_coverage(**kwargs): - global COV - - COV = Coverage(data_suffix=True) - COV.start() - - -@worker_process_shutdown.connect -def save_coverage(**kwargs): - if COV is not None: - COV.stop() - COV.save() diff --git a/tests/test_integration/test_run_transfer/conftest.py b/tests/test_integration/test_run_transfer/conftest.py index 58f6fcf7..8233d102 100644 --- a/tests/test_integration/test_run_transfer/conftest.py +++ b/tests/test_integration/test_run_transfer/conftest.py @@ -98,7 +98,6 @@ def get_spark_session(connection_settings: Settings) -> SparkSession: ) def hive(test_settings: TestSettings) -> HiveConnectionDTO: return HiveConnectionDTO( - type="hive", cluster=test_settings.TEST_HIVE_CLUSTER, user=test_settings.TEST_HIVE_USER, password=test_settings.TEST_HIVE_PASSWORD, @@ -111,7 +110,6 @@ def hive(test_settings: TestSettings) -> HiveConnectionDTO: ) def hdfs(test_settings: TestSettings) -> HDFSConnectionDTO: return HDFSConnectionDTO( - type="hdfs", cluster=test_settings.TEST_HIVE_CLUSTER, user=test_settings.TEST_HIVE_USER, password=test_settings.TEST_HIVE_PASSWORD, @@ -124,7 +122,6 @@ def hdfs(test_settings: TestSettings) -> HDFSConnectionDTO: ) def oracle(test_settings: TestSettings) -> OracleConnectionDTO: return OracleConnectionDTO( - type="oracle", host=test_settings.TEST_ORACLE_HOST, port=test_settings.TEST_ORACLE_PORT, user=test_settings.TEST_ORACLE_USER, @@ -141,7 +138,6 @@ def oracle(test_settings: TestSettings) -> OracleConnectionDTO: ) def postgres(test_settings: TestSettings) -> PostgresConnectionDTO: return PostgresConnectionDTO( - type="postgres", host=test_settings.TEST_POSTGRES_HOST, port=test_settings.TEST_POSTGRES_PORT, user=test_settings.TEST_POSTGRES_USER, @@ -157,7 +153,6 @@ def postgres(test_settings: TestSettings) -> PostgresConnectionDTO: ) def s3(test_settings: TestSettings) -> S3ConnectionDTO: return S3ConnectionDTO( - type="s3", host=test_settings.TEST_S3_HOST, port=test_settings.TEST_S3_PORT, bucket=test_settings.TEST_S3_BUCKET, diff --git a/tests/test_integration/test_run_transfer/test_oracle.py b/tests/test_integration/test_run_transfer/test_oracle.py index 21ebea5f..fe2e5814 100644 --- a/tests/test_integration/test_run_transfer/test_oracle.py +++ b/tests/test_integration/test_run_transfer/test_oracle.py @@ -143,12 +143,12 @@ async def test_run_transfer_postgres_to_oracle_mixed_naming( token=group_owner.token, ) assert run_data["status"] == Status.FINISHED.value + reader = DBReader( connection=oracle, table=f"{oracle.user}.target_table", ) df = reader.run() - assert df.columns != init_df_with_mixed_column_naming.columns assert df.columns == [column.upper() for column in init_df_with_mixed_column_naming.columns] From 2695bf6f2f5b029f5e023d5f7d95bbf9be41979c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 22 Apr 2024 13:34:53 +0000 Subject: [PATCH 18/18] [DOP-15023] Prepare for release --- docs/changelog/0.1.5.rst | 23 +++++++++++++++++++ docs/changelog/index.rst | 1 + docs/changelog/next_release/38.breaking.rst | 2 -- docs/changelog/next_release/39.bugfix.rst | 1 - docs/changelog/next_release/40.bugfix.rst | 1 - docs/changelog/next_release/41.bugfix.rst | 1 - .../changelog/next_release/44.improvement.rst | 1 - syncmaster/worker/spark.py | 4 ++-- 8 files 
changed, 26 insertions(+), 8 deletions(-) create mode 100644 docs/changelog/0.1.5.rst delete mode 100644 docs/changelog/next_release/38.breaking.rst delete mode 100644 docs/changelog/next_release/39.bugfix.rst delete mode 100644 docs/changelog/next_release/40.bugfix.rst delete mode 100644 docs/changelog/next_release/41.bugfix.rst delete mode 100644 docs/changelog/next_release/44.improvement.rst diff --git a/docs/changelog/0.1.5.rst b/docs/changelog/0.1.5.rst new file mode 100644 index 00000000..581b5ff9 --- /dev/null +++ b/docs/changelog/0.1.5.rst @@ -0,0 +1,23 @@ +0.1.5 (2024-04-22) +================== + +Breaking Changes +---------------- + +- Pass current ``Run`` to ``CREATE_SPARK_SESSION_FUNCTION``. This allows using run/transfer/group information for Spark session options, + like ``appName`` or custom ones. (:issue:`38`) + + +Improvements +------------ + +- Reduce backend image size (:issue:`44`) + + +Bug Fixes +--------- + +- Fix 500 error in case of ``PATCH v1/connections/:id`` request with passed ``auth_data.password`` field value (:issue:`39`) +- Do not use ``asyncio.gather`` with SQLAlchemy requests (:issue:`40`) +- Fix 500 error while creating HDFS connection (:issue:`41`) +- Fix missing ``options`` field from Transfer params with ``hdfs`` and ``s3`` type (:issue:`39`) diff --git a/docs/changelog/index.rst b/docs/changelog/index.rst index a678f9fa..ac0e87f0 100644 --- a/docs/changelog/index.rst +++ b/docs/changelog/index.rst @@ -3,6 +3,7 @@ :caption: Changelog DRAFT + 0.1.5 0.1.4 0.1.3 0.1.2 diff --git a/docs/changelog/next_release/38.breaking.rst b/docs/changelog/next_release/38.breaking.rst deleted file mode 100644 index ecbf00c7..00000000 --- a/docs/changelog/next_release/38.breaking.rst +++ /dev/null @@ -1,2 +0,0 @@ -Pass current ``Run`` to ``CREATE_SPARK_SESSION_FUNCTION``. This allows using run/transfer/group information for Spark session options, -like ``appName`` or custom ones. diff --git a/docs/changelog/next_release/39.bugfix.rst b/docs/changelog/next_release/39.bugfix.rst deleted file mode 100644 index 2f621f6f..00000000 --- a/docs/changelog/next_release/39.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix 500 error in case of ``PATCH v1/connections/:id`` request with passed ``auth_data.password`` field value diff --git a/docs/changelog/next_release/40.bugfix.rst b/docs/changelog/next_release/40.bugfix.rst deleted file mode 100644 index f62ecd7c..00000000 --- a/docs/changelog/next_release/40.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Do not use ``asyncio.gather`` with SQLAlchemy requests. diff --git a/docs/changelog/next_release/41.bugfix.rst b/docs/changelog/next_release/41.bugfix.rst deleted file mode 100644 index 5e4c696e..00000000 --- a/docs/changelog/next_release/41.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix 500 error while creating HDFS connection. 
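To make the breaking change listed above concrete: any deployment that points ``CREATE_SPARK_SESSION_FUNCTION`` at its own factory now has to accept the extra ``run`` argument. A minimal sketch of such a factory, closely mirroring the updated ``get_worker_spark_session``; the module name, the ``appName`` scheme and the unused ``settings`` handling are assumptions, not something this release prescribes:

.. code-block:: python

    # A sketch of a user-supplied CREATE_SPARK_SESSION_FUNCTION target after 0.1.5.
    from __future__ import annotations

    from typing import TYPE_CHECKING

    from syncmaster.config import Settings
    from syncmaster.db.models import Run
    from syncmaster.dto.connections import ConnectionDTO
    from syncmaster.worker.spark import get_spark_session_conf

    if TYPE_CHECKING:
        from pyspark.sql import SparkSession


    def get_custom_spark_session(
        settings: Settings,  # kept for signature compatibility, unused in this sketch
        run: Run,  # new required parameter since this release
        source: ConnectionDTO,
        target: ConnectionDTO,
    ) -> SparkSession:
        # pyspark is imported lazily, same as in syncmaster.worker.spark
        from pyspark.sql import SparkSession

        # run/transfer/group information can now shape session options, e.g. appName
        name = f"syncmaster_{run.transfer.group.name}_{run.transfer.name}"
        builder = SparkSession.builder.appName(name)

        # reuse SyncMaster's connection-specific Spark options
        for key, value in get_spark_session_conf(source, target).items():
            builder = builder.config(key, value)

        if source.type == "hive" or target.type == "hive":
            builder = builder.enableHiveSupport()

        return builder.getOrCreate()

The worker would then be configured the same way the test image is, e.g. ``CREATE_SPARK_SESSION_FUNCTION=my_project.spark.get_custom_spark_session`` (the dotted path here is hypothetical).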
diff --git a/docs/changelog/next_release/44.improvement.rst b/docs/changelog/next_release/44.improvement.rst deleted file mode 100644 index 3ed90a96..00000000 --- a/docs/changelog/next_release/44.improvement.rst +++ /dev/null @@ -1 +0,0 @@ -Reduce Docker images size diff --git a/syncmaster/worker/spark.py b/syncmaster/worker/spark.py index b4882f57..b05b63bd 100644 --- a/syncmaster/worker/spark.py +++ b/syncmaster/worker/spark.py @@ -5,8 +5,6 @@ import logging from typing import TYPE_CHECKING -from onetl.connection import Oracle, Postgres, SparkS3 - from syncmaster.config import Settings from syncmaster.db.models import Run from syncmaster.dto.connections import ConnectionDTO @@ -40,6 +38,8 @@ def get_worker_spark_session( def get_packages(db_type: str) -> list[str]: + from onetl.connection import Oracle, Postgres, SparkS3 + if db_type == "postgres": return Postgres.get_packages() if db_type == "oracle":