From 361f12f30742469274f4f02f48d9f425b873a524 Mon Sep 17 00:00:00 2001 From: tokoko Date: Wed, 2 Oct 2024 21:41:06 +0000 Subject: [PATCH 1/2] add improved extension registry --- fetch_extensions.py | 35 - pixi.lock | 828 -------- pyproject.toml | 1 + subframe/__init__.py | 21 +- subframe/derivation_expression.py | 249 +++ subframe/extension_registry.py | 352 ++++ subframe/extensions/__init__.py | 0 subframe/extensions/extension_function.py | 21 - subframe/extensions/extension_registry.py | 57 - .../functions_aggregate_approx.yaml | 18 - .../functions_aggregate_decimal_output.yaml | 41 - .../functions_aggregate_generic.yaml | 42 - .../extensions/functions_arithmetic.yaml | 1844 ----------------- .../functions_arithmetic_decimal.yaml | 248 --- .../extensions/functions_boolean.yaml | 140 -- .../extensions/functions_comparison.yaml | 289 --- .../extensions/functions_datetime.yaml | 1110 ---------- .../extensions/functions_geometry.yaml | 239 --- .../extensions/functions_logarithmic.yaml | 254 --- .../extensions/functions_rounding.yaml | 270 --- .../extensions/extensions/functions_set.yaml | 27 - .../extensions/functions_string.yaml | 1481 ------------- .../extensions/type_variations.yaml | 25 - subframe/utils.py | 35 + subframe/value.py | 51 +- tests/test_derivation_expression.py | 89 + tests/test_registry.py | 190 ++ 27 files changed, 935 insertions(+), 7022 deletions(-) delete mode 100644 fetch_extensions.py delete mode 100644 pixi.lock create mode 100644 subframe/derivation_expression.py create mode 100644 subframe/extension_registry.py delete mode 100644 subframe/extensions/__init__.py delete mode 100644 subframe/extensions/extension_function.py delete mode 100644 subframe/extensions/extension_registry.py delete mode 100644 subframe/extensions/extensions/functions_aggregate_approx.yaml delete mode 100644 subframe/extensions/extensions/functions_aggregate_decimal_output.yaml delete mode 100644 subframe/extensions/extensions/functions_aggregate_generic.yaml delete mode 100644 subframe/extensions/extensions/functions_arithmetic.yaml delete mode 100644 subframe/extensions/extensions/functions_arithmetic_decimal.yaml delete mode 100644 subframe/extensions/extensions/functions_boolean.yaml delete mode 100644 subframe/extensions/extensions/functions_comparison.yaml delete mode 100644 subframe/extensions/extensions/functions_datetime.yaml delete mode 100644 subframe/extensions/extensions/functions_geometry.yaml delete mode 100644 subframe/extensions/extensions/functions_logarithmic.yaml delete mode 100644 subframe/extensions/extensions/functions_rounding.yaml delete mode 100644 subframe/extensions/extensions/functions_set.yaml delete mode 100644 subframe/extensions/extensions/functions_string.yaml delete mode 100644 subframe/extensions/extensions/type_variations.yaml create mode 100644 subframe/utils.py create mode 100644 tests/test_derivation_expression.py create mode 100644 tests/test_registry.py diff --git a/fetch_extensions.py b/fetch_extensions.py deleted file mode 100644 index 5a9c827..0000000 --- a/fetch_extensions.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from urllib.request import urlopen - -urls = [ - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_approx.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_decimal_output.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic_decimal.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_datetime.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_geometry.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_logarithmic.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_rounding.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_set.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_string.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/type_variations.yaml", -] - -for url in urls: - resource_url = url.replace( - "https://github.com/substrait-io/substrait/blob", - "https://raw.githubusercontent.com/substrait-io/substrait", - ) - with urlopen(resource_url) as response: - body = response.read() - - file_name = resource_url.split("/")[-1] - path = os.path.join("subframe", "extensions", "extensions", file_name) - - print(file_name) - - with open(path, "wb") as f: - f.write(body) diff --git a/pixi.lock b/pixi.lock deleted file mode 100644 index d016eab..0000000 --- a/pixi.lock +++ /dev/null @@ -1,828 +0,0 @@ -version: 5 -environments: - default: - channels: - - url: https://conda.anaconda.org/conda-forge/ - indexes: - - https://pypi.org/simple - packages: - linux-64: - - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.3-h5888daf_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2 - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.1.0-h77fa898_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h69a702a_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.12.5-h2ad013b_0_cpython.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h8827d51_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 - - pypi: https://files.pythonhosted.org/packages/6b/03/2cb0e5326e19b7d877bc9c3a7ef436a30a06835b638580d1f5e21a0409ed/atpublic-5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/95/08/d6302a15d9f2ae57949efd28dbd3613aae5ae6655e92a104562dc2de159f/datafusion-40.1.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/03/57/2b2998e5278b22648a72808c9fcfa33136c71ea99d63ceab1bfd35d363ce/duckdb-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/c2/27/625b85750662d764024f5faa23e5b2b0ef3eb1fde745bf3dbe34c1181861/ibis_framework-9.4.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/b4/53/959881915048b09cf54852d08f13ad8706bd680205df8b17e9898e0eb63c/ibis_substrait-4.0.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/cb/22/2b840d297183916a95847c11f82ae11e248fa98113490b2357f774651e1d/numpy-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/40/10/79e52ef01dfeb1c1ca47a109a01a248754ebe990e159a844ece12914de83/pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b3/05/1c84e2ebd1eb2817d92ae05a917e60e57b1c83f7b89e63c31df2cd6fcb70/parsy-2.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/19/15/da43113361db20f2d521bc38d92549edbe06856aeec085c420b2b8af5751/protobuf-5.28.0-cp38-abi3-manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/0f/f9/cf155cf32ca7d6fa3601bc4c5dd19086af4b320b706919d48a4c79081cf9/pytest-8.3.2-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/c7/d9/c2a126eeae791e90ea099d05cb0515feea3688474b978343f3cdcfe04523/rich-13.8.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/ff/14/3d2f6679f11c4c64a52aa4a8fd526a415c2d7a940914af33ad52902693f3/sqlglot-25.18.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/66/ca/731a15b98ff0af0447d756d4ca09708d141e4b5f6483cccf38db2cd1ad6c/substrait-0.22.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/b7/8a/d82202c9f89eab30f9fc05380daae87d617e2ad11571ab23d7c13a29bb54/toolz-0.12.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl - - pypi: . -packages: -- kind: conda - name: _libgcc_mutex - version: '0.1' - build: conda_forge - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 - sha256: fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726 - md5: d7c89558ba9fa0495403155b64376d81 - license: None - purls: [] - size: 2562 - timestamp: 1578324546067 -- kind: conda - name: _openmp_mutex - version: '4.5' - build: 2_gnu - build_number: 16 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - sha256: fbe2c5e56a653bebb982eda4876a9178aedfc2b545f25d0ce9c4c0b508253d22 - md5: 73aaf86a425cc6e73fcf236a5a46396d - depends: - - _libgcc_mutex 0.1 conda_forge - - libgomp >=7.5.0 - constrains: - - openmp_impl 9999 - license: BSD-3-Clause - license_family: BSD - purls: [] - size: 23621 - timestamp: 1650670423406 -- kind: pypi - name: atpublic - version: '5.0' - url: https://files.pythonhosted.org/packages/6b/03/2cb0e5326e19b7d877bc9c3a7ef436a30a06835b638580d1f5e21a0409ed/atpublic-5.0-py3-none-any.whl - sha256: b651dcd886666b1042d1e38158a22a4f2c267748f4e97fde94bc492a4a28a3f3 - requires_python: '>=3.8' -- kind: conda - name: bzip2 - version: 1.0.8 - build: h4bc722e_7 - build_number: 7 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda - sha256: 5ced96500d945fb286c9c838e54fa759aa04a7129c59800f0846b4335cee770d - md5: 62ee74e96c5ebb0af99386de58cf9553 - depends: - - __glibc >=2.17,<3.0.a0 - - libgcc-ng >=12 - license: bzip2-1.0.6 - license_family: BSD - purls: [] - size: 252783 - timestamp: 1720974456583 -- kind: conda - name: ca-certificates - version: 2024.8.30 - build: hbcca054_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda - sha256: afee721baa6d988e27fef1832f68d6f32ac8cc99cdf6015732224c2841a09cea - md5: c27d1c142233b5bc9ca570c6e2e0c244 - license: ISC - purls: [] - size: 159003 - timestamp: 1725018903918 -- kind: pypi - name: datafusion - version: 40.1.0 - url: https://files.pythonhosted.org/packages/95/08/d6302a15d9f2ae57949efd28dbd3613aae5ae6655e92a104562dc2de159f/datafusion-40.1.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - sha256: be44d24971e73d324a3f41503bb091f48d171d50d1d2415b469ca5e3953b5a0e - requires_dist: - - pyarrow>=11.0.0 - - typing-extensions ; python_full_version < '3.13' - requires_python: '>=3.6' -- kind: pypi - name: duckdb - version: 1.0.0 - url: https://files.pythonhosted.org/packages/03/57/2b2998e5278b22648a72808c9fcfa33136c71ea99d63ceab1bfd35d363ce/duckdb-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - sha256: 752e9d412b0a2871bf615a2ede54be494c6dc289d076974eefbf3af28129c759 - requires_python: '>=3.7.0' -- kind: pypi - name: ibis-framework - version: 9.4.0 - url: https://files.pythonhosted.org/packages/c2/27/625b85750662d764024f5faa23e5b2b0ef3eb1fde745bf3dbe34c1181861/ibis_framework-9.4.0-py3-none-any.whl - sha256: 0de6f5246c25d38873ef5bcd0cc6879ba0f6ac6696103036be7d988b400107be - requires_dist: - - atpublic>=2.3,<6 - - black>=22.1.0,<25 ; extra == 'decompiler' - - clickhouse-connect[arrow,numpy,pandas]>=0.5.23,<1 ; extra == 'clickhouse' - - dask[array,dataframe]>=2022.9.1,<2024.3.0 ; extra == 'dask' - - datafusion>=0.6,<41 ; extra == 'datafusion' - - db-dtypes>=0.3,<2 ; extra == 'bigquery' - - deltalake>=0.9.0,<1 ; extra == 'deltalake' - - duckdb>=0.8.1,<2 ; extra == 'duckdb' - - fsspec<2024.6.2 ; extra == 'examples' - - geoarrow-types>=0.2,<1 ; extra == 'geospatial' - - geopandas>=0.6,<2 ; extra == 'geospatial' - - google-cloud-bigquery>=3,<4 ; extra == 'bigquery' - - google-cloud-bigquery-storage>=2,<3 ; extra == 'bigquery' - - graphviz>=0.16,<1 ; extra == 'visualization' - - impyla>=0.17,<1 ; extra == 'impala' - - numpy>=1.23.2,<3 ; extra == 'bigquery' or extra == 'clickhouse' or extra == 'dask' or extra == 'datafusion' or extra == 'druid' or extra == 'duckdb' or extra == 'exasol' or extra == 'flink' or extra == 'impala' or extra == 'mssql' or extra == 'mysql' or extra == 'oracle' or extra == 'pandas' or extra == 'polars' or extra == 'postgres' or extra == 'pyspark' or extra == 'risingwave' or extra == 'snowflake' or extra == 'sqlite' or extra == 'trino' - - oracledb>=1.3.1,<3 ; extra == 'oracle' - - packaging>=21.3,<25 ; extra == 'dask' or extra == 'oracle' or extra == 'pandas' or extra == 'polars' or extra == 'pyspark' - - pandas>=1.5.3,<3 ; extra == 'bigquery' or extra == 'clickhouse' or extra == 'dask' or extra == 'datafusion' or extra == 'druid' or extra == 'duckdb' or extra == 'exasol' or extra == 'flink' or extra == 'impala' or extra == 'mssql' or extra == 'mysql' or extra == 'oracle' or extra == 'pandas' or extra == 'polars' or extra == 'postgres' or extra == 'pyspark' or extra == 'risingwave' or extra == 'snowflake' or extra == 'sqlite' or extra == 'trino' - - parsy>=2,<3 - - pins[gcs]>=0.8.3,<1 ; extra == 'examples' - - polars>=1,<2 ; extra == 'polars' - - psycopg2>=2.8.4,<3 ; extra == 'postgres' or extra == 'risingwave' - - pyarrow>=10.0.1,<18 ; extra == 'bigquery' or extra == 'clickhouse' or extra == 'dask' or extra == 'datafusion' or extra == 'druid' or extra == 'duckdb' or extra == 'exasol' or extra == 'flink' or extra == 'impala' or extra == 'mssql' or extra == 'mysql' or extra == 'oracle' or extra == 'pandas' or extra == 'polars' or extra == 'postgres' or extra == 'pyspark' or extra == 'risingwave' or extra == 'snowflake' or extra == 'sqlite' or extra == 'trino' - - pyarrow-hotfix>=0.4,<1 ; extra == 'bigquery' or extra == 'clickhouse' or extra == 'dask' or extra == 'datafusion' or extra == 'druid' or extra == 'duckdb' or extra == 'exasol' or extra == 'flink' or extra == 'impala' or extra == 'mssql' or extra == 'mysql' or extra == 'oracle' or extra == 'pandas' or extra == 'polars' or extra == 'postgres' or extra == 'pyspark' or extra == 'risingwave' or extra == 'snowflake' or extra == 'sqlite' or extra == 'trino' - - pydata-google-auth>=1.4.0,<2 ; extra == 'bigquery' - - pydruid>=0.6.7,<1 ; extra == 'druid' - - pyexasol[pandas]>=0.25.2,<1 ; extra == 'exasol' - - pymysql>=1,<2 ; extra == 'mysql' - - pyodbc>=4.0.39,<6 ; extra == 'mssql' - - pyproj>=3.3.0,<4 ; extra == 'geospatial' - - pyspark>=3.3.3,<4 ; extra == 'pyspark' - - python-dateutil>=2.8.2,<3 - - pytz>=2022.7 - - regex>=2021.7.6 ; extra == 'dask' or extra == 'pandas' or extra == 'sqlite' - - rich>=12.4.4,<14 ; extra == 'bigquery' or extra == 'clickhouse' or extra == 'dask' or extra == 'datafusion' or extra == 'druid' or extra == 'duckdb' or extra == 'exasol' or extra == 'flink' or extra == 'impala' or extra == 'mssql' or extra == 'mysql' or extra == 'oracle' or extra == 'pandas' or extra == 'polars' or extra == 'postgres' or extra == 'pyspark' or extra == 'risingwave' or extra == 'snowflake' or extra == 'sqlite' or extra == 'trino' - - shapely>=2,<3 ; extra == 'geospatial' - - snowflake-connector-python>=3.0.2,<4,!=3.3.0b1 ; extra == 'snowflake' - - sqlglot>=23.4,<25.19 - - toolz>=0.11,<1 - - trino>=0.321,<1 ; extra == 'trino' - - typing-extensions>=4.3.0,<5 - requires_python: '>=3.10,<4.0' -- kind: pypi - name: ibis-substrait - version: 4.0.1 - url: https://files.pythonhosted.org/packages/b4/53/959881915048b09cf54852d08f13ad8706bd680205df8b17e9898e0eb63c/ibis_substrait-4.0.1-py3-none-any.whl - sha256: 107ca49383a3cca2fdc88f67ea2f0172620c16fa8f39c9c52305af85dd6180b4 - requires_dist: - - ibis-framework>=9,<10 - - packaging>=21.3 - - pyyaml>=5 - - substrait>=0.2.1 - requires_python: '>=3.9,<4' -- kind: pypi - name: iniconfig - version: 2.0.0 - url: https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl - sha256: b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 - requires_python: '>=3.7' -- kind: conda - name: ld_impl_linux-64 - version: '2.40' - build: hf3520f5_7 - build_number: 7 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda - sha256: 764b6950aceaaad0c67ef925417594dd14cd2e22fff864aeef455ac259263d15 - md5: b80f2f396ca2c28b8c14c437a4ed1e74 - constrains: - - binutils_impl_linux-64 2.40 - license: GPL-3.0-only - license_family: GPL - purls: [] - size: 707602 - timestamp: 1718625640445 -- kind: conda - name: libexpat - version: 2.6.3 - build: h5888daf_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.3-h5888daf_0.conda - sha256: 4bb47bb2cd09898737a5211e2992d63c555d63715a07ba56eae0aff31fb89c22 - md5: 59f4c43bb1b5ef1c71946ff2cbf59524 - depends: - - __glibc >=2.17,<3.0.a0 - - libgcc >=13 - constrains: - - expat 2.6.3.* - license: MIT - license_family: MIT - purls: [] - size: 73616 - timestamp: 1725568742634 -- kind: conda - name: libffi - version: 3.4.2 - build: h7f98852_5 - build_number: 5 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2 - sha256: ab6e9856c21709b7b517e940ae7028ae0737546122f83c2aa5d692860c3b149e - md5: d645c6d2ac96843a2bfaccd2d62b3ac3 - depends: - - libgcc-ng >=9.4.0 - license: MIT - license_family: MIT - purls: [] - size: 58292 - timestamp: 1636488182923 -- kind: conda - name: libgcc - version: 14.1.0 - build: h77fa898_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.1.0-h77fa898_1.conda - sha256: 10fa74b69266a2be7b96db881e18fa62cfa03082b65231e8d652e897c4b335a3 - md5: 002ef4463dd1e2b44a94a4ace468f5d2 - depends: - - _libgcc_mutex 0.1 conda_forge - - _openmp_mutex >=4.5 - constrains: - - libgomp 14.1.0 h77fa898_1 - - libgcc-ng ==14.1.0=*_1 - license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - purls: [] - size: 846380 - timestamp: 1724801836552 -- kind: conda - name: libgcc-ng - version: 14.1.0 - build: h69a702a_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h69a702a_1.conda - sha256: b91f7021e14c3d5c840fbf0dc75370d6e1f7c7ff4482220940eaafb9c64613b7 - md5: 1efc0ad219877a73ef977af7dbb51f17 - depends: - - libgcc 14.1.0 h77fa898_1 - license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - purls: [] - size: 52170 - timestamp: 1724801842101 -- kind: conda - name: libgomp - version: 14.1.0 - build: h77fa898_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda - sha256: c96724c8ae4ee61af7674c5d9e5a3fbcf6cd887a40ad5a52c99aa36f1d4f9680 - md5: 23c255b008c4f2ae008f81edcabaca89 - depends: - - _libgcc_mutex 0.1 conda_forge - license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - purls: [] - size: 460218 - timestamp: 1724801743478 -- kind: conda - name: libnsl - version: 2.0.1 - build: hd590300_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda - sha256: 26d77a3bb4dceeedc2a41bd688564fe71bf2d149fdcf117049970bc02ff1add6 - md5: 30fd6e37fe21f86f4bd26d6ee73eeec7 - depends: - - libgcc-ng >=12 - license: LGPL-2.1-only - license_family: GPL - purls: [] - size: 33408 - timestamp: 1697359010159 -- kind: conda - name: libsqlite - version: 3.46.1 - build: hadc24fc_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda - sha256: 9851c049abafed3ee329d6c7c2033407e2fc269d33a75c071110ab52300002b0 - md5: 36f79405ab16bf271edb55b213836dac - depends: - - __glibc >=2.17,<3.0.a0 - - libgcc >=13 - - libzlib >=1.3.1,<2.0a0 - license: Unlicense - purls: [] - size: 865214 - timestamp: 1725353659783 -- kind: conda - name: libuuid - version: 2.38.1 - build: h0b41bf4_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda - sha256: 787eb542f055a2b3de553614b25f09eefb0a0931b0c87dbcce6efdfd92f04f18 - md5: 40b61aab5c7ba9ff276c41cfffe6b80b - depends: - - libgcc-ng >=12 - license: BSD-3-Clause - license_family: BSD - purls: [] - size: 33601 - timestamp: 1680112270483 -- kind: conda - name: libxcrypt - version: 4.4.36 - build: hd590300_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda - sha256: 6ae68e0b86423ef188196fff6207ed0c8195dd84273cb5623b85aa08033a410c - md5: 5aa797f8787fe7a17d1b0821485b5adc - depends: - - libgcc-ng >=12 - license: LGPL-2.1-or-later - purls: [] - size: 100393 - timestamp: 1702724383534 -- kind: conda - name: libzlib - version: 1.3.1 - build: h4ab18f5_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda - sha256: adf6096f98b537a11ae3729eaa642b0811478f0ea0402ca67b5108fe2cb0010d - md5: 57d7dc60e9325e3de37ff8dffd18e814 - depends: - - libgcc-ng >=12 - constrains: - - zlib 1.3.1 *_1 - license: Zlib - license_family: Other - purls: [] - size: 61574 - timestamp: 1716874187109 -- kind: pypi - name: markdown-it-py - version: 3.0.0 - url: https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl - sha256: 355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 - requires_dist: - - mdurl~=0.1 - - psutil ; extra == 'benchmarking' - - pytest ; extra == 'benchmarking' - - pytest-benchmark ; extra == 'benchmarking' - - pre-commit~=3.0 ; extra == 'code-style' - - commonmark~=0.9 ; extra == 'compare' - - markdown~=3.4 ; extra == 'compare' - - mistletoe~=1.0 ; extra == 'compare' - - mistune~=2.0 ; extra == 'compare' - - panflute~=2.3 ; extra == 'compare' - - linkify-it-py>=1,<3 ; extra == 'linkify' - - mdit-py-plugins ; extra == 'plugins' - - gprof2dot ; extra == 'profiling' - - mdit-py-plugins ; extra == 'rtd' - - myst-parser ; extra == 'rtd' - - pyyaml ; extra == 'rtd' - - sphinx ; extra == 'rtd' - - sphinx-copybutton ; extra == 'rtd' - - sphinx-design ; extra == 'rtd' - - sphinx-book-theme ; extra == 'rtd' - - jupyter-sphinx ; extra == 'rtd' - - coverage ; extra == 'testing' - - pytest ; extra == 'testing' - - pytest-cov ; extra == 'testing' - - pytest-regressions ; extra == 'testing' - requires_python: '>=3.8' -- kind: pypi - name: mdurl - version: 0.1.2 - url: https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl - sha256: 84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 - requires_python: '>=3.7' -- kind: conda - name: ncurses - version: '6.5' - build: he02047a_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda - sha256: 6a1d5d8634c1a07913f1c525db6455918cbc589d745fac46d9d6e30340c8731a - md5: 70caf8bb6cf39a0b6b7efc885f51c0fe - depends: - - __glibc >=2.17,<3.0.a0 - - libgcc-ng >=12 - license: X11 AND BSD-3-Clause - purls: [] - size: 889086 - timestamp: 1724658547447 -- kind: pypi - name: numpy - version: 2.1.1 - url: https://files.pythonhosted.org/packages/cb/22/2b840d297183916a95847c11f82ae11e248fa98113490b2357f774651e1d/numpy-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - sha256: d2b9cd92c8f8e7b313b80e93cedc12c0112088541dcedd9197b5dee3738c1201 - requires_python: '>=3.10' -- kind: conda - name: openssl - version: 3.3.2 - build: hb9d3cd8_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda - sha256: cee91036686419f6dd6086902acf7142b4916e1c4ba042e9ca23e151da012b6d - md5: 4d638782050ab6faa27275bed57e9b4e - depends: - - __glibc >=2.17,<3.0.a0 - - ca-certificates - - libgcc >=13 - license: Apache-2.0 - license_family: Apache - purls: [] - size: 2891789 - timestamp: 1725410790053 -- kind: pypi - name: packaging - version: '24.1' - url: https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl - sha256: 5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - requires_python: '>=3.8' -- kind: pypi - name: pandas - version: 2.2.2 - url: https://files.pythonhosted.org/packages/40/10/79e52ef01dfeb1c1ca47a109a01a248754ebe990e159a844ece12914de83/pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - sha256: eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad - requires_dist: - - numpy>=1.22.4 ; python_full_version < '3.11' - - numpy>=1.23.2 ; python_full_version == '3.11.*' - - numpy>=1.26.0 ; python_full_version >= '3.12' - - python-dateutil>=2.8.2 - - pytz>=2020.1 - - tzdata>=2022.7 - - hypothesis>=6.46.1 ; extra == 'test' - - pytest>=7.3.2 ; extra == 'test' - - pytest-xdist>=2.2.0 ; extra == 'test' - - pyarrow>=10.0.1 ; extra == 'pyarrow' - - bottleneck>=1.3.6 ; extra == 'performance' - - numba>=0.56.4 ; extra == 'performance' - - numexpr>=2.8.4 ; extra == 'performance' - - scipy>=1.10.0 ; extra == 'computation' - - xarray>=2022.12.0 ; extra == 'computation' - - fsspec>=2022.11.0 ; extra == 'fss' - - s3fs>=2022.11.0 ; extra == 'aws' - - gcsfs>=2022.11.0 ; extra == 'gcp' - - pandas-gbq>=0.19.0 ; extra == 'gcp' - - odfpy>=1.4.1 ; extra == 'excel' - - openpyxl>=3.1.0 ; extra == 'excel' - - python-calamine>=0.1.7 ; extra == 'excel' - - pyxlsb>=1.0.10 ; extra == 'excel' - - xlrd>=2.0.1 ; extra == 'excel' - - xlsxwriter>=3.0.5 ; extra == 'excel' - - pyarrow>=10.0.1 ; extra == 'parquet' - - pyarrow>=10.0.1 ; extra == 'feather' - - tables>=3.8.0 ; extra == 'hdf5' - - pyreadstat>=1.2.0 ; extra == 'spss' - - sqlalchemy>=2.0.0 ; extra == 'postgresql' - - psycopg2>=2.9.6 ; extra == 'postgresql' - - adbc-driver-postgresql>=0.8.0 ; extra == 'postgresql' - - sqlalchemy>=2.0.0 ; extra == 'mysql' - - pymysql>=1.0.2 ; extra == 'mysql' - - sqlalchemy>=2.0.0 ; extra == 'sql-other' - - adbc-driver-postgresql>=0.8.0 ; extra == 'sql-other' - - adbc-driver-sqlite>=0.8.0 ; extra == 'sql-other' - - beautifulsoup4>=4.11.2 ; extra == 'html' - - html5lib>=1.1 ; extra == 'html' - - lxml>=4.9.2 ; extra == 'html' - - lxml>=4.9.2 ; extra == 'xml' - - matplotlib>=3.6.3 ; extra == 'plot' - - jinja2>=3.1.2 ; extra == 'output-formatting' - - tabulate>=0.9.0 ; extra == 'output-formatting' - - pyqt5>=5.15.9 ; extra == 'clipboard' - - qtpy>=2.3.0 ; extra == 'clipboard' - - zstandard>=0.19.0 ; extra == 'compression' - - dataframe-api-compat>=0.1.7 ; extra == 'consortium-standard' - - adbc-driver-postgresql>=0.8.0 ; extra == 'all' - - adbc-driver-sqlite>=0.8.0 ; extra == 'all' - - beautifulsoup4>=4.11.2 ; extra == 'all' - - bottleneck>=1.3.6 ; extra == 'all' - - dataframe-api-compat>=0.1.7 ; extra == 'all' - - fastparquet>=2022.12.0 ; extra == 'all' - - fsspec>=2022.11.0 ; extra == 'all' - - gcsfs>=2022.11.0 ; extra == 'all' - - html5lib>=1.1 ; extra == 'all' - - hypothesis>=6.46.1 ; extra == 'all' - - jinja2>=3.1.2 ; extra == 'all' - - lxml>=4.9.2 ; extra == 'all' - - matplotlib>=3.6.3 ; extra == 'all' - - numba>=0.56.4 ; extra == 'all' - - numexpr>=2.8.4 ; extra == 'all' - - odfpy>=1.4.1 ; extra == 'all' - - openpyxl>=3.1.0 ; extra == 'all' - - pandas-gbq>=0.19.0 ; extra == 'all' - - psycopg2>=2.9.6 ; extra == 'all' - - pyarrow>=10.0.1 ; extra == 'all' - - pymysql>=1.0.2 ; extra == 'all' - - pyqt5>=5.15.9 ; extra == 'all' - - pyreadstat>=1.2.0 ; extra == 'all' - - pytest>=7.3.2 ; extra == 'all' - - pytest-xdist>=2.2.0 ; extra == 'all' - - python-calamine>=0.1.7 ; extra == 'all' - - pyxlsb>=1.0.10 ; extra == 'all' - - qtpy>=2.3.0 ; extra == 'all' - - scipy>=1.10.0 ; extra == 'all' - - s3fs>=2022.11.0 ; extra == 'all' - - sqlalchemy>=2.0.0 ; extra == 'all' - - tables>=3.8.0 ; extra == 'all' - - tabulate>=0.9.0 ; extra == 'all' - - xarray>=2022.12.0 ; extra == 'all' - - xlrd>=2.0.1 ; extra == 'all' - - xlsxwriter>=3.0.5 ; extra == 'all' - - zstandard>=0.19.0 ; extra == 'all' - requires_python: '>=3.9' -- kind: pypi - name: parsy - version: '2.1' - url: https://files.pythonhosted.org/packages/b3/05/1c84e2ebd1eb2817d92ae05a917e60e57b1c83f7b89e63c31df2cd6fcb70/parsy-2.1-py3-none-any.whl - sha256: 8f18e7b11985e7802e7e3ecbd8291c6ca243d29820b1186e4c84605db4efffa0 - requires_python: '>=3.7' -- kind: pypi - name: pluggy - version: 1.5.0 - url: https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl - sha256: 44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 - requires_dist: - - pre-commit ; extra == 'dev' - - tox ; extra == 'dev' - - pytest ; extra == 'testing' - - pytest-benchmark ; extra == 'testing' - requires_python: '>=3.8' -- kind: pypi - name: protobuf - version: 5.28.0 - url: https://files.pythonhosted.org/packages/19/15/da43113361db20f2d521bc38d92549edbe06856aeec085c420b2b8af5751/protobuf-5.28.0-cp38-abi3-manylinux2014_x86_64.whl - sha256: 6206afcb2d90181ae8722798dcb56dc76675ab67458ac24c0dd7d75d632ac9bd - requires_python: '>=3.8' -- kind: pypi - name: pyarrow - version: 17.0.0 - url: https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl - sha256: b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b - requires_dist: - - numpy>=1.16.6 - - pytest ; extra == 'test' - - hypothesis ; extra == 'test' - - cffi ; extra == 'test' - - pytz ; extra == 'test' - - pandas ; extra == 'test' - requires_python: '>=3.8' -- kind: pypi - name: pyarrow-hotfix - version: '0.6' - url: https://files.pythonhosted.org/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl - sha256: dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178 - requires_python: '>=3.5' -- kind: pypi - name: pygments - version: 2.18.0 - url: https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl - sha256: b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a - requires_dist: - - colorama>=0.4.6 ; extra == 'windows-terminal' - requires_python: '>=3.8' -- kind: pypi - name: pytest - version: 8.3.2 - url: https://files.pythonhosted.org/packages/0f/f9/cf155cf32ca7d6fa3601bc4c5dd19086af4b320b706919d48a4c79081cf9/pytest-8.3.2-py3-none-any.whl - sha256: 4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5 - requires_dist: - - iniconfig - - packaging - - pluggy<2,>=1.5 - - exceptiongroup>=1.0.0rc8 ; python_full_version < '3.11' - - tomli>=1 ; python_full_version < '3.11' - - colorama ; sys_platform == 'win32' - - argcomplete ; extra == 'dev' - - attrs>=19.2 ; extra == 'dev' - - hypothesis>=3.56 ; extra == 'dev' - - mock ; extra == 'dev' - - pygments>=2.7.2 ; extra == 'dev' - - requests ; extra == 'dev' - - setuptools ; extra == 'dev' - - xmlschema ; extra == 'dev' - requires_python: '>=3.8' -- kind: conda - name: python - version: 3.12.5 - build: h2ad013b_0_cpython - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/python-3.12.5-h2ad013b_0_cpython.conda - sha256: e2aad83838988725d4ffba4e9717b9328054fd18a668cff3377e0c50f109e8bd - md5: 9c56c4df45f6571b13111d8df2448692 - depends: - - __glibc >=2.17,<3.0.a0 - - bzip2 >=1.0.8,<2.0a0 - - ld_impl_linux-64 >=2.36.1 - - libexpat >=2.6.2,<3.0a0 - - libffi >=3.4,<4.0a0 - - libgcc-ng >=12 - - libnsl >=2.0.1,<2.1.0a0 - - libsqlite >=3.46.0,<4.0a0 - - libuuid >=2.38.1,<3.0a0 - - libxcrypt >=4.4.36 - - libzlib >=1.3.1,<2.0a0 - - ncurses >=6.5,<7.0a0 - - openssl >=3.3.1,<4.0a0 - - readline >=8.2,<9.0a0 - - tk >=8.6.13,<8.7.0a0 - - tzdata - - xz >=5.2.6,<6.0a0 - constrains: - - python_abi 3.12.* *_cp312 - license: Python-2.0 - purls: [] - size: 31663253 - timestamp: 1723143721353 -- kind: pypi - name: python-dateutil - version: 2.9.0.post0 - url: https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl - sha256: a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 - requires_dist: - - six>=1.5 - requires_python: '!=3.0.*,!=3.1.*,!=3.2.*,>=2.7' -- kind: pypi - name: pytz - version: '2024.1' - url: https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl - sha256: 328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 -- kind: pypi - name: pyyaml - version: 6.0.2 - url: https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - sha256: 80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476 - requires_python: '>=3.8' -- kind: conda - name: readline - version: '8.2' - build: h8228510_1 - build_number: 1 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda - sha256: 5435cf39d039387fbdc977b0a762357ea909a7694d9528ab40f005e9208744d7 - md5: 47d31b792659ce70f470b5c82fdfb7a4 - depends: - - libgcc-ng >=12 - - ncurses >=6.3,<7.0a0 - license: GPL-3.0-only - license_family: GPL - purls: [] - size: 281456 - timestamp: 1679532220005 -- kind: pypi - name: rich - version: 13.8.0 - url: https://files.pythonhosted.org/packages/c7/d9/c2a126eeae791e90ea099d05cb0515feea3688474b978343f3cdcfe04523/rich-13.8.0-py3-none-any.whl - sha256: 2e85306a063b9492dffc86278197a60cbece75bcb766022f3436f567cae11bdc - requires_dist: - - ipywidgets>=7.5.1,<9 ; extra == 'jupyter' - - markdown-it-py>=2.2.0 - - pygments>=2.13.0,<3.0.0 - - typing-extensions>=4.0.0,<5.0 ; python_full_version < '3.9' - requires_python: '>=3.7.0' -- kind: pypi - name: six - version: 1.16.0 - url: https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl - sha256: 8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - requires_python: '>=2.7,!=3.0.*,!=3.1.*,!=3.2.*' -- kind: pypi - name: sqlglot - version: 25.18.0 - url: https://files.pythonhosted.org/packages/ff/14/3d2f6679f11c4c64a52aa4a8fd526a415c2d7a940914af33ad52902693f3/sqlglot-25.18.0-py3-none-any.whl - sha256: d315874d88a81c48604cf4a9cd8a56e8bd3009aa294c233755ac176a9aceb153 - requires_dist: - - duckdb>=0.6 ; extra == 'dev' - - mypy ; extra == 'dev' - - pandas ; extra == 'dev' - - pandas-stubs ; extra == 'dev' - - python-dateutil ; extra == 'dev' - - pytz ; extra == 'dev' - - pdoc ; extra == 'dev' - - pre-commit ; extra == 'dev' - - ruff==0.4.3 ; extra == 'dev' - - types-python-dateutil ; extra == 'dev' - - types-pytz ; extra == 'dev' - - typing-extensions ; extra == 'dev' - - maturin<2.0,>=1.4 ; extra == 'dev' - - sqlglotrs==0.2.9 ; extra == 'rs' - requires_python: '>=3.7' -- kind: pypi - name: subframe - version: 0.0.1 - path: . - sha256: dc10e02747a43348787b5ccea59ab0504a85304c2f12e05311ad7c1189f52969 - requires_dist: - - ibis-framework[duckdb] - - ibis-substrait - - pyarrow - - pytest - - datafusion - requires_python: '>=3.9' - editable: true -- kind: pypi - name: substrait - version: 0.22.0 - url: https://files.pythonhosted.org/packages/66/ca/731a15b98ff0af0447d756d4ca09708d141e4b5f6483cccf38db2cd1ad6c/substrait-0.22.0-py3-none-any.whl - sha256: c26556c8eabc922138e0749e7a8fb024af5db2d7ddc2a8004fad1d30daf90531 - requires_dist: - - protobuf>=3.20 - - protobuf==3.20.1 ; extra == 'gen-proto' - - protoletariat>=2.0.0 ; extra == 'gen-proto' - - pytest>=7.0.0 ; extra == 'test' - requires_python: '>=3.8.1' -- kind: conda - name: tk - version: 8.6.13 - build: noxft_h4845f30_101 - build_number: 101 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda - sha256: e0569c9caa68bf476bead1bed3d79650bb080b532c64a4af7d8ca286c08dea4e - md5: d453b98d9c83e71da0741bb0ff4d76bc - depends: - - libgcc-ng >=12 - - libzlib >=1.2.13,<2.0.0a0 - license: TCL - license_family: BSD - purls: [] - size: 3318875 - timestamp: 1699202167581 -- kind: pypi - name: toolz - version: 0.12.1 - url: https://files.pythonhosted.org/packages/b7/8a/d82202c9f89eab30f9fc05380daae87d617e2ad11571ab23d7c13a29bb54/toolz-0.12.1-py3-none-any.whl - sha256: d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85 - requires_python: '>=3.7' -- kind: pypi - name: typing-extensions - version: 4.12.2 - url: https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl - sha256: 04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d - requires_python: '>=3.8' -- kind: pypi - name: tzdata - version: '2024.1' - url: https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl - sha256: 9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252 - requires_python: '>=2' -- kind: conda - name: tzdata - version: 2024a - build: h8827d51_1 - build_number: 1 - subdir: noarch - noarch: generic - url: https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h8827d51_1.conda - sha256: 7d21c95f61319dba9209ca17d1935e6128af4235a67ee4e57a00908a1450081e - md5: 8bfdead4e0fff0383ae4c9c50d0531bd - license: LicenseRef-Public-Domain - purls: [] - size: 124164 - timestamp: 1724736371498 -- kind: conda - name: xz - version: 5.2.6 - build: h166bdaf_0 - subdir: linux-64 - url: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 - sha256: 03a6d28ded42af8a347345f82f3eebdd6807a08526d47899a42d62d319609162 - md5: 2161070d867d1b1204ea749c8eec4ef0 - depends: - - libgcc-ng >=12 - license: LGPL-2.1 and GPL-2.0 - purls: [] - size: 418368 - timestamp: 1660346797927 diff --git a/pyproject.toml b/pyproject.toml index 3817780..fc131bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ version = "0.0.1" requires-python = ">=3.9" dependencies = [ "ibis-framework[duckdb]", + "pyparsing", "ibis-substrait", "pyarrow", "pytest", diff --git a/subframe/__init__.py b/subframe/__init__.py index fcac87b..1ae76cc 100644 --- a/subframe/__init__.py +++ b/subframe/__init__.py @@ -5,26 +5,9 @@ from substrait.gen.proto import algebra_pb2 as stalg from .table import Table from .value import Value -from .extensions.extension_registry import ExtensionRegistry +from .extension_registry import FunctionRegistry -registry = ExtensionRegistry( - [ - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_approx.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_decimal_output.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_aggregate_generic.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic_decimal.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_datetime.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_geometry.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_logarithmic.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_rounding.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_set.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/functions_string.yaml", - "https://github.com/substrait-io/substrait/blob/main/extensions/type_variations.yaml", - ] -) +registry = FunctionRegistry() def substrait_type_from_string(type: str): diff --git a/subframe/derivation_expression.py b/subframe/derivation_expression.py new file mode 100644 index 0000000..267b108 --- /dev/null +++ b/subframe/derivation_expression.py @@ -0,0 +1,249 @@ +from .utils import to_substrait_type +from typing import Optional +from substrait.gen.proto.type_expressions_pb2 import DerivationExpression +from substrait.gen.proto.type_pb2 import Type +from pyparsing import ( + Forward, + Literal, + ParseResults, + Word, + ZeroOrMore, + identchars, + infix_notation, + nums, + oneOf, + opAssoc, +) + +expr = Forward() + + +def parse_dtype(tokens: ParseResults): + tokens_dict = tokens.as_dict() + dtype = tokens_dict["dtype"].lower() + if dtype == "decimal": + return DerivationExpression( + decimal=DerivationExpression.ExpressionDecimal( + scale=tokens_dict["scale"], precision=tokens_dict["precision"] + ) + ) + elif tokens_dict["dtype"] == "boolean": + return DerivationExpression(bool=Type.Boolean()) + elif tokens_dict["dtype"] == "i8": + return DerivationExpression(i8=Type.I8()) + elif tokens_dict["dtype"] == "i16": + return DerivationExpression(i16=Type.I16()) + elif tokens_dict["dtype"] == "i32": + return DerivationExpression(i32=Type.I32()) + elif tokens_dict["dtype"] == "i64": + return DerivationExpression(i64=Type.I64()) + elif tokens_dict["dtype"] == "fp32": + return DerivationExpression(fp32=Type.FP32()) + elif tokens_dict["dtype"] == "fp64": + return DerivationExpression(fp64=Type.FP64()) + else: + raise Exception(f"Unknown dtype - {tokens_dict['dtype']}") + + +dtype = ( + Literal("i8")("dtype") + | Literal("i16")("dtype") + | Literal("i32")("dtype") + | Literal("i64")("dtype") + | Literal("fp32")("dtype") + | Literal("fp64")("dtype") + | Literal("boolean")("dtype") + | oneOf("DECIMAL decimal")("dtype") + + Literal("<").suppress() + + expr("scale") + + Literal(",").suppress() + + expr("precision") + + Literal(">").suppress() +).set_parse_action(parse_dtype) + +supported_functions = ["max", "min"] + + +def parse_binary_fn(tokens: ParseResults): + if tokens[0] == "min": + op_type = DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_MIN + elif tokens[0] == "max": + op_type = DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_MAX + else: + raise Exception(f"Unknown operation {tokens[0]}") + + return DerivationExpression( + binary_op=DerivationExpression.BinaryOp( + op_type=op_type, arg1=tokens[1], arg2=tokens[2] + ) + ) + + +binary_fn = ( + oneOf(supported_functions)("fn") + + Literal("(").suppress() + + expr + + Literal(",").suppress() + + expr + + Literal(")").suppress() +).set_parse_action(parse_binary_fn) + +integer_literal = Word(nums).set_parse_action( + lambda toks: DerivationExpression(integer_literal=int(toks[0])) +) + + +def parse_parameter(pr: ParseResults): + return DerivationExpression(integer_parameter_name=pr[0]) + + +parameter = Word(identchars + nums).set_parse_action(parse_parameter) + +operand = integer_literal | binary_fn | dtype | parameter + + +def parse_binary_op(pr): + tokens = pr[0] + prev_expression = None + for i in range(1, len(tokens), 2): + if tokens[i] == "*": + op_type = DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_MULTIPLY + elif tokens[i] == "+": + op_type = DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_PLUS + elif tokens[i] == "-": + op_type = DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_MINUS + elif tokens[i] == ">": + op_type = ( + DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_GREATER_THAN + ) + elif tokens[i] == "<": + op_type = ( + DerivationExpression.BinaryOp.BinaryOpType.BINARY_OP_TYPE_LESS_THAN + ) + else: + raise Exception(f"Unknown operation {tokens[i]}") + + prev_expression = DerivationExpression( + binary_op=DerivationExpression.BinaryOp( + op_type=op_type, + arg1=prev_expression if prev_expression else tokens[i - 1], + arg2=tokens[i + 1], + ) + ) + + return prev_expression + + +def parse_ternary(pr): + tokens = pr[0] + return DerivationExpression( + if_else=DerivationExpression.IfElse( + if_condition=tokens[0], if_return=tokens[1], else_return=tokens[2] + ) + ) + + +expr << infix_notation( + operand, + [ + (oneOf("* /")("binary_op"), 2, opAssoc.LEFT, parse_binary_op), + (oneOf("+ -")("binary_op"), 2, opAssoc.LEFT, parse_binary_op), + (oneOf("> <")("binary_op"), 2, opAssoc.LEFT, parse_binary_op), + ( + (Literal("?").suppress(), Literal(":").suppress()), + 3, + opAssoc.RIGHT, + parse_ternary, + ), + ], +) + + +def parse_assignment(toks): + tokens_dict = toks.as_dict() + return DerivationExpression.ReturnProgram.Assignment( + name=tokens_dict["name"], expression=tokens_dict["expression"] + ) + + +assignment = ( + Word(identchars + nums)("name") + Literal("=").suppress() + expr("expression") +).set_parse_action(parse_assignment) + + +def parse_return_program(toks): + return DerivationExpression( + return_program=DerivationExpression.ReturnProgram( + assignments=toks.as_dict()["assignments"], + final_expression=toks.as_dict()["final_expression"], + ) + ) + + +return_program = ( + ZeroOrMore(assignment)("assignments") + expr("final_expression") +).set_parse_action(parse_return_program) + + +def to_proto(txt: str): + return return_program.parseString(txt)[0] + + +def evaluate_expression(de: DerivationExpression, values: Optional[dict] = None): + kind = de.WhichOneof("kind") + if kind == "return_program": + for assign in de.return_program.assignments: + values[assign.name] = evaluate_expression(assign.expression, values) + return evaluate_expression(de.return_program.final_expression, values) + elif kind == "integer_literal": + return de.integer_literal + elif kind == "integer_parameter_name": + return values[de.integer_parameter_name] + elif kind == "binary_op": + binary_op = de.binary_op + arg1_eval = evaluate_expression(binary_op.arg1, values) + arg2_eval = evaluate_expression(binary_op.arg2, values) + if binary_op.op_type == DerivationExpression.BinaryOp.BINARY_OP_TYPE_PLUS: + return arg1_eval + arg2_eval + elif binary_op.op_type == DerivationExpression.BinaryOp.BINARY_OP_TYPE_MINUS: + return arg1_eval - arg2_eval + elif binary_op.op_type == DerivationExpression.BinaryOp.BINARY_OP_TYPE_MULTIPLY: + return arg1_eval * arg2_eval + elif binary_op.op_type == DerivationExpression.BinaryOp.BINARY_OP_TYPE_MIN: + return min(arg1_eval, arg2_eval) + elif binary_op.op_type == DerivationExpression.BinaryOp.BINARY_OP_TYPE_MAX: + return max(arg1_eval, arg2_eval) + elif ( + binary_op.op_type + == DerivationExpression.BinaryOp.BINARY_OP_TYPE_GREATER_THAN + ): + return arg1_eval > arg2_eval + elif ( + binary_op.op_type == DerivationExpression.BinaryOp.BINARY_OP_TYPE_LESS_THAN + ): + return arg1_eval < arg2_eval + else: + raise Exception(f"Unknown binary op type - {binary_op.op_type}") + elif kind == "if_else": + if_else = de.if_else + if_return_eval = evaluate_expression(if_else.if_return, values) + if_condition_eval = evaluate_expression(if_else.if_condition, values) + else_return_eval = evaluate_expression(if_else.else_return, values) + return if_return_eval if if_condition_eval else else_return_eval + elif kind == "decimal": + decimal = de.decimal + scale_eval = evaluate_expression(decimal.scale, values) + precision_eval = evaluate_expression(decimal.precision, values) + return to_substrait_type(f"decimal<{scale_eval},{precision_eval}>") + elif kind in ("i8", "i16", "i32", "i64", "fp32", "fp64"): + return to_substrait_type(kind) + elif kind == "bool": + return to_substrait_type("boolean") + else: + raise Exception(f"Unknown derivation expression type - {kind}") + + +def evaluate(txt: str, values: Optional[dict] = None): + if not values: + values = {} + return evaluate_expression(to_proto(txt), values) diff --git a/subframe/extension_registry.py b/subframe/extension_registry.py new file mode 100644 index 0000000..18c41ac --- /dev/null +++ b/subframe/extension_registry.py @@ -0,0 +1,352 @@ +from substrait.gen.proto.parameterized_types_pb2 import ParameterizedType +from substrait.gen.proto.type_pb2 import Type +from importlib.resources import files as importlib_files +import itertools +from collections import defaultdict +from collections.abc import Iterator, Mapping +from pathlib import Path +from typing import Any, Optional, Union +from .derivation_expression import evaluate + +import yaml +import re + +_normalized_key_names = { + "binary": "vbin", + "interval_compound": "icompound", + "interval_day": "iday", + "interval_year": "iyear", + "string": "str", + "timestamp": "ts", + "timestamp_tz": "tstz", +} + + +def normalize_substrait_type_names(typ: str) -> str: + # First strip off any punctuation + typ = typ.strip("?").lower() + + # Common prefixes whose information does not matter to an extension function + # signature + for complex_type, abbr in [ + ("fixedchar", "fchar"), + ("varchar", "vchar"), + ("fixedbinary", "fbin"), + ("decimal", "dec"), + ("precision_timestamp", "pts"), + ("precision_timestamp_tz", "ptstz"), + ("struct", "struct"), + ("list", "list"), + ("map", "map"), + ("any", "any"), + ("boolean", "bool"), + ]: + if typ.lower().startswith(complex_type): + typ = abbr + + # Then pass through the dictionary of mappings, defaulting to just the + # existing string + typ = _normalized_key_names.get(typ.lower(), typ.lower()) + return typ + + +id_generator = itertools.count(1) + + +def to_integer_option(txt: str): + if txt.isnumeric(): + return ParameterizedType.IntegerOption(literal=int(txt)) + else: + return ParameterizedType.IntegerOption( + parameter=ParameterizedType.IntegerParameter(name=txt) + ) + + +def to_parameterized_type(dtype: str): + if dtype == "boolean": + return ParameterizedType(bool=Type.Boolean()) + elif dtype == "i8": + return ParameterizedType(i8=Type.I8()) + elif dtype == "i16": + return ParameterizedType(i16=Type.I16()) + elif dtype == "i32": + return ParameterizedType(i32=Type.I32()) + elif dtype == "i64": + return ParameterizedType(i64=Type.I64()) + elif dtype == "fp32": + return ParameterizedType(fp32=Type.FP32()) + elif dtype == "fp64": + return ParameterizedType(fp64=Type.FP64()) + elif dtype == "timestamp": + return ParameterizedType(timestamp=Type.Timestamp()) + elif dtype == "timestamp_tz": + return ParameterizedType(timestamp_tz=Type.TimestampTZ()) + elif dtype == "date": + return ParameterizedType(date=Type.Date()) + elif dtype == "time": + return ParameterizedType(time=Type.Time()) + elif dtype == "interval_year": + return ParameterizedType(interval_year=Type.IntervalYear()) + elif dtype.startswith("decimal") or dtype.startswith("DECIMAL"): + (_, scale, precision, _) = re.split(r"\W+", dtype) + + return ParameterizedType( + decimal=ParameterizedType.ParameterizedDecimal( + scale=to_integer_option(scale), precision=to_integer_option(precision) + ) + ) + elif dtype.startswith("varchar"): + (_, length, _) = re.split(r"\W+", dtype) + + return ParameterizedType( + varchar=ParameterizedType.ParameterizedVarChar( + length=to_integer_option(length) + ) + ) + elif dtype.startswith("precision_timestamp"): + (_, precision, _) = re.split(r"\W+", dtype) + + return ParameterizedType( + precision_timestamp=ParameterizedType.ParameterizedPrecisionTimestamp( + precision=to_integer_option(precision) + ) + ) + elif dtype.startswith("precision_timestamp_tz"): + (_, precision, _) = re.split(r"\W+", dtype) + + return ParameterizedType( + precision_timestamp_tz=ParameterizedType.ParameterizedPrecisionTimestampTZ( + precision=to_integer_option(precision) + ) + ) + elif dtype.startswith("fixedchar"): + (_, length, _) = re.split(r"\W+", dtype) + + return ParameterizedType( + fixed_char=ParameterizedType.ParameterizedFixedChar( + length=to_integer_option(length) + ) + ) + elif dtype == "string": + return ParameterizedType(string=Type.String()) + elif dtype.startswith("list"): + inner_dtype = dtype[5:-1] + return ParameterizedType( + list=ParameterizedType.ParameterizedList( + type=to_parameterized_type(inner_dtype) + ) + ) + elif dtype.startswith("interval_day"): + (_, precision, _) = re.split(r"\W+", dtype) + + return ParameterizedType( + interval_day=ParameterizedType.ParameterizedIntervalDay( + precision=to_integer_option(precision) + ) + ) + elif dtype.startswith("any"): + return ParameterizedType( + type_parameter=ParameterizedType.TypeParameter(name=dtype) + ) + elif dtype.startswith("u!") or dtype == "geometry": + return ParameterizedType( + user_defined=ParameterizedType.ParameterizedUserDefined() + ) + else: + raise Exception(f"Unkownn type - {dtype}") + + +def violates_integer_option( + actual: int, option: ParameterizedType.IntegerOption, parameters: dict +): + integer_type = option.WhichOneof("integer_type") + + if integer_type == "literal" and actual != option.literal: + return True + else: + parameter_name = option.parameter.name + if parameter_name in parameters and parameters[parameter_name] != actual: + return True + else: + parameters[parameter_name] = actual + + return False + + +def covers(dtype: Type, parameterized_type: ParameterizedType, parameters: dict): + expected_kind = parameterized_type.WhichOneof("kind") + + if expected_kind == "type_parameter": + parameter_name = parameterized_type.type_parameter.name + if parameter_name == "any": + return True + else: + if parameter_name in parameters and parameters[ + parameter_name + ].SerializeToString(deterministic=True) != dtype.SerializeToString( + deterministic=True + ): + return False + else: + parameters[parameter_name] = dtype + return True + + kind = dtype.WhichOneof("kind") + + if kind != expected_kind: + return False + + if kind == "decimal": + if violates_integer_option( + dtype.decimal.scale, parameterized_type.decimal.scale, parameters + ) or violates_integer_option( + dtype.decimal.precision, parameterized_type.decimal.precision, parameters + ): + return False + + # TODO handle all types + + return True + + +class FunctionEntry: + def __init__(self, name: str) -> None: + self.name = name + self.options: Mapping[str, Any] = {} + self.arg_names: list = [] + self.normalized_inputs: list = [] + self.uri: str = "" + self.anchor = next(id_generator) + self.value_arguments = [] + + def parse(self, impl: Mapping[str, Any]) -> None: + self.rtn = impl["return"] + self.nullability = impl.get("nullability", False) + self.variadic = impl.get("variadic", False) + if input_args := impl.get("args", []): + for val in input_args: + if typ := val.get("value"): + self.value_arguments.append(to_parameterized_type(typ.strip("?"))) + self.normalized_inputs.append(normalize_substrait_type_names(typ)) + elif arg_name := val.get("name", None): + self.arg_names.append(arg_name) + + if options_args := impl.get("options", []): + for val in options_args: + self.options[val] = options_args[val]["values"] # type: ignore + + def __repr__(self) -> str: + return f"{self.name}:{'_'.join(self.normalized_inputs)}" + + def castable(self) -> None: + raise NotImplementedError + + def satisfies_signature(self, signature: tuple) -> Optional[str]: + if self.variadic: + min_args_allowed = self.variadic.get("min", 0) + if len(signature) < min_args_allowed: + return None + inputs = [self.value_arguments[0]] * len(signature) + else: + inputs = self.value_arguments + if len(inputs) != len(signature): + return None + + zipped_args = list(zip(inputs, signature)) + + parameters = {} + + if all([covers(y, x, parameters) for (x, y) in zipped_args]): + print(parameters) + return evaluate(self.rtn, parameters) + + +def _parse_func(entry: Mapping[str, Any]) -> Iterator[FunctionEntry]: + for impl in entry.get("impls", []): + sf = FunctionEntry(entry["name"]) + sf.parse(impl) + + yield sf + + +class FunctionRegistry: + def __init__(self) -> None: + self._extension_mapping: dict = defaultdict(dict) + self.id_generator = itertools.count(1) + + self.uri_aliases = {} + + for fpath in importlib_files("substrait.extensions").glob( # type: ignore + "functions*.yaml" + ): + self.uri_aliases[fpath.name] = ( + f"https://github.com/substrait-io/substrait/blob/main/extensions/{fpath.name}" + ) + self.register_extension_yaml(fpath) + + def register_extension_yaml( + self, + fname: Union[str, Path], + prefix: Optional[str] = None, + uri: Optional[str] = None, + ) -> None: + """Add a substrait extension YAML file to the ibis substrait compiler. + + Parameters + ---------- + fname + The filename of the extension yaml to register. + prefix + Custom prefix to use when constructing Substrait extension URI + uri + A custom URI to use for all functions defined within `fname`. + If passed, this value overrides `prefix`. + + + """ + fname = Path(fname) + with open(fname) as f: # type: ignore + extension_definitions = yaml.safe_load(f) + + prefix = ( + prefix.strip("/") + if prefix is not None + else "https://github.com/substrait-io/substrait/blob/main/extensions" + ) + + uri = uri or f"{prefix}/{fname.name}" + + self.register_extension_dict(extension_definitions, uri) + + def register_extension_dict(self, definitions: dict, uri: str) -> None: + for named_functions in definitions.values(): + for function in named_functions: + for func in _parse_func(function): + func.uri = uri + if ( + func.uri in self._extension_mapping + and function["name"] in self._extension_mapping[func.uri] + ): + self._extension_mapping[func.uri][function["name"]].append(func) + else: + self._extension_mapping[func.uri][function["name"]] = [func] + + # TODO add an optional return type check + def lookup_function( + self, uri: str, function_name: str, signature: tuple + ) -> Optional[tuple[FunctionEntry, Type]]: + uri = self.uri_aliases.get(uri, uri) + + if ( + uri not in self._extension_mapping + or function_name not in self._extension_mapping[uri] + ): + return None + functions = self._extension_mapping[uri][function_name] + for f in functions: + assert isinstance(f, FunctionEntry) + rtn = f.satisfies_signature(signature) + if rtn is not None: + return (f, rtn) + + return None diff --git a/subframe/extensions/__init__.py b/subframe/extensions/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/subframe/extensions/extension_function.py b/subframe/extensions/extension_function.py deleted file mode 100644 index 51f5921..0000000 --- a/subframe/extensions/extension_function.py +++ /dev/null @@ -1,21 +0,0 @@ -class ExtensionFunction: - def __init__(self, function_definition: dict, anchor: int): - self.function_definition = function_definition - self.anchor = anchor - - def lookup_signature(self, args: list[str]): - for impl in self.function_definition["impls"]: - impl_args = impl.get("args", []) - if len(impl_args) == len(args) and all( - [ - x == y or y.startswith("any") # TODO - for x, y in zip(args, [x["value"] for x in impl_args]) - ] - ): - return ( - self.anchor, - self.function_definition["name"] - + ":" - + "_".join([x["value"] for x in impl_args]), - impl, - ) diff --git a/subframe/extensions/extension_registry.py b/subframe/extensions/extension_registry.py deleted file mode 100644 index 60cdbfa..0000000 --- a/subframe/extensions/extension_registry.py +++ /dev/null @@ -1,57 +0,0 @@ -import yaml -import os -from .extension_function import ExtensionFunction - - -class ExtensionRegistry: - def __init__(self, urls) -> None: - self.registry = {} - self.num_functions = 0 - for url in urls: - self.load_url(url) - - def load_url(self, url): - resource_url = url.replace( - "https://github.com/substrait-io/substrait/blob/main/", "" - ) - - resource_url = os.path.join(os.path.dirname(__file__), resource_url) - with open(resource_url, "r") as f: - body = f.read() - self.registry[url] = {} - - content = yaml.load(body, Loader=yaml.FullLoader) - - if "scalar_functions" in content: - self.registry[url]["scalar_functions"] = {} - for i, f in enumerate(content["scalar_functions"]): - if f["name"] in self.registry[url]["scalar_functions"]: - self.registry[url]["scalar_functions"][f["name"]].append( - ExtensionFunction(f, self.num_functions + i) - ) - else: - self.registry[url]["scalar_functions"][f["name"]] = [ - ExtensionFunction(f, self.num_functions + i) - ] - - self.num_functions += len(content["scalar_functions"]) - - if "aggregate_functions" in content: - self.registry[url]["aggregate_functions"] = {} - for i, f in enumerate(content["aggregate_functions"]): - if f["name"] in self.registry[url]["aggregate_functions"]: - self.registry[url]["aggregate_functions"][f["name"]].append( - ExtensionFunction(f, self.num_functions + i) - ) - else: - self.registry[url]["aggregate_functions"][f["name"]] = [ - ExtensionFunction(f, self.num_functions + i) - ] - - self.num_functions += len(content["aggregate_functions"]) - - def lookup_scalar_function(self, url: str, name: str) -> ExtensionFunction: - return self.registry[url]["scalar_functions"][name] - - def lookup_aggregate_function(self, url: str, name: str) -> ExtensionFunction: - return self.registry[url]["aggregate_functions"][name] diff --git a/subframe/extensions/extensions/functions_aggregate_approx.yaml b/subframe/extensions/extensions/functions_aggregate_approx.yaml deleted file mode 100644 index c77caec..0000000 --- a/subframe/extensions/extensions/functions_aggregate_approx.yaml +++ /dev/null @@ -1,18 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "approx_count_distinct" - description: >- - Calculates the approximate number of rows that contain distinct values of the expression argument using - HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which - returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT - processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact - result. - impls: - - args: - - name: x - value: any - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: binary - return: i64 diff --git a/subframe/extensions/extensions/functions_aggregate_decimal_output.yaml b/subframe/extensions/extensions/functions_aggregate_decimal_output.yaml deleted file mode 100644 index 13a3b2e..0000000 --- a/subframe/extensions/extensions/functions_aggregate_decimal_output.yaml +++ /dev/null @@ -1,41 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "count" - description: Count a set of values. Result is returned as a decimal instead of i64. - impls: - - args: - - name: x - value: any - options: - overflow: - values: [SILENT, SATURATE, ERROR] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: decimal<38,0> - return: decimal<38,0> - - name: "count" - description: "Count a set of records (not field referenced). Result is returned as a decimal instead of i64." - impls: - - options: - overflow: - values: [SILENT, SATURATE, ERROR] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: decimal<38,0> - return: decimal<38,0> - - name: "approx_count_distinct" - description: >- - Calculates the approximate number of rows that contain distinct values of the expression argument using - HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which - returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT - processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact - result. Result is returned as a decimal instead of i64. - impls: - - args: - - name: x - value: any - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: binary - return: decimal<38,0> diff --git a/subframe/extensions/extensions/functions_aggregate_generic.yaml b/subframe/extensions/extensions/functions_aggregate_generic.yaml deleted file mode 100644 index 4db63ec..0000000 --- a/subframe/extensions/extensions/functions_aggregate_generic.yaml +++ /dev/null @@ -1,42 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "count" - description: Count a set of values - impls: - - args: - - name: x - value: any - options: - overflow: - values: [SILENT, SATURATE, ERROR] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - name: "count" - description: "Count a set of records (not field referenced)" - impls: - - options: - overflow: - values: [SILENT, SATURATE, ERROR] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - name: "any_value" - description: > - Selects an arbitrary value from a group of values. - - If the input is empty, the function returns null. - impls: - - args: - - name: x - value: any1 - options: - ignore_nulls: - values: [ "TRUE", "FALSE" ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: any1? - return: any1? diff --git a/subframe/extensions/extensions/functions_arithmetic.yaml b/subframe/extensions/extensions/functions_arithmetic.yaml deleted file mode 100644 index 050af65..0000000 --- a/subframe/extensions/extensions/functions_arithmetic.yaml +++ /dev/null @@ -1,1844 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "add" - description: "Add two values." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - value: i64 - - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "subtract" - description: "Subtract one value from another." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "multiply" - description: "Multiply two values." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "divide" - description: > - Divide x by y. In the case of integer division, partial values are truncated (i.e. rounded towards 0). - The `on_division_by_zero` option governs behavior in cases where y is 0. If the option is IEEE then - the IEEE754 standard is followed: all values except +/-infinity return NaN and +/-infinity are unchanged. - If the option is LIMIT then the result is +/-infinity in all cases. - If either x or y are NaN then behavior will be governed by `on_domain_error`. - If x and y are both +/-infinity, behavior will be governed by `on_domain_error`. - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - on_division_by_zero: - values: [ "NULL", ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - on_division_by_zero: - values: [ "NULL", ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - on_division_by_zero: - values: [ "NULL", ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - on_division_by_zero: - values: [ "NULL", ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_division_by_zero: - values: [ IEEE, LIMIT, "NULL", ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_division_by_zero: - values: [ IEEE, LIMIT, "NULL", ERROR ] - return: fp64 - - - name: "negate" - description: "Negation of the value" - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "modulus" - description: > - Calculate the remainder (r) when dividing dividend (x) by divisor (y). - - In mathematics, many conventions for the modulus (mod) operation exists. The result of a mod operation - depends on the software implementation and underlying hardware. Substrait is a format for describing compute - operations on structured data and designed for interoperability. Therefore the user is responsible for determining - a definition of division as defined by the quotient (q). - - The following basic conditions of division are satisfied: - (1) q ∈ ℤ (the quotient is an integer) - (2) x = y * q + r (division rule) - (3) abs(r) < abs(y) - where q is the quotient. - - The `division_type` option determines the mathematical definition of quotient to use in the above definition of - division. - - When `division_type`=TRUNCATE, q = trunc(x/y). - When `division_type`=FLOOR, q = floor(x/y). - - In the cases of TRUNCATE and FLOOR division: remainder r = x - round_func(x/y) - - The `on_domain_error` option governs behavior in cases where y is 0, y is +/-inf, or x is +/-inf. In these cases - the mod is undefined. - The `overflow` option governs behavior when integer overflow occurs. - If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`. - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - division_type: - values: [ TRUNCATE, FLOOR ] - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - division_type: - values: [ TRUNCATE, FLOOR ] - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - division_type: - values: [ TRUNCATE, FLOOR ] - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - division_type: - values: [ TRUNCATE, FLOOR ] - overflow: - values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ "NULL", ERROR ] - return: i64 - - - name: "power" - description: "Take the power with x as the base and y as exponent." - impls: - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "sqrt" - description: "Square root of the value" - impls: - - args: - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "exp" - description: "The mathematical constant e, raised to the power of the value." - impls: - - args: - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "cos" - description: "Get the cosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "sin" - description: "Get the sine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "tan" - description: "Get the tangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "cosh" - description: "Get the hyperbolic cosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "sinh" - description: "Get the hyperbolic sine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "tanh" - description: "Get the hyperbolic tangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "acos" - description: "Get the arccosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "asin" - description: "Get the arcsine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "atan" - description: "Get the arctangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "acosh" - description: "Get the hyperbolic arccosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "asinh" - description: "Get the hyperbolic arcsine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "atanh" - description: "Get the hyperbolic arctangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "atan2" - description: "Get the arctangent of values given as x/y pairs." - impls: - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "radians" - description: > - Converts angle `x` in degrees to radians. - - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "degrees" - description: > - Converts angle `x` in radians to degrees. - - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "abs" - description: > - Calculate the absolute value of the argument. - - Integer values allow the specification of overflow behavior to handle the - unevenness of the twos complement, e.g. Int8 range [-128 : 127]. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "sign" - description: > - Return the signedness of the argument. - - Integer values return signedness with the same type as the input. - Possible return values are [-1, 0, 1] - - Floating point values return signedness with the same type as the input. - Possible return values are [-1.0, -0.0, 0.0, 1.0, NaN] - impls: - - args: - - name: x - value: i8 - return: i8 - - args: - - name: x - value: i16 - return: i16 - - args: - - name: x - value: i32 - return: i32 - - args: - - name: x - value: i64 - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "factorial" - description: > - Return the factorial of a given integer input. - - The factorial of 0! is 1 by convention. - - Negative inputs will raise an error. - impls: - - args: - - value: i32 - name: "n" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - value: i64 - name: "n" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - - name: "bitwise_not" - description: > - Return the bitwise NOT result for one integer input. - - impls: - - args: - - name: x - value: i8 - return: i8 - - args: - - name: x - value: i16 - return: i16 - - args: - - name: x - value: i32 - return: i32 - - args: - - name: x - value: i64 - return: i64 - - - name: "bitwise_and" - description: > - Return the bitwise AND result for two integer inputs. - - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - - name: "bitwise_or" - description: > - Return the bitwise OR result for two given integer inputs. - - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - - name: "bitwise_xor" - description: > - Return the bitwise XOR result for two integer inputs. - - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - -aggregate_functions: - - name: "sum" - description: Sum a set of values. The sum of zero elements yields null. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - args: - - name: x - value: fp64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "sum0" - description: > - Sum a set of values. The sum of zero elements yields zero. - - Null values are ignored. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: x - value: fp32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64 - return: fp64 - - args: - - name: x - value: fp64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64 - return: fp64 - - name: "avg" - description: Average a set of values. For integral types, this truncates partial values. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i8? - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i16? - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i32? - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i64? - - args: - - name: x - value: fp32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: fp32? - - args: - - name: x - value: fp64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: fp64? - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i8? - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i16? - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i32? - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp32? - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i8? - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i16? - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i32? - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp32? - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "product" - description: Product of a set of values. Returns 1 for empty input. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i8 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i16 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i32 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: MIRROR - decomposable: MANY - intermediate: fp64 - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: MIRROR - decomposable: MANY - intermediate: fp64 - return: fp64 - - name: "std_dev" - description: Calculates standard-deviation for a set of values. - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "variance" - description: Calculates variance for a set of values. - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "corr" - description: > - Calculates the value of Pearson's correlation coefficient between `x` and `y`. - If there is no input, null is returned. - impls: - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "mode" - description: > - Calculates mode for a set of values. - If there is no input, null is returned. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "median" - description: > - Calculate the median for a set of values. - - Returns null if applied to zero records. For the integer implementations, - the rounding option determines how the median should be rounded if it ends - up midway between two values. For the floating point implementations, - they specify the usual floating point rounding mode. - impls: - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i8 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i8? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i16 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i16? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i32? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i64? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "quantile" - description: > - Calculates quantiles for a set of values. - - This function will divide the aggregated values (passed via the - distribution argument) over N equally-sized bins, where N is passed - via a constant argument. It will then return the values at the - boundaries of these bins in list form. If the input is appropriately - sorted, this computes the quantiles of the distribution. - - The function can optionally return the first and/or last element of - the input, as specified by the `boundaries` argument. If the input is - appropriately sorted, this will thus be the minimum and/or maximum - values of the distribution. - - When the boundaries do not lie exactly on elements of the incoming - distribution, the function will interpolate between the two nearby - elements. If the interpolated value cannot be represented exactly, - the `rounding` option controls how the value should be selected or - computed. - - The function fails and returns null in the following cases: - - `n` is null or less than one; - - any value in `distribution` is null. - - The function returns an empty list if `n` equals 1 and `boundaries` is - set to `NEITHER`. - - impls: - - args: - - name: boundaries - description: > - Which boundaries to include. For NEITHER, the output will have - n-1 elements, for MINIMUM and MAXIMUM it will have n elements, - and for BOTH it will have n+1 elements. - options: [ NEITHER, MINIMUM, MAXIMUM, BOTH ] - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - value: i64 - constant: true - name: n - description: > - A positive integer which defines the number of quantile - partitions. - - value: any - name: distribution - description: > - The data for which the quantiles should be computed. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. For floating point numbers, it specifies the IEEE - 754 rounding mode (as it does for all other floating point - operations). For integer types: - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - For non-numeric types, the behavior is the same as for integer - types, but applied to the index of the value in distribution. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - ordered: true - return: LIST? - -window_functions: - - name: "row_number" - description: "the number of the current row within its partition." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "rank" - description: "the rank of the current row, with gaps." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "dense_rank" - description: "the rank of the current row, without gaps." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "percent_rank" - description: "the relative rank of the current row." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: fp64? - window_type: PARTITION - - name: "cume_dist" - description: "the cumulative distribution." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: fp64? - window_type: PARTITION - - name: "ntile" - description: "Return an integer ranging from 1 to the argument value,dividing the partition as equally as possible." - impls: - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i32? - window_type: PARTITION - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "first_value" - description: > - Returns the first value in the window. - impls: - - args: - - value: any1 - name: expression - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1 - window_type: PARTITION - - name: "last_value" - description: > - Returns the last value in the window. - impls: - - args: - - value: any1 - name: expression - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1 - window_type: PARTITION - - name: "nth_value" - description: > - Returns a value from the nth row based on the `window_offset`. `window_offset` should - be a positive integer. If the value of the `window_offset` is outside the range - of the window, `null` is returned. - - The `on_domain_error` option governs behavior in cases where `window_offset` is not - a positive integer or `null`. - impls: - - args: - - value: any1 - name: expression - - value: i32 - name: window_offset - options: - on_domain_error: - values: [ NAN, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION - - name: "lead" - description: > - Return a value from a following row based on a specified physical offset. - This allows you to compare a value in the current row against a following row. - - The `expression` is evaluated against a row that comes after the current row based - on the `row_offset`. The `row_offset` should be a positive integer and is set to - 1 if not specified explicitly. If the `row_offset` is negative, the expression - will be evaluated against a row coming before the current row, similar to the `lag` - function. A `row_offset` of `null` will return `null`. The function returns the - `default` input value if `row_offset` goes beyond the scope of the window. - If a `default` value is not specified, it is set to `null`. - - Example comparing the sales of the current year to the following year. - `row_offset` of 1. - | year | sales | next_year_sales | - | 2019 | 20.50 | 30.00 | - | 2020 | 30.00 | 45.99 | - | 2021 | 45.99 | null | - impls: - - args: - - value: any1 - name: expression - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION - - args: - - value: any1 - name: expression - - value: i32 - name: row_offset - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION - - args: - - value: any1 - name: expression - - value: i32 - name: row_offset - - value: any1 - name: default - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION - - name: "lag" - description: > - Return a column value from a previous row based on a specified physical offset. - This allows you to compare a value in the current row against a previous row. - - The `expression` is evaluated against a row that comes before the current row based - on the `row_offset`. The `expression` can be a column, expression or subquery that - evaluates to a single value. The `row_offset` should be a positive integer and is set to - 1 if not specified explicitly. If the `row_offset` is negative, the expression will - be evaluated against a row coming after the current row, similar to the `lead` function. - A `row_offset` of `null` will return `null`. The function returns the `default` - input value if `row_offset` goes beyond the scope of the partition. If a `default` - value is not specified, it is set to `null`. - - Example comparing the sales of the current year to the previous year. - `row_offset` of 1. - | year | sales | previous_year_sales | - | 2019 | 20.50 | null | - | 2020 | 30.00 | 20.50 | - | 2021 | 45.99 | 30.00 | - impls: - - args: - - value: any1 - name: expression - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION - - args: - - value: any1 - name: expression - - value: i32 - name: row_offset - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION - - args: - - value: any1 - name: expression - - value: i32 - name: row_offset - - value: any1 - name: default - nullability: DECLARED_OUTPUT - decomposable: NONE - return: any1? - window_type: PARTITION diff --git a/subframe/extensions/extensions/functions_arithmetic_decimal.yaml b/subframe/extensions/extensions/functions_arithmetic_decimal.yaml deleted file mode 100644 index 57cdbe3..0000000 --- a/subframe/extensions/extensions/functions_arithmetic_decimal.yaml +++ /dev/null @@ -1,248 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "add" - description: "Add two decimal values." - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(S1,S2) - init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "subtract" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(S1,S2) - init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "multiply" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = S1 + S2 - init_prec = P1 + P2 + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "divide" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(6, S1 + P2 + 1) - init_prec = P1 - S1 + P2 + init_scale - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "modulus" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(S1,S2) - init_prec = min(P1 - S1, P2 - S2) + init_scale - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "abs" - description: Calculate the absolute value of the argument. - impls: - - args: - - name: x - value: decimal - return: decimal - - name: "bitwise_and" - description: > - Return the bitwise AND result for two decimal inputs. - In inputs scale must be 0 (i.e. only integer types are allowed) - impls: - - args: - - name: x - value: "DECIMAL" - - name: y - value: "DECIMAL" - return: |- - max_precision = max(P1, P2) - DECIMAL - - name: "bitwise_or" - description: > - Return the bitwise OR result for two given decimal inputs. - In inputs scale must be 0 (i.e. only integer types are allowed) - impls: - - args: - - name: x - value: "DECIMAL" - - name: y - value: "DECIMAL" - return: |- - max_precision = max(P1, P2) - DECIMAL - - name: "bitwise_xor" - description: > - Return the bitwise XOR result for two given decimal inputs. - In inputs scale must be 0 (i.e. only integer types are allowed) - impls: - - args: - - name: x - value: "DECIMAL" - - name: y - value: "DECIMAL" - return: |- - max_precision = max(P1, P2) - DECIMAL - - name: "sqrt" - description: Square root of the value. Sqrt of 0 is 0 and sqrt of negative values will raise an error. - impls: - - args: - - name: x - value: "DECIMAL" - return: fp64 - - name: "factorial" - description: > - Return the factorial of a given decimal input. Scale should be 0 for factorial decimal input. - The factorial of 0! is 1 by convention. Negative inputs will raise an error. - Input which cause overflow of result will raise an error. - impls: - - args: - - name: "n" - value: "DECIMAL" - return: "DECIMAL<38,0>" - - - name: "power" - description: "Take the power with x as the base and y as exponent. - Behavior for complex number result is indicated by option complex_number_result" - impls: - - args: - - name: x - value: "DECIMAL" - - name: y - value: "DECIMAL" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - complex_number_result: - values: [ NAN, ERROR ] - return: fp64 - -aggregate_functions: - - name: "sum" - description: Sum a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?<38,S>" - return: "DECIMAL?<38,S>" - - name: "avg" - description: Average a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT,i64>" - return: "DECIMAL<38,S>" - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?" - return: "DECIMAL?" - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?" - return: "DECIMAL?" - - name: "sum0" - description: > - Sum a set of values. The sum of zero elements yields zero. - - Null values are ignored. - impls: - - args: - - name: x - value: "DECIMAL" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL<38,S>" - return: "DECIMAL<38,S>" diff --git a/subframe/extensions/extensions/functions_boolean.yaml b/subframe/extensions/extensions/functions_boolean.yaml deleted file mode 100644 index 22ae296..0000000 --- a/subframe/extensions/extensions/functions_boolean.yaml +++ /dev/null @@ -1,140 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: or - description: > - The boolean `or` using Kleene logic. - - This function behaves as follows with nulls: - - true or null = true - - null or true = true - - false or null = null - - null or false = null - - null or null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `or` true is always true. - - Behavior for 0 or 1 inputs is as follows: - or() -> false - or(x) -> x - impls: - - args: - - value: boolean? - name: a - variadic: - min: 0 - return: boolean? - - - name: and - description: > - The boolean `and` using Kleene logic. - - This function behaves as follows with nulls: - - true and null = null - - null and true = null - - false and null = false - - null and false = false - - null and null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `and` false is always false. - - Behavior for 0 or 1 inputs is as follows: - and() -> true - and(x) -> x - impls: - - args: - - value: boolean? - name: a - variadic: - min: 0 - return: boolean? - - - name: and_not - description: > - The boolean `and` of one value and the negation of the other using Kleene logic. - - This function behaves as follows with nulls: - - true and not null = null - - null and not false = null - - false and not null = false - - null and not true = false - - null and not null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `and not` true is always false, as is false `and not` an - unknown value. - impls: - - args: - - value: boolean? - name: a - - value: boolean? - name: b - return: boolean? - - - name: xor - description: > - The boolean `xor` of two values using Kleene logic. - - When a null is encountered in either input, a null is output. - impls: - - args: - - value: boolean? - name: a - - value: boolean? - name: b - return: boolean? - - - name: not - description: > - The `not` of a boolean value. - - When a null is input, a null is output. - impls: - - args: - - value: boolean? - name: a - return: boolean? - -aggregate_functions: - - - name: "bool_and" - description: > - If any value in the input is false, false is returned. If the input is - empty or only contains nulls, null is returned. Otherwise, true is - returned. - impls: - - args: - - value: boolean - name: a - nullability: DECLARED_OUTPUT - return: boolean? - - - name: "bool_or" - description: > - If any value in the input is true, true is returned. If the input is - empty or only contains nulls, null is returned. Otherwise, false is - returned. - impls: - - args: - - value: boolean - name: a - nullability: DECLARED_OUTPUT - return: boolean? diff --git a/subframe/extensions/extensions/functions_comparison.yaml b/subframe/extensions/extensions/functions_comparison.yaml deleted file mode 100644 index dffc572..0000000 --- a/subframe/extensions/extensions/functions_comparison.yaml +++ /dev/null @@ -1,289 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "not_equal" - description: > - Whether two values are not_equal. - - `not_equal(x, y) := (x != y)` - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - - - name: "equal" - description: > - Whether two values are equal. - - `equal(x, y) := (x == y)` - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - - - name: "is_not_distinct_from" - description: > - Whether two values are equal. - - This function treats `null` values as comparable, so - - `is_not_distinct_from(null, null) == True` - - This is in contrast to `equal`, in which `null` values do not compare. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - nullability: DECLARED_OUTPUT - - - name: "is_distinct_from" - description: > - Whether two values are not equal. - - This function treats `null` values as comparable, so - - `is_distinct_from(null, null) == False` - - This is in contrast to `equal`, in which `null` values do not compare. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - nullability: DECLARED_OUTPUT - - - name: "lt" - description: > - Less than. - - lt(x, y) := (x < y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - - - name: "gt" - description: > - Greater than. - - gt(x, y) := (x > y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - - - name: "lte" - description: > - Less than or equal to. - - lte(x, y) := (x <= y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - - - name: "gte" - description: > - Greater than or equal to. - - gte(x, y) := (x >= y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: boolean - - - name: "between" - description: >- - Whether the `expression` is greater than or equal to `low` and less than or equal to `high`. - - `expression` BETWEEN `low` AND `high` - - If `low`, `high`, or `expression` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: expression - description: The expression to test for in the range defined by `low` and `high`. - - value: any1 - name: low - description: The value to check if greater than or equal to. - - value: any1 - name: high - description: The value to check if less than or equal to. - return: boolean - - - name: "is_null" - description: Whether a value is null. NaN is not null. - impls: - - args: - - value: any1 - name: x - return: boolean - nullability: DECLARED_OUTPUT - - - name: "is_not_null" - description: Whether a value is not null. NaN is not null. - impls: - - args: - - value: any1 - name: x - return: boolean - nullability: DECLARED_OUTPUT - - - name: "is_nan" - description: > - Whether a value is not a number. - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: boolean - - args: - - value: fp64 - name: x - return: boolean - - - name: "is_finite" - description: > - Whether a value is finite (neither infinite nor NaN). - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: boolean - - args: - - value: fp64 - name: x - return: boolean - - - name: "is_infinite" - description: > - Whether a value is infinite. - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: boolean - - args: - - value: fp64 - name: x - return: boolean - - - name: "nullif" - description: If two values are equal, return null. Otherwise, return the first value. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: any1 - - - name: "coalesce" - description: >- - Evaluate arguments from left to right and return the first argument that is not null. Once - a non-null argument is found, the remaining arguments are not evaluated. - - If all arguments are null, return null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 - - - name: "least" - description: >- - Evaluates each argument and returns the smallest one. - The function will return null if any argument evaluates to null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 - nullability: MIRROR - - - name: "least_skip_null" - description: >- - Evaluates each argument and returns the smallest one. - The function will return null only if all arguments evaluate to null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 - # NOTE: The return type nullability as described above cannot be expressed currently - # See https://github.com/substrait-io/substrait/issues/601 - # Using MIRROR for now until it can be expressed - nullability: MIRROR - - - name: "greatest" - description: >- - Evaluates each argument and returns the largest one. - The function will return null if any argument evaluates to null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 - nullability: MIRROR - - - name: "greatest_skip_null" - description: >- - Evaluates each argument and returns the largest one. - The function will return null only if all arguments evaluate to null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 - # NOTE: The return type nullability as described above cannot be expressed currently - # See https://github.com/substrait-io/substrait/issues/601 - # Using MIRROR for now until it can be expressed - nullability: MIRROR diff --git a/subframe/extensions/extensions/functions_datetime.yaml b/subframe/extensions/extensions/functions_datetime.yaml deleted file mode 100644 index 5847c27..0000000 --- a/subframe/extensions/extensions/functions_datetime.yaml +++ /dev/null @@ -1,1110 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: extract - description: >- - Extract portion of a date/time value. - * YEAR Return the year. - * ISO_YEAR Return the ISO 8601 week-numbering year. First week of an ISO year has the majority (4 or more) of - its days in January. - * US_YEAR Return the US epidemiological year. First week of US epidemiological year has the majority (4 or more) - of its days in January. Last week of US epidemiological year has the year's last Wednesday in it. US - epidemiological week starts on Sunday. - * QUARTER Return the number of the quarter within the year. January 1 through March 31 map to the first quarter, - April 1 through June 30 map to the second quarter, etc. - * MONTH Return the number of the month within the year. - * DAY Return the number of the day within the month. - * DAY_OF_YEAR Return the number of the day within the year. January 1 maps to the first day, February 1 maps to - the thirty-second day, etc. - * MONDAY_DAY_OF_WEEK Return the number of the day within the week, from Monday (first day) to Sunday (seventh - day). - * SUNDAY_DAY_OF_WEEK Return the number of the day within the week, from Sunday (first day) to Saturday (seventh - day). - * MONDAY_WEEK Return the number of the week within the year. First week starts on first Monday of January. - * SUNDAY_WEEK Return the number of the week within the year. First week starts on first Sunday of January. - * ISO_WEEK Return the number of the ISO week within the ISO year. First ISO week has the majority (4 or more) - of its days in January. ISO week starts on Monday. - * US_WEEK Return the number of the US week within the US year. First US week has the majority (4 or more) of - its days in January. US week starts on Sunday. - * HOUR Return the hour (0-23). - * MINUTE Return the minute (0-59). - * SECOND Return the second (0-59). - * MILLISECOND Return number of milliseconds since the last full second. - * MICROSECOND Return number of microseconds since the last full millisecond. - * NANOSECOND Return number of nanoseconds since the last full microsecond. - * SUBSECOND Return number of microseconds since the last full second of the given timestamp. - * UNIX_TIME Return number of seconds that have elapsed since 1970-01-01 00:00:00 UTC, ignoring leap seconds. - * TIMEZONE_OFFSET Return number of seconds of timezone offset to UTC. - - The range of values returned for QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, SUNDAY_DAY_OF_WEEK, - MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, and US_WEEK depends on whether counting starts at 1 or 0. This is governed - by the indexing option. - - When indexing is ONE: - * QUARTER returns values in range 1-4 - * MONTH returns values in range 1-12 - * DAY returns values in range 1-31 - * DAY_OF_YEAR returns values in range 1-366 - * MONDAY_DAY_OF_WEEK and SUNDAY_DAY_OF_WEEK return values in range 1-7 - * MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, and US_WEEK return values in range 1-53 - - When indexing is ZERO: - * QUARTER returns values in range 0-3 - * MONTH returns values in range 0-11 - * DAY returns values in range 0-30 - * DAY_OF_YEAR returns values in range 0-365 - * MONDAY_DAY_OF_WEEK and SUNDAY_DAY_OF_WEEK return values in range 0-6 - * MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, and US_WEEK return values in range 0-52 - - The indexing option must be specified when the component is QUARTER, MONTH, DAY, DAY_OF_YEAR, - MONDAY_DAY_OF_WEEK, SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, or US_WEEK. The - indexing option cannot be specified when the component is YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND, - MILLISECOND, MICROSECOND, SUBSECOND, UNIX_TIME, or TIMEZONE_OFFSET. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: component - options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND, - MILLISECOND, MICROSECOND, SUBSECOND, UNIX_TIME, TIMEZONE_OFFSET ] - description: The part of the value to extract. - - name: x - value: timestamp_tz - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: i64 - - args: - - name: component - options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND, - MILLISECOND, MICROSECOND, NANOSECOND, SUBSECOND, UNIX_TIME, TIMEZONE_OFFSET ] - description: The part of the value to extract. - - name: x - value: precision_timestamp_tz

- - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: i64 - - args: - - name: component - options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND, - MILLISECOND, MICROSECOND, SUBSECOND, UNIX_TIME ] - description: The part of the value to extract. - - name: x - value: timestamp - return: i64 - - args: - - name: component - options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND, - MILLISECOND, MICROSECOND, NANOSECOND, SUBSECOND, UNIX_TIME ] - description: The part of the value to extract. - - name: x - value: precision_timestamp

- return: i64 - - args: - - name: component - options: [ YEAR, ISO_YEAR, US_YEAR, UNIX_TIME ] - description: The part of the value to extract. - - name: x - value: date - return: i64 - - args: - - name: component - options: [ HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND, SUBSECOND ] - description: The part of the value to extract. - - name: x - value: time - return: i64 - - args: - - name: component - options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, - SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ] - description: The part of the value to extract. - - name: indexing - options: [ ONE, ZERO ] - description: Start counting from 1 or 0. - - name: x - value: timestamp_tz - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: i64 - - args: - - name: component - options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, - SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ] - description: The part of the value to extract. - - name: indexing - options: [ ONE, ZERO ] - description: Start counting from 1 or 0. - - name: x - value: precision_timestamp_tz

- - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: i64 - - args: - - name: component - options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, - SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ] - description: The part of the value to extract. - - name: indexing - options: [ ONE, ZERO ] - description: Start counting from 1 or 0. - - name: x - value: timestamp - return: i64 - - args: - - name: component - options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, - SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ] - description: The part of the value to extract. - - name: indexing - options: [ ONE, ZERO ] - description: Start counting from 1 or 0. - - name: x - value: precision_timestamp

- return: i64 - - args: - - name: component - options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, - SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ] - description: The part of the value to extract. - - name: indexing - options: [ ONE, ZERO ] - description: Start counting from 1 or 0. - - name: x - value: date - return: i64 - - - name: "extract_boolean" - description: >- - Extract boolean values of a date/time value. - * IS_LEAP_YEAR Return true if year of the given value is a leap year and false otherwise. - * IS_DST Return true if DST (Daylight Savings Time) is observed at the given value - in the given timezone. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: component - options: [ IS_LEAP_YEAR ] - description: The part of the value to extract. - - name: x - value: timestamp - return: boolean - - args: - - name: component - options: [ IS_LEAP_YEAR ] - description: The part of the value to extract. - - name: x - value: precision_timestamp

- return: boolean - - args: - - name: component - options: [ IS_LEAP_YEAR, IS_DST ] - description: The part of the value to extract. - - name: x - value: timestamp_tz - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: boolean - - args: - - name: component - options: [ IS_LEAP_YEAR, IS_DST ] - description: The part of the value to extract. - - name: x - value: precision_timestamp_tz

- - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: boolean - - args: - - name: component - options: [ IS_LEAP_YEAR ] - description: The part of the value to extract. - - name: x - value: date - return: boolean - - - name: "add" - description: >- - Add an interval to a date/time type. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: x - value: timestamp - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: precision_timestamp

- - name: y - value: interval_year - return: precision_timestamp

- - args: - - name: x - value: timestamp_tz - - name: y - value: interval_year - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: interval_year - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: timestamp - - name: y - value: interval_day

- return: timestamp - - args: - - name: x - value: precision_timestamp

- - name: y - value: interval_day

- return: precision_timestamp

- - args: - - name: x - value: timestamp_tz - - name: y - value: interval_day

- return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: interval_day

- return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: y - value: interval_day

- return: timestamp - - - name: "multiply" - description: Multiply an interval by an integral number. - impls: - - args: - - name: x - value: i8 - - name: y - value: interval_day

- return: interval_day

- - args: - - name: x - value: i16 - - name: y - value: interval_day

- return: interval_day

- - args: - - name: x - value: i32 - - name: y - value: interval_day

- return: interval_day

- - args: - - name: x - value: i64 - - name: y - value: interval_day

- return: interval_day

- - args: - - name: x - value: i8 - - name: y - value: interval_year - return: interval_year - - args: - - name: x - value: i16 - - name: y - value: interval_year - return: interval_year - - args: - - name: x - value: i32 - - name: y - value: interval_year - return: interval_year - - args: - - name: x - value: i64 - - name: y - value: interval_year - return: interval_year - - - name: "add_intervals" - description: Add two intervals together. - impls: - - args: - - name: x - value: interval_day

- - name: y - value: interval_day

- return: interval_day

- - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: interval_year - - - name: "subtract" - description: >- - Subtract an interval from a date/time type. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: x - value: timestamp - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: precision_timestamp

- - name: y - value: interval_year - return: precision_timestamp

- - args: - - name: x - value: timestamp_tz - - name: y - value: interval_year - return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: interval_year - return: precision_timestamp_tz

- - args: - - name: x - value: timestamp_tz - - name: y - value: interval_year - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: interval_year - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: y - value: interval_year - return: date - - args: - - name: x - value: timestamp - - name: y - value: interval_day

- return: timestamp - - args: - - name: x - value: precision_timestamp

- - name: y - value: interval_day

- return: precision_timestamp

- - args: - - name: x - value: timestamp_tz - - name: y - value: interval_day

- return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: interval_day

- return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: y - value: interval_day

- return: date - - - name: "lte" - description: less than or equal to - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: precision_timestamp

- - name: y - value: precision_timestamp

- return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: precision_timestamp_tz

- return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day

- - name: y - value: interval_day

- return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "lt" - description: less than - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: precision_timestamp

- - name: y - value: precision_timestamp

- return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: precision_timestamp_tz

- return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day

- - name: y - value: interval_day

- return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "gte" - description: greater than or equal to - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: precision_timestamp

- - name: y - value: precision_timestamp

- return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: precision_timestamp_tz

- return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day

- - name: y - value: interval_day

- return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "gt" - description: greater than - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: precision_timestamp

- - name: y - value: precision_timestamp

- return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: precision_timestamp_tz

- - name: y - value: precision_timestamp_tz

- return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day

- - name: y - value: interval_day

- return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "assume_timezone" - description: >- - Convert local timestamp to UTC-relative timestamp_tz using given local time's timezone. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: x - value: timestamp - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: timestamp_tz - - args: - - name: x - value: precision_timestamp

- - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: timezone - description: Timezone string from IANA tzdb. Returned timestamp_tz will have time set to 00:00:00. - value: string - return: timestamp_tz - - - name: "local_timestamp" - description: >- - Convert UTC-relative timestamp_tz to local timestamp using given local time's timezone. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: x - value: timestamp_tz - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: timestamp - - args: - - name: x - value: precision_timestamp_tz

- - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: precision_timestamp

- - - name: "strptime_time" - description: >- - Parse string into time using provided format, - see https://man7.org/linux/man-pages/man3/strptime.3.html for reference. - impls: - - args: - - name: time_string - value: string - - name: format - value: string - return: time - - - name: "strptime_date" - description: >- - Parse string into date using provided format, - see https://man7.org/linux/man-pages/man3/strptime.3.html for reference. - impls: - - args: - - name: date_string - value: string - - name: format - value: string - return: date - - - name: "strptime_timestamp" - description: >- - Parse string into timestamp using provided format, - see https://man7.org/linux/man-pages/man3/strptime.3.html for reference. - If timezone is present in timestamp and provided as parameter an error is thrown. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is supplied as parameter and present in the parsed string the parsed timezone is used. - If parameter supplied timezone is invalid an error is thrown. - impls: - - args: - - name: timestamp_string - value: string - - name: format - value: string - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: timestamp_tz - - args: - - name: timestamp_string - value: string - - name: format - value: string - return: timestamp_tz - - - name: "strftime" - description: >- - Convert timestamp/date/time to string using provided format, - see https://man7.org/linux/man-pages/man3/strftime.3.html for reference. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: x - value: timestamp - - name: format - value: string - return: string - - args: - - name: x - value: precision_timestamp

- - name: format - value: string - return: string - - args: - - name: x - value: timestamp_tz - - name: format - value: string - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: string - - args: - - name: x - value: precision_timestamp_tz

- - name: format - value: string - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: string - - args: - - name: x - value: date - - name: format - value: string - return: string - - args: - - name: x - value: time - - name: format - value: string - return: string - - - name: "round_temporal" - description: >- - Round a given timestamp/date/time to a multiple of a time unit. If the given timestamp is not already an - exact multiple from the origin in the given timezone, the resulting point is chosen as one of the - two nearest multiples. Which of these is chosen is governed by rounding: FLOOR means to use the earlier - one, CEIL means to use the later one, ROUND_TIE_DOWN means to choose the nearest and tie to the - earlier one if equidistant, ROUND_TIE_UP means to choose the nearest and tie to the later one if - equidistant. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - impls: - - args: - - name: x - value: timestamp - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: multiple - value: i64 - - name: origin - value: timestamp - return: timestamp - - args: - - name: x - value: precision_timestamp

- - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: multiple - value: i64 - - name: origin - value: precision_timestamp

- return: precision_timestamp

- - args: - - name: x - value: timestamp_tz - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: multiple - value: i64 - - name: timezone - description: Timezone string from IANA tzdb. - value: string - - name: origin - value: timestamp_tz - return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: multiple - value: i64 - - name: timezone - description: Timezone string from IANA tzdb. - value: string - - name: origin - value: precision_timestamp_tz

- return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY ] - - name: multiple - value: i64 - - name: origin - value: date - return: date - - args: - - name: x - value: time - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: multiple - value: i64 - - name: origin - value: time - return: time - - - name: "round_calendar" - description: >- - Round a given timestamp/date/time to a multiple of a time unit. If the given timestamp is not already an - exact multiple from the last origin unit in the given timezone, the resulting point is chosen as one of the - two nearest multiples. Which of these is chosen is governed by rounding: FLOOR means to use the earlier - one, CEIL means to use the later one, ROUND_TIE_DOWN means to choose the nearest and tie to the - earlier one if equidistant, ROUND_TIE_UP means to choose the nearest and tie to the later one if - equidistant. - - Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones). - Examples: "Pacific/Marquesas", "Etc/GMT+1". - If timezone is invalid an error is thrown. - - impls: - - args: - - name: x - value: timestamp - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: origin - options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, - US_WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND ] - - name: multiple - value: i64 - return: timestamp - - args: - - name: x - value: precision_timestamp

- - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: origin - options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, - US_WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND ] - - name: multiple - value: i64 - return: precision_timestamp

- - args: - - name: x - value: timestamp_tz - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: origin - options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, - US_WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND ] - - name: multiple - value: i64 - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: timestamp_tz - - args: - - name: x - value: precision_timestamp_tz

- - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: origin - options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, - US_WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND ] - - name: multiple - value: i64 - - name: timezone - description: Timezone string from IANA tzdb. - value: string - return: precision_timestamp_tz

- - args: - - name: x - value: date - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ YEAR, MONTH, WEEK, DAY ] - - name: origin - options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK, DAY ] - - name: multiple - value: i64 - - name: origin - value: date - return: date - - args: - - name: x - value: time - - name: rounding - options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ] - - name: unit - options: [ DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ] - - name: origin - options: [ DAY, HOUR, MINUTE, SECOND, MILLISECOND ] - - name: multiple - value: i64 - - name: origin - value: time - return: time - -aggregate_functions: - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: date - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: date? - return: date? - - args: - - name: x - value: time - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: time? - return: time? - - args: - - name: x - value: timestamp - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: timestamp? - return: timestamp? - - args: - - name: x - value: precision_timestamp

- nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: precision_timestamp?

- return: precision_timestamp?

- - args: - - name: x - value: timestamp_tz - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: timestamp_tz? - return: timestamp_tz? - - args: - - name: x - value: precision_timestamp_tz

- nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: precision_timestamp_tz?

- return: precision_timestamp_tz?

- - args: - - name: x - value: interval_day

- nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: interval_day?

- return: interval_day?

- - args: - - name: x - value: interval_year - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: interval_year? - return: interval_year? - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: date - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: date? - return: date? - - args: - - name: x - value: time - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: time? - return: time? - - args: - - name: x - value: timestamp - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: timestamp? - return: timestamp? - - args: - - name: x - value: timestamp_tz - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: timestamp_tz? - return: timestamp_tz? - - args: - - name: x - value: precision_timestamp_tz

- nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: precision_timestamp_tz?

- return: precision_timestamp_tz?

- - args: - - name: x - value: interval_day

- nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: interval_day?

- return: interval_day?

- - args: - - name: x - value: interval_year - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: interval_year? - return: interval_year? diff --git a/subframe/extensions/extensions/functions_geometry.yaml b/subframe/extensions/extensions/functions_geometry.yaml deleted file mode 100644 index 8cf1318..0000000 --- a/subframe/extensions/extensions/functions_geometry.yaml +++ /dev/null @@ -1,239 +0,0 @@ -%YAML 1.2 ---- -types: - - name: geometry - structure: "BINARY" -# description: | -# An opaque type that can represent one or many points, lines, or shapes encompassing -# 2, 3 or 4 dimension. -scalar_functions: - - - name: "point" - description: > - Returns a 2D point with the given `x` and `y` coordinate values. - impls: - - args: - - name: x - value: fp64 - - name: y - value: fp64 - return: u!geometry - - - name: "make_line" - description: > - Returns a linestring connecting the endpoint of geometry `geom1` to the begin point of - geometry `geom2`. Repeated points at the beginning of input geometries are collapsed to a single point. - - A linestring can be closed or simple. A closed linestring starts and ends on the same - point. A simple linestring does not cross or touch itself. - impls: - - args: - - name: geom1 - value: u!geometry - - name: geom2 - value: u!geometry - return: u!geometry - - - name: "x_coordinate" - description: > - Return the x coordinate of the point. Return null if not available. - impls: - - args: - - name: point - value: u!geometry - return: fp64 - - - name: "y_coordinate" - description: > - Return the y coordinate of the point. Return null if not available. - impls: - - args: - - name: point - value: u!geometry - return: fp64 - - - name: "num_points" - description: > - Return the number of points in the geometry. The geometry should be an linestring - or circularstring. - impls: - - args: - - name: geom - value: u!geometry - return: i64 - - - name: "is_empty" - description: > - Return true is the geometry is an empty geometry. - impls: - - args: - - name: geom - value: u!geometry - return: boolean - - - name: "is_closed" - description: > - Return true if the geometry's start and end points are the same. - impls: - - args: - - name: geom - value: geometry - return: boolean - - - name: "is_simple" - description: > - Return true if the geometry does not self intersect. - impls: - - args: - - name: geom - value: u!geometry - return: boolean - - - name: "is_ring" - description: > - Return true if the geometry's start and end points are the same and it does not self - intersect. - impls: - - args: - - name: geom - value: u!geometry - return: boolean - - - name: "geometry_type" - description: > - Return the type of geometry as a string. - impls: - - args: - - name: geom - value: u!geometry - return: string - - - name: "envelope" - description: > - Return the minimum bounding box for the input geometry as a geometry. - - The returned geometry is defined by the corner points of the bounding box. If the - input geometry is a point or a line, the returned geometry can also be a point or line. - impls: - - args: - - name: geom - value: u!geometry - return: u!geometry - - - name: "dimension" - description: > - Return the dimension of the input geometry. If the input is a collection of geometries, - return the largest dimension from the collection. Dimensionality is determined by - the complexity of the input and not the coordinate system being used. - - Type dimensions: - POINT - 0 - LINE - 1 - POLYGON - 2 - impls: - - args: - - name: geom - value: u!geometry - return: i8 - - - name: "is_valid" - description: > - Return true if the input geometry is a valid 2D geometry. - - For 3 dimensional and 4 dimensional geometries, the validity is still only tested - in 2 dimensions. - impls: - - args: - - name: geom - value: u!geometry - return: boolean - - - name: "collection_extract" - description: > - Given the input geometry collection, return a homogenous multi-geometry. All geometries - in the multi-geometry will have the same dimension. - - If type is not specified, the multi-geometry will only contain geometries of the highest - dimension. If type is specified, the multi-geometry will only contain geometries - of that type. If there are no geometries of the specified type, an empty geometry - is returned. Only points, linestrings, and polygons are supported. - - Type numbers: - POINT - 0 - LINE - 1 - POLYGON - 2 - impls: - - args: - - name: geom_collection - value: u!geometry - return: u!geometry - - args: - - name: geom_collection - value: u!geometry - - name: type - value: i8 - return: u!geometry - - - name: "flip_coordinates" - description: > - Return a version of the input geometry with the X and Y axis flipped. - - This operation can be performed on geometries with more than 2 dimensions. However, - only X and Y axis will be flipped. - impls: - - args: - - name: geom_collection - value: u!geometry - return: u!geometry - - - name: "remove_repeated_points" - description: > - Return a version of the input geometry with duplicate consecutive points removed. - - If the `tolerance` argument is provided, consecutive points within the tolerance - distance of one another are considered to be duplicates. - impls: - - args: - - name: geom - value: u!geometry - return: u!geometry - - args: - - name: geom - value: u!geometry - - name: tolerance - value: fp64 - return: u!geometry - - - name: "buffer" - description: > - Compute and return an expanded version of the input geometry. All the points - of the returned geometry are at a distance of `buffer_radius` away from the points - of the input geometry. If a negative `buffer_radius` is provided, the geometry will - shrink instead of expand. A negative `buffer_radius` may shrink the geometry completely, - in which case an empty geometry is returned. For input the geometries of points or lines, - a negative `buffer_radius` will always return an emtpy geometry. - impls: - - args: - - name: geom - value: u!geometry - - name: buffer_radius - value: fp64 - return: u!geometry - - - name: "centroid" - description: > - Return a point which is the geometric center of mass of the input geometry. - impls: - - args: - - name: geom - value: u!geometry - return: u!geometry - - - name: "minimum_bounding_circle" - description: > - Return the smallest circle polygon that contains the input geometry. - impls: - - args: - - name: geom - value: u!geometry - return: u!geometry diff --git a/subframe/extensions/extensions/functions_logarithmic.yaml b/subframe/extensions/extensions/functions_logarithmic.yaml deleted file mode 100644 index b46f3d3..0000000 --- a/subframe/extensions/extensions/functions_logarithmic.yaml +++ /dev/null @@ -1,254 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "ln" - description: "Natural logarithm of the value" - impls: - - args: - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: decimal - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [ NAN, ERROR, MINUS_INFINITY ] - return: fp64 - - - name: "log10" - description: "Logarithm to base 10 of the value" - impls: - - args: - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: decimal - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [ NAN, ERROR, MINUS_INFINITY ] - return: fp64 - - - name: "log2" - description: "Logarithm to base 2 of the value" - impls: - - args: - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: decimal - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [ NAN, ERROR, MINUS_INFINITY ] - return: fp64 - - - name: "logb" - description: > - Logarithm of the value with the given base - - logb(x, b) => log_{b} (x) - impls: - - args: - - value: i64 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: i64 - name: "base" - description: "The logarithm base `b` to use" - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - value: fp32 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: fp32 - name: "base" - description: "The logarithm base `b` to use" - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - value: fp64 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: fp64 - name: "base" - description: "The logarithm base `b` to use" - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - value: decimal - name: "x" - description: "The number `x` to compute the logarithm of" - - value: decimal - name: "base" - description: "The logarithm base `b` to use" - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - - name: "log1p" - description: > - Natural logarithm (base e) of 1 + x - - log1p(x) => log(1+x) - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - args: - - name: x - value: decimal - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, "NULL", ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 diff --git a/subframe/extensions/extensions/functions_rounding.yaml b/subframe/extensions/extensions/functions_rounding.yaml deleted file mode 100644 index 09309f2..0000000 --- a/subframe/extensions/extensions/functions_rounding.yaml +++ /dev/null @@ -1,270 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "ceil" - description: > - Rounding to the ceiling of the value `x`. - impls: - - args: - - value: fp32 - name: "x" - return: fp32 - - args: - - value: fp64 - name: "x" - return: fp64 - - - name: "floor" - description: > - Rounding to the floor of the value `x`. - impls: - - args: - - value: fp32 - name: "x" - return: fp32 - - args: - - value: fp64 - name: "x" - return: fp64 - - - name: "round" - description: > - Rounding the value `x` to `s` decimal places. - impls: - - args: - - value: i8 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i8? - - args: - - value: i16 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i16? - - args: - - value: i32 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i32? - - args: - - value: i64 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i64? - - args: - - value: fp32 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, the rounding - is performed to a `s` number of decimal places. - - When `s` is a negative number, the rounding is - performed to the left side of the decimal point - as specified by `s`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - value: fp64 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, the rounding - is performed to a `s` number of decimal places. - - When `s` is a negative number, the rounding is - performed to the left side of the decimal point - as specified by `s`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: fp64? diff --git a/subframe/extensions/extensions/functions_set.yaml b/subframe/extensions/extensions/functions_set.yaml deleted file mode 100644 index 58b9642..0000000 --- a/subframe/extensions/extensions/functions_set.yaml +++ /dev/null @@ -1,27 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "index_in" - description: > - Checks the membership of a value in a list of values - - Returns the first 0-based index value of some input `needle` if `needle` is equal to - any element in `haystack`. Returns `NULL` if not found. - - If `needle` is `NULL`, returns `NULL`. - - If `needle` is `NaN`: - - Returns 0-based index of `NaN` in `input` (default) - - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified) - impls: - - args: - - name: needle - value: any1 - - name: haystack - value: list - options: - nan_equality: - values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ] - nullability: DECLARED_OUTPUT - return: i64? diff --git a/subframe/extensions/extensions/functions_string.yaml b/subframe/extensions/extensions/functions_string.yaml deleted file mode 100644 index 3acdb48..0000000 --- a/subframe/extensions/extensions/functions_string.yaml +++ /dev/null @@ -1,1481 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: concat - description: >- - Concatenate strings. - - The `null_handling` option determines whether or not null values will be recognized by the function. - If `null_handling` is set to `IGNORE_NULLS`, null value arguments will be ignored when strings are concatenated. - If set to `ACCEPT_NULLS`, the result will be null if any argument passed to the concat function is null. - impls: - - args: - - value: "varchar" - name: "input" - variadic: - min: 1 - options: - null_handling: - values: [ IGNORE_NULLS, ACCEPT_NULLS ] - return: "varchar" - - args: - - value: "string" - name: "input" - variadic: - min: 1 - options: - null_handling: - values: [ IGNORE_NULLS, ACCEPT_NULLS ] - return: "string" - - - name: like - description: >- - Are two strings like each other. - - The `case_sensitivity` option applies to the `match` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "match" - description: The string to match against the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "match" - description: The string to match against the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - - name: substring - description: >- - Extract a substring of a specified `length` starting from position `start`. - A `start` value of 1 refers to the first characters of the string. When - `length` is not specified the function will extract a substring starting - from position `start` and ending at the end of the string. - - The `negative_start` option applies to the `start` parameter. `WRAP_FROM_END` means - the index will start from the end of the `input` and move backwards. - The last character has an index of -1, the second to last character has an index of -2, - and so on. `LEFT_OF_BEGINNING` means the returned substring will start from - the left of the first character. A `start` of -1 will begin 2 characters left of the - the `input`, while a `start` of 0 begins 1 character left of the `input`. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - options: - negative_start: - values: [ WRAP_FROM_END, LEFT_OF_BEGINNING, ERROR ] - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - options: - negative_start: - values: [ WRAP_FROM_END, LEFT_OF_BEGINNING, ERROR ] - return: "string" - - args: - - value: "fixedchar" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - options: - negative_start: - values: [ WRAP_FROM_END, LEFT_OF_BEGINNING, ERROR ] - return: "string" - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "start" - options: - negative_start: - values: [ WRAP_FROM_END, LEFT_OF_BEGINNING ] - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "start" - options: - negative_start: - values: [ WRAP_FROM_END, LEFT_OF_BEGINNING ] - return: "string" - - args: - - value: "fixedchar" - name: "input" - - value: i32 - name: "start" - options: - negative_start: - values: [ WRAP_FROM_END, LEFT_OF_BEGINNING ] - return: "string" - - - name: regexp_match_substring - description: >- - Extract a substring that matches the given regular expression pattern. The regular expression - pattern should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The occurrence of the - pattern to be extracted is specified using the `occurrence` argument. Specifying `1` means - the first occurrence will be extracted, `2` means the second occurrence, and so on. - The `occurrence` argument should be a positive non-zero integer. The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. The regular - expression capture group can be specified using the `group` argument. Specifying `0` - will return the substring matching the full regular expression. Specifying `1` will - return the substring matching only the first capture group, and so on. The `group` - argument should be a non-negative integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the occurrence value is out of range, - the position value is out of range, or the group value is out of range. - impls: - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - - value: i64 - name: "group" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "varchar" - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - - value: i64 - name: "group" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "string" - - - name: regexp_match_substring_all - description: >- - Extract all substrings that match the given regular expression pattern. This will return a - list of extracted strings with one value for each occurrence of a match. The regular expression - pattern should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. The regular - expression capture group can be specified using the `group` argument. Specifying `0` - will return substrings matching the full regular expression. Specifying `1` will return - substrings matching only the first capture group, and so on. The `group` argument should - be a non-negative integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the position value is out of range, - or the group value is out of range. - impls: - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "group" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "List>" - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "group" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "List" - - - name: starts_with - description: >- - Whether the `input` string starts with the `substring`. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - - name: ends_with - description: >- - Whether `input` string ends with the substring. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - - name: contains - description: >- - Whether the `input` string contains the `substring`. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "boolean" - - - name: strpos - description: >- - Return the position of the first occurrence of a string in another string. The first - character of the string is at position 1. If no occurrence is found, 0 is returned. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - - name: regexp_strpos - description: >- - Return the position of an occurrence of the given regular expression pattern in a - string. The first character of the string is at position 1. The regular expression pattern - should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. Which occurrence to - return the position of is specified using the `occurrence` argument. Specifying `1` means - the position first occurrence will be returned, `2` means the position of the second - occurrence, and so on. The `occurrence` argument should be a positive non-zero integer. If - no occurrence is found, 0 is returned. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or - the position value is out of range. - impls: - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - - name: count_substring - description: >- - Return the number of non-overlapping occurrences of a substring in an input string. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to count. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to count. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to count. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - - name: regexp_count_substring - description: >- - Return the number of non-overlapping occurrences of a regular expression pattern in an input - string. The regular expression pattern should follow the International Components for - Unicode implementation (https://unicode-org.github.io/icu/userguide/strings/regexp.html). - The number of characters from the beginning of the string to begin starting to search for - pattern matches can be specified using the `position` argument. Specifying `1` means to - search for matches starting at the first character of the input string, `2` means the - second character, and so on. The `position` argument should be a positive non-zero integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile or the position value is out of range. - impls: - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - args: - - value: "fixedchar" - name: "input" - - value: "fixedchar" - name: "pattern" - - value: i64 - name: "position" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - - name: replace - description: >- - Replace all occurrences of the substring with the replacement string. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "string" - name: "input" - description: Input string. - - value: "string" - name: "substring" - description: The substring to replace. - - value: "string" - name: "replacement" - description: The replacement string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "string" - - args: - - value: "varchar" - name: "input" - description: Input string. - - value: "varchar" - name: "substring" - description: The substring to replace. - - value: "varchar" - name: "replacement" - description: The replacement string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "varchar" - - - name: concat_ws - description: Concatenate strings together separated by a separator. - impls: - - args: - - value: "string" - name: "separator" - description: Character to separate strings by. - - value: "string" - name: "string_arguments" - description: Strings to be concatenated. - variadic: - min: 1 - return: "string" - - args: - - value: "varchar" - name: "separator" - description: Character to separate strings by. - - value: "varchar" - name: "string_arguments" - description: Strings to be concatenated. - variadic: - min: 1 - return: "varchar" - - - name: repeat - description: Repeat a string `count` number of times. - impls: - - args: - - value: "string" - name: "input" - - value: i64 - name: "count" - return: "string" - - args: - - value: "varchar" - - value: i64 - name: "input" - - value: i64 - name: "count" - return: "varchar" - - - name: reverse - description: Returns the string in reverse order. - impls: - - args: - - value: "string" - name: "input" - return: "string" - - args: - - value: "varchar" - name: "input" - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: replace_slice - description: >- - Replace a slice of the input string. A specified 'length' of characters will be deleted from - the input string beginning at the 'start' position and will be replaced by a new string. A - start value of 1 indicates the first character of the input string. If start is negative - or zero, or greater than the length of the input string, a null string is returned. If 'length' - is negative, a null string is returned. If 'length' is zero, inserting of the new string - occurs at the specified 'start' position and no characters are deleted. If 'length' is - greater than the input string, deletion will occur up to the last character of the input string. - impls: - - args: - - value: "string" - name: "input" - description: Input string. - - value: i64 - name: "start" - description: The position in the string to start deleting/inserting characters. - - value: i64 - name: "length" - description: The number of characters to delete from the input string. - - value: "string" - name: "replacement" - description: The new string to insert at the start position. - return: "string" - - args: - - value: "varchar" - name: "input" - description: Input string. - - value: i64 - name: "start" - description: The position in the string to start deleting/inserting characters. - - value: i64 - name: "length" - description: The number of characters to delete from the input string. - - value: "varchar" - name: "replacement" - description: The new string to insert at the start position. - return: "varchar" - - - name: lower - description: >- - Transform the string to lower case characters. Implementation should follow the utf8_unicode_ci - collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: upper - description: >- - Transform the string to upper case characters. Implementation should follow the utf8_unicode_ci - collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: swapcase - description: >- - Transform the string's lowercase characters to uppercase and uppercase characters to - lowercase. Implementation should follow the utf8_unicode_ci collations according to the - Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: capitalize - description: >- - Capitalize the first character of the input string. Implementation should follow the - utf8_unicode_ci collations according to the Unicode Collation Algorithm described at - http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: title - description: >- - Converts the input string into titlecase. Capitalize the first character of each word in the - input string except for articles (a, an, the). Implementation should follow the - utf8_unicode_ci collations according to the Unicode Collation Algorithm described at - http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: initcap - description: >- - Capitalizes the first character of each word in the input string, including articles, - and lowercases the rest. Implementation should follow the utf8_unicode_ci collations - according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: char_length - description: >- - Return the number of characters in the input string. The length includes trailing spaces. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: bit_length - description: Return the number of bits in the input string. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: octet_length - description: Return the number of bytes in the input string. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: regexp_replace - description: >- - Search a string for a substring that matches a given regular expression pattern and replace - it with a replacement string. The regular expression pattern should follow the - International Components for Unicode implementation (https://unicode-org.github - .io/icu/userguide/strings/regexp.html). The occurrence of the pattern to be replaced is - specified using the `occurrence` argument. Specifying `1` means only the first occurrence - will be replaced, `2` means the second occurrence, and so on. Specifying `0` means all - occurrences will be replaced. The number of characters from the beginning of the string to - begin starting to search for pattern matches can be specified using the `position` argument. - Specifying `1` means to search for matches starting at the first character of the input - string, `2` means the second character, and so on. The `position` argument should be a - positive non-zero integer. The replacement string can capture groups using numbered - backreferences. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the replacement contains an illegal - back-reference, the occurrence value is out of range, or the position value is out of range. - impls: - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "pattern" - description: The regular expression to search for within the input string. - - value: "string" - name: "replacement" - description: The replacement string. - - value: i64 - name: "position" - description: The position to start the search. - - value: i64 - name: "occurrence" - description: Which occurrence of the match to replace. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "string" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "pattern" - description: The regular expression to search for within the input string. - - value: "varchar" - name: "replacement" - description: The replacement string. - - value: i64 - name: "position" - description: The position to start the search. - - value: i64 - name: "occurrence" - description: Which occurrence of the match to replace. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "varchar" - - - name: ltrim - description: >- - Remove any occurrence of the characters from the left side of the string. - If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: rtrim - description: >- - Remove any occurrence of the characters from the right side of the string. - If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: trim - description: >- - Remove any occurrence of the characters from the left and right sides of - the string. If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: lpad - description: >- - Left-pad the input string with the string of 'characters' until the specified length of the - string has been reached. If the input string is longer than 'length', remove characters from - the right-side to shorten it to 'length' characters. If the string of 'characters' is longer - than the remaining 'length' needed to be filled, only pad until 'length' has been reached. - If 'characters' is not specified, the default value is a single space. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "characters" - description: "The string of characters to use for padding." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "characters" - description: "The string of characters to use for padding." - return: "string" - - - name: rpad - description: >- - Right-pad the input string with the string of 'characters' until the specified length of the - string has been reached. If the input string is longer than 'length', remove characters from - the left-side to shorten it to 'length' characters. If the string of 'characters' is longer - than the remaining 'length' needed to be filled, only pad until 'length' has been reached. - If 'characters' is not specified, the default value is a single space. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "characters" - description: "The string of characters to use for padding." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "characters" - description: "The string of characters to use for padding." - return: "string" - - - name: center - description: >- - Center the input string by padding the sides with a single `character` until the specified - `length` of the string has been reached. By default, if the `length` will be reached with - an uneven number of padding, the extra padding will be applied to the right side. - The side with extra padding can be controlled with the `padding` option. - - Behavior is undefined if the number of characters passed to the `character` argument is not 1. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "character" - description: "The character to use for padding." - options: - padding: - values: [ RIGHT, LEFT ] - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "character" - description: "The character to use for padding." - options: - padding: - values: [ RIGHT, LEFT ] - return: "string" - - - name: left - description: Extract `count` characters starting from the left of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "count" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "count" - return: "string" - - - name: right - description: Extract `count` characters starting from the right of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "count" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "count" - return: "string" - - - name: string_split - description: >- - Split a string into a list of strings, based on a specified `separator` character. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "separator" - description: A character used for splitting the string. - return: "List>" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "separator" - description: A character used for splitting the string. - return: "List" - - - name: regexp_string_split - description: >- - Split a string into a list of strings, based on a regular expression pattern. The - substrings matched by the pattern will be used as the separators to split the input - string and will not be included in the resulting list. The regular expression - pattern should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "pattern" - description: The regular expression to search for within the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "List>" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "pattern" - description: The regular expression to search for within the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "List" - -aggregate_functions: - - - - name: string_agg - description: Concatenates a column of string values with a separator. - impls: - - args: - - value: "string" - name: "input" - description: "Column of string values." - - value: "string" - name: "separator" - constant: true - description: "Separator for concatenated strings" - ordered: true - return: "string" diff --git a/subframe/extensions/extensions/type_variations.yaml b/subframe/extensions/extensions/type_variations.yaml deleted file mode 100644 index f6f96d5..0000000 --- a/subframe/extensions/extensions/type_variations.yaml +++ /dev/null @@ -1,25 +0,0 @@ -%YAML 1.2 ---- -type_variations: - - parent: string - name: dict4 - description: a four-byte dictionary encoded string - functions: INHERITS - - parent: string - name: bigoffset - description: >- - The arrow large string representation of strings, still restricted to the default string size defined in - Substrait. - functions: SEPARATE - - parent: struct - name: avro - description: an avro encoded struct - functions: SEPARATE - - parent: struct - name: cstruct - description: a cstruct representation of the struct - functions: SEPARATE - - parent: struct - name: dict2 - description: a 2-byte dictionary encoded string. - functions: INHERITS diff --git a/subframe/utils.py b/subframe/utils.py new file mode 100644 index 0000000..89cdf7d --- /dev/null +++ b/subframe/utils.py @@ -0,0 +1,35 @@ +import re +from substrait.gen.proto.type_pb2 import Type + + +def to_substrait_type(dtype: str): + if dtype in ("bool", "boolean"): + return Type(bool=Type.Boolean()) + elif dtype == "i8": + return Type(i8=Type.I8()) + elif dtype == "i16": + return Type(i16=Type.I16()) + elif dtype == "i32": + return Type(i32=Type.I32()) + elif dtype == "i64": + return Type(i64=Type.I64()) + elif dtype == "fp32": + return Type(fp32=Type.FP32()) + elif dtype == "fp64": + return Type(fp64=Type.FP64()) + elif dtype == "timestamp": + return Type(timestamp=Type.Timestamp()) + elif dtype == "timestamp_tz": + return Type(timestamp_tz=Type.TimestampTZ()) + elif dtype == "date": + return Type(date=Type.Date()) + elif dtype == "time": + return Type(time=Type.Time()) + elif dtype == "interval_year": + return Type(interval_year=Type.IntervalYear()) + elif dtype.startswith("decimal") or dtype.startswith("DECIMAL"): + (_, scale, precision, _) = re.split(r"\W+", dtype) + + return Type(decimal=Type.Decimal(scale=int(scale), precision=int(precision))) + else: + raise Exception(f"Unknown type - {dtype}") diff --git a/subframe/value.py b/subframe/value.py index 47700cf..6544d06 100644 --- a/subframe/value.py +++ b/subframe/value.py @@ -42,28 +42,21 @@ def name(self, name: str): def _apply_function(self, other: "Value", url: str, func: str, col_name: str): from subframe import registry - if not url.startswith("http"): - url = ( - f"https://github.com/substrait-io/substrait/blob/main/extensions/{url}" - ) - - functions = registry.lookup_scalar_function(url, func) - - res = None - - for f in functions: - res = f.lookup_signature( - [self.data_type.WhichOneof("kind"), other.data_type.WhichOneof("kind")] - ) - if res: - break + (func_entry, rtn) = registry.lookup_function( + url, + function_name=func, + signature=[ + self.data_type, + other.data_type, + ], + ) - output_type = substrait_type_from_substrait_str(res[2]["return"]) + output_type = rtn return Value( expression=stalg.Expression( scalar_function=stalg.Expression.ScalarFunction( - function_reference=res[0], + function_reference=func_entry.anchor, output_type=output_type, arguments=[ stalg.FunctionArgument(value=self.expression), @@ -73,7 +66,7 @@ def _apply_function(self, other: "Value", url: str, func: str, col_name: str): ), data_type=output_type, name=f"{col_name}({self._name}, {other._name})", - extensions={url: {res[1]: res[0]}}, + extensions={func_entry.uri: {str(func_entry): func_entry.anchor}}, ) def __add__(self, other: "Value"): @@ -152,24 +145,14 @@ def __init__( def _apply_aggregate_function(self, url: str, func: str, col_name: str): from subframe import registry - if not url.startswith("http"): - url = ( - f"https://github.com/substrait-io/substrait/blob/main/extensions/{url}" - ) - - functions = registry.lookup_aggregate_function(url, func) - - res = None - - for f in functions: - res = f.lookup_signature([self.data_type.WhichOneof("kind")]) - if res: - break + (func_entry, rtn) = registry.lookup_function( + url, function_name=func, signature=[self.data_type] + ) - output_type = substrait_type_from_substrait_str(res[2]["return"]) + output_type = rtn aggregate_function = stalg.AggregateFunction( - function_reference=res[0], + function_reference=func_entry.anchor, phase=stalg.AggregationPhase.AGGREGATION_PHASE_INITIAL_TO_RESULT, # TODO arguments=[stalg.FunctionArgument(value=self.expression)], output_type=output_type, @@ -178,7 +161,7 @@ def _apply_aggregate_function(self, url: str, func: str, col_name: str): return AggregateValue( aggregate_function=aggregate_function, data_type=output_type, - extensions={url: {res[1]: res[0]}}, + extensions={func_entry.uri: {str(func_entry): func_entry.anchor}}, name=f"{col_name}({self._name})", ) diff --git a/tests/test_derivation_expression.py b/tests/test_derivation_expression.py new file mode 100644 index 0000000..9e8812c --- /dev/null +++ b/tests/test_derivation_expression.py @@ -0,0 +1,89 @@ +from substrait.gen.proto.type_pb2 import Type +from subframe.derivation_expression import evaluate + + +def test_simple_arithmetic(): + assert evaluate("1 + 1") == 2 + + +def test_simple_arithmetic_with_variables(): + assert evaluate("1 + var", {"var": 2}) == 3 + + +def test_simple_arithmetic_precedence(): + assert evaluate("1 + var * 3", {"var": 2}) == 7 + + +def test_simple_arithmetic_parenthesis(): + assert evaluate("(1 + var) * 3", {"var": 2}) == 9 + + +def test_min_max(): + assert evaluate("min(var, 7) + max(var, 7) * 2", {"var": 5}) == 19 + + +def test_ternary(): + assert evaluate("var > 3 ? 1 : 0", {"var": 5}) == 1 + assert evaluate("var > 3 ? 1 : 0", {"var": 2}) == 0 + + +def test_multiline(): + assert ( + evaluate( + """ + temp = min(var, 7) + max(var, 7) * 2 + temp + 1 + """, + {"var": 5}, + ) + == 20 + ) + + +def test_simple_data_types(): + assert evaluate("i8") == Type(i8=Type.I8()) + assert evaluate("i16") == Type(i16=Type.I16()) + assert evaluate("i32") == Type(i32=Type.I32()) + assert evaluate("i64") == Type(i64=Type.I64()) + assert evaluate("fp32") == Type(fp32=Type.FP32()) + assert evaluate("fp64") == Type(fp64=Type.FP64()) + assert evaluate("boolean") == Type(bool=Type.Boolean()) + + +def test_data_type(): + assert evaluate("decimal", {"S": 20, "P": 10}) == Type( + decimal=Type.Decimal(scale=21, precision=11) + ) + + +def test_decimal_example(): + def func(P1, S1, P2, S2): + init_scale = max(S1, S2) # 14 + init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 + min_scale = min(init_scale, 6) + delta = init_prec - 38 + prec = min(init_prec, 38) + scale_after_borrow = max(init_scale - delta, min_scale) + scale = scale_after_borrow if init_prec > 38 else init_scale + return Type(decimal=Type.Decimal(scale=prec, precision=scale)) + + args = {"P1": 10, "S1": 8, "P2": 14, "S2": 2} + + func_eval = func(**args) + + assert ( + evaluate( + """ + init_scale = max(S1,S2) + init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 + min_scale = min(init_scale, 6) + delta = init_prec - 38 + prec = min(init_prec, 38) + scale_after_borrow = max(init_scale - delta, min_scale) + scale = init_prec > 38 ? scale_after_borrow : init_scale + DECIMAL + """, + args, + ) + == func_eval + ) diff --git a/tests/test_registry.py b/tests/test_registry.py new file mode 100644 index 0000000..e534c62 --- /dev/null +++ b/tests/test_registry.py @@ -0,0 +1,190 @@ +import yaml + +from substrait.gen.proto.type_pb2 import Type +from subframe.extension_registry import FunctionRegistry + +content = """%YAML 1.2 +--- +scalar_functions: + - name: "test_fn" + description: "" + impls: + - args: + - value: i8 + variadic: + min: 2 + return: i8 + - name: "test_fn_variadic_any" + description: "" + impls: + - args: + - value: any1 + variadic: + min: 2 + return: any1 + - name: "add" + description: "Add two values." + impls: + - args: + - name: x + value: i8 + - name: y + value: i8 + options: + overflow: + values: [ SILENT, SATURATE, ERROR ] + return: i8 + - args: + - name: x + value: i8 + - name: y + value: i8 + - name: z + value: any + options: + overflow: + values: [ SILENT, SATURATE, ERROR ] + return: i16 + - args: + - name: x + value: any1 + - name: y + value: any1 + - name: z + value: any2 + options: + overflow: + values: [ SILENT, SATURATE, ERROR ] + return: any2 + - name: "test_decimal" + impls: + - args: + - name: x + value: decimal + - name: y + value: decimal + return: decimal + +""" + + +registry = FunctionRegistry() + +registry.register_extension_dict(yaml.safe_load(content), uri="test") + + +def i8(): + return Type(i8=Type.I8()) + + +def i16(): + return Type(i16=Type.I16()) + + +def bool(): + return Type(bool=Type.Boolean()) + + +def decimal(scale, precision): + return Type(decimal=Type.Decimal(scale=scale, precision=precision)) + + +def test_non_existing_uri(): + assert ( + registry.lookup_function( + uri="non_existent", function_name="add", signature=[i8(), i8()] + ) + is None + ) + + +def test_non_existing_function(): + assert ( + registry.lookup_function( + uri="test", function_name="sub", signature=[i8(), i8()] + ) + is None + ) + + +def test_non_existing_function_signature(): + assert ( + registry.lookup_function(uri="test", function_name="add", signature=[i8()]) + is None + ) + + +def test_exact_match(): + assert registry.lookup_function( + uri="test", function_name="add", signature=[i8(), i8()] + )[1] == Type(i8=Type.I8()) + + +def test_wildcard_match(): + assert registry.lookup_function( + uri="test", function_name="add", signature=[i8(), i8(), bool()] + )[1] == Type(i16=Type.I16()) + + +def test_wildcard_match_fails_with_constraits(): + assert ( + registry.lookup_function( + uri="test", function_name="add", signature=[i8(), i16(), i16()] + ) + is None + ) + + +def test_wildcard_match_with_constraits(): + assert ( + registry.lookup_function( + uri="test", function_name="add", signature=[i16(), i16(), i8()] + )[1] + == i8() + ) + + +def test_variadic(): + assert ( + registry.lookup_function( + uri="test", function_name="test_fn", signature=[i8(), i8(), i8()] + )[1] + == i8() + ) + + +def test_variadic_any(): + assert ( + registry.lookup_function( + uri="test", + function_name="test_fn_variadic_any", + signature=[i16(), i16(), i16()], + )[1] + == i16() + ) + + +def test_variadic_fails_min_constraint(): + assert ( + registry.lookup_function(uri="test", function_name="test_fn", signature=[i8()]) + is None + ) + + +def test_decimal_happy_path(): + assert registry.lookup_function( + uri="test", + function_name="test_decimal", + signature=[decimal(10, 8), decimal(8, 6)], + )[1] == decimal(11, 7) + + +def test_decimal_violates_constraint(): + assert ( + registry.lookup_function( + uri="test", + function_name="test_decimal", + signature=[decimal(10, 8), decimal(12, 10)], + ) + is None + ) From 0e1f582c6aa0b52c4b3b4d673fc5336e528a0029 Mon Sep 17 00:00:00 2001 From: tokoko Date: Wed, 2 Oct 2024 21:47:28 +0000 Subject: [PATCH 2/2] fix: update lock file --- requirements.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.lock b/requirements.lock index fe65811..42069c4 100644 --- a/requirements.lock +++ b/requirements.lock @@ -47,6 +47,8 @@ pyarrow-hotfix==0.6 # via ibis-framework pygments==2.18.0 # via rich +pyparsing==3.1.4 + # via subframe (pyproject.toml) pytest==8.3.3 # via subframe (pyproject.toml) python-dateutil==2.9.0.post0