Skip to content

Commit

Permalink
Merge branch 'main' of github.com:snowflakedb/snowpark-python into jk…
Browse files Browse the repository at this point in the history
…ew/numpy.full_like
  • Loading branch information
sfc-gh-jkew committed Oct 24, 2024
2 parents f832d52 + 9670b1f commit aa40999
Show file tree
Hide file tree
Showing 44 changed files with 763 additions and 382 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/daily_modin_precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ jobs:
.tox/.coverage
.tox/coverage.xml
test-enable-cte-optimization:
name: Test Enable CTE Optimization modin-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
test-disable-cte-optimization:
name: Test Disable CTE Optimization modin-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
needs: build
runs-on: ${{ matrix.os.image_name }}
strategy:
Expand Down Expand Up @@ -287,15 +287,15 @@ jobs:
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Run Snowpark pandas API tests (excluding doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasdailynotdoctest-modin-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization --skip_sql_count_check --ignore=tests/integ/modin/test_sql_counter.py
PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization --skip_sql_count_check --ignore=tests/integ/modin/test_sql_counter.py
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Combine coverages
Expand All @@ -306,7 +306,7 @@ jobs:
- uses: actions/upload-artifact@v4
with:
include-hidden-files: true
name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-enable-cte-optimization
name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-disable-cte-optimization
path: |
.tox/.coverage
.tox/coverage.xml
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/daily_precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,8 @@ jobs:
.tox/.coverage
.tox/coverage.xml
test-enable-cte-optimization:
name: Test Enable CTE Optimization py-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
test-disable-cte-optimization:
name: Test Disable CTE Optimization py-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
needs: build
runs-on: ${{ matrix.os.image_name }}
strategy:
Expand Down Expand Up @@ -428,15 +428,15 @@ jobs:
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Run tests (excluding doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-dailynotdoctest-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
PYTEST_ADDOPTS: --color=yes --tb=short --disable_cte_optimization
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Combine coverages
Expand All @@ -447,7 +447,7 @@ jobs:
- uses: actions/upload-artifact@v4
with:
include-hidden-files: true
name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-enable-cte-optimization
name: coverage_${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-disable-cte-optimization
path: |
.tox/.coverage
.tox/coverage.xml
Expand Down
59 changes: 0 additions & 59 deletions .github/workflows/precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,64 +246,6 @@ jobs:
.tox/.coverage
.tox/coverage.xml
test-enable-cte-optimization:
name: Test Enable CTE Optimization py-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
needs: build
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest-64-cores]
python-version: ["3.9"]
cloud-provider: [aws]
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Decrypt parameters.py
shell: bash
run: .github/scripts/decrypt_parameters.sh
env:
PARAMETER_PASSWORD: ${{ secrets.PARAMETER_PASSWORD }}
CLOUD_PROVIDER: ${{ matrix.cloud-provider }}
- name: Download wheel(s)
uses: actions/download-artifact@v4
with:
name: wheel
path: dist
- name: Show wheels downloaded
run: ls -lh dist
shell: bash
- name: Upgrade setuptools, pip and wheel
run: python -m pip install -U setuptools pip wheel
- name: Install tox
run: python -m pip install tox
- name: Run tests (excluding doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-notdoctest-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short --enable_cte_optimization
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Combine coverages
run: python -m tox -e coverage --skip-missing-interpreters false
shell: bash
env:
SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1
- uses: actions/upload-artifact@v4
with:
include-hidden-files: true
name: coverage_${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}-enable-cte-optimization
path: |
.tox/.coverage
.tox/coverage.xml
test-snowpark-pandas:
name: Test modin-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
needs: build
Expand Down Expand Up @@ -604,7 +546,6 @@ jobs:
needs:
- test
- test-local-testing
- test-enable-cte-optimization
- test-snowpark-pandas
- test-modin-extra-without-pandas-extra
- test-snowpark-multithreading-mode
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
- Fixed a bug where the automatic cleanup of temporary tables could interfere with the results of async query execution.
- Fixed a bug in `DataFrame.analytics.time_series_agg` function to handle multiple data points in same sliding interval.

#### Deprecations:

- A deprecation warning is now triggered when using snowpark-python with Python 3.8. For more details, please refer to https://docs.snowflake.com/en/developer-guide/python-runtime-support-policy.

### Snowpark pandas API Updates

#### New Features
Expand Down
37 changes: 33 additions & 4 deletions src/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#
import doctest
import logging
import os
import sys
Expand All @@ -15,21 +16,48 @@

RUNNING_ON_GH = os.getenv("GITHUB_ACTIONS") == "true"
TEST_SCHEMA = "GH_JOB_{}".format(str(uuid.uuid4()).replace("-", "_"))
LOCAL_TESTING_MODE = False


# Registers the `--local_testing_mode` command-line flag (a `store_true` option
# that defaults to False). Registration is skipped when an option carrying that
# name is already present in `parser._anonymous.options`.
def pytest_addoption(parser):
# Look through the already-registered options for one named --local_testing_mode.
if not any(
"--local_testing_mode" in opt.names() for opt in parser._anonymous.options
):
parser.addoption("--local_testing_mode", action="store_true", default=False)


# Builds the test report for `item`/`call` by delegating to pytest's own
# `_pytest.runner.pytest_runtest_makereport`, then, in local testing mode,
# rewrites the report of a doctest that failed with NotImplementedError.
# Returns the (possibly modified) report object.
def pytest_runtest_makereport(item, call):
# Imported under an alias because this function has the same name.
from _pytest.runner import pytest_runtest_makereport as _pytest_runtest_makereport

tr = _pytest_runtest_makereport(item, call)

# Only reports for calls that raised are touched, and only while the
# module-level LOCAL_TESTING_MODE flag is set.
if call.excinfo is not None and LOCAL_TESTING_MODE:
# The raised exception must be doctest.UnexpectedException whose wrapped
# exception (exc_info[1]) is a NotImplementedError.
if call.excinfo.type == doctest.UnexpectedException and isinstance(
call.excinfo.value.exc_info[1], NotImplementedError
):
# NOTE(review): presumably a "skipped" outcome together with `wasxfail`
# makes pytest report the test as an expected failure - confirm.
tr.outcome = "skipped"
tr.wasxfail = "[Local Testing] Function has not been implemented yet."

return tr


# The scope is module so that we ensure the session is deleted before
# moving on to running the tests in the tests dir. Having only one
# session is important for certain UDF tests to pass, since they
# use the @udf decorator.
@pytest.fixture(autouse=True, scope="module")
def add_snowpark_session(doctest_namespace):
def add_snowpark_session(doctest_namespace, pytestconfig):
global LOCAL_TESTING_MODE
LOCAL_TESTING_MODE = pytestconfig.getoption("local_testing_mode")
sys.path.append("tests/")
with open("tests/parameters.py", encoding="utf-8") as f:
exec(f.read(), globals())
with Session.builder.configs(
globals()["CONNECTION_PARAMETERS"]
with Session.builder.configs(globals()["CONNECTION_PARAMETERS"]).config(
"local_testing", LOCAL_TESTING_MODE
).create() as session:
session.sql_simplifier_enabled = os.environ.get("USE_SQL_SIMPLIFIER") == "1"
session.sql_simplifier_enabled = (
os.environ.get("USE_SQL_SIMPLIFIER") == "1" or LOCAL_TESTING_MODE
)
if RUNNING_ON_GH:
session.sql(f"CREATE SCHEMA IF NOT EXISTS {TEST_SCHEMA}").collect()
# This is needed for test_get_schema_database_works_after_use_role in test_session_suite
Expand All @@ -39,5 +67,6 @@ def add_snowpark_session(doctest_namespace):
session.use_schema(TEST_SCHEMA)
doctest_namespace["session"] = session
yield
LOCAL_TESTING_MODE = False
if RUNNING_ON_GH:
session.sql(f"DROP SCHEMA IF EXISTS {TEST_SCHEMA}").collect()
24 changes: 24 additions & 0 deletions src/snowflake/snowpark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
]


import sys
import warnings

from snowflake.snowpark.version import VERSION

__version__ = ".".join(str(x) for x in VERSION if x is not None)
Expand Down Expand Up @@ -69,3 +72,24 @@
WhenNotMatchedClause,
)
from snowflake.snowpark.window import Window, WindowSpec

import re

# Text of the warning emitted when this package is imported under Python 3.8.
_deprecation_warning_msg = (
    "Python Runtime 3.8 reached its End-Of-Life (EOL) on October 14, 2024, there will be no further bug fixes "
    "or security updates for this runtime. We recommend that you upgrade your existing Python 3.8 objects to "
    "Python 3.9, 3.10 or 3.11 before March 31, 2025. Please note that end of support does not impact execution, "
    "and you will still be able to update and invoke existing objects. "
    "However, they will be running on an unsupported runtime which will no longer be maintained or patched by "
    "the Snowflake team. For more details, please refer "
    "to https://docs.snowflake.com/en/developer-guide/python-runtime-support-policy."
)
# `warnings.filterwarnings` compiles `message` as a regular expression that must
# match the start of the warning text. The literal message is therefore escaped:
# left raw, "(EOL)" is parsed as a group (matching "EOL" without parentheses), so
# the pattern never matches the real warning and the "once" filter is a no-op.
warnings.filterwarnings(
    "once",  # ensure the warning is only shown once to avoid warning explosion
    message=re.escape(_deprecation_warning_msg),
)

# Warn only on Python 3.8; stacklevel=2 attributes the warning to the caller's
# frame rather than to this module.
if sys.version_info.major == 3 and sys.version_info.minor == 8:
    warnings.warn(
        _deprecation_warning_msg,
        category=DeprecationWarning,
        stacklevel=2,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ class PlanState(Enum):
# the number of SelectStatement nodes in the plan that have
# _merge_projection_complexity_with_subquery set to True
NUM_SELECTS_WITH_COMPLEXITY_MERGED = "num_selects_with_complexity_merged"
# number of cte nodes detected
NUM_CTE_NODES = "num_cte_nodes"
# node complexity distribution for the duplicated nodes that are detected as ctes.
# NOTE: this is not the cte node complexity distribution; in other words, if a
# node occurs twice, it will be counted twice
DUPLICATED_NODE_COMPLEXITY_DISTRIBUTION = "duplicated_node_distribution"


def sum_node_complexities(
Expand Down
37 changes: 20 additions & 17 deletions src/snowflake/snowpark/_internal/analyzer/select_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from collections import UserDict, defaultdict
from copy import copy, deepcopy
from enum import Enum
from functools import cached_property
from functools import cached_property, reduce
from typing import (
TYPE_CHECKING,
AbstractSet,
Expand All @@ -34,7 +34,10 @@
TableFunctionRelation,
)
from snowflake.snowpark._internal.analyzer.window_expression import WindowExpression
from snowflake.snowpark._internal.compiler.cte_utils import encode_node_id_with_query
from snowflake.snowpark._internal.compiler.cte_utils import (
encode_node_id_with_query,
merge_referenced_ctes,
)
from snowflake.snowpark._internal.error_message import SnowparkClientExceptionMessages
from snowflake.snowpark.types import DataType

Expand Down Expand Up @@ -319,10 +322,6 @@ def get_snowflake_plan(self, skip_schema_query) -> SnowflakePlan:
def plan_state(self) -> Dict[PlanState, Any]:
return self.snowflake_plan.plan_state

@property
def num_duplicate_nodes(self) -> int:
return self.snowflake_plan.num_duplicate_nodes

@property
def cumulative_node_complexity(self) -> Dict[PlanNodeCategory, int]:
if self._cumulative_node_complexity is None:
Expand Down Expand Up @@ -370,8 +369,9 @@ def column_states(self, value: ColumnStateDict):

@property
@abstractmethod
def referenced_ctes(self) -> Set[WithQueryBlock]:
"""Return the set of ctes referenced by the whole selectable subtree, includes its-self and children"""
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
"""Return the dict of ctes referenced by the whole selectable subtree and the
reference count of the cte. Includes itself and its children"""
pass


Expand Down Expand Up @@ -422,10 +422,10 @@ def query_params(self) -> Optional[Sequence[Any]]:
return None

@property
def referenced_ctes(self) -> Set[WithQueryBlock]:
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
# the SelectableEntity only allows select from base table. No
# CTE table will be referred.
return set()
return dict()


class SelectSQL(Selectable):
Expand Down Expand Up @@ -513,10 +513,10 @@ def to_subqueryable(self) -> "SelectSQL":
return new

@property
def referenced_ctes(self) -> Set[WithQueryBlock]:
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
# SelectSQL directly calls sql query, there will be no
# auto created CTE tables referenced
return set()
return dict()


class SelectSnowflakePlan(Selectable):
Expand Down Expand Up @@ -588,7 +588,7 @@ def reset_cumulative_node_complexity(self) -> None:
self.snowflake_plan.reset_cumulative_node_complexity()

@property
def referenced_ctes(self) -> Set[WithQueryBlock]:
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
return self._snowflake_plan.referenced_ctes


Expand Down Expand Up @@ -863,7 +863,7 @@ def cumulative_node_complexity(self, value: Dict[PlanNodeCategory, int]):
self._cumulative_node_complexity = value

@property
def referenced_ctes(self) -> Set[WithQueryBlock]:
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
return self.from_.referenced_ctes

def to_subqueryable(self) -> "Selectable":
Expand Down Expand Up @@ -1311,7 +1311,7 @@ def reset_cumulative_node_complexity(self) -> None:
self.snowflake_plan.reset_cumulative_node_complexity()

@property
def referenced_ctes(self) -> Set[WithQueryBlock]:
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
return self._snowflake_plan.referenced_ctes


Expand Down Expand Up @@ -1402,9 +1402,12 @@ def individual_node_complexity(self) -> Dict[PlanNodeCategory, int]:
return {PlanNodeCategory.SET_OPERATION: len(self.set_operands) - 1}

@property
def referenced_ctes(self) -> Set[WithQueryBlock]:
def referenced_ctes(self) -> Dict[WithQueryBlock, int]:
# get a union of referenced cte tables from all child nodes
return set().union(*[node.referenced_ctes for node in self._nodes])
# and sum up the reference counts
return reduce(
merge_referenced_ctes, [node.referenced_ctes for node in self._nodes]
)


class DeriveColumnDependencyError(Exception):
Expand Down
Loading

0 comments on commit aa40999

Please sign in to comment.