Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-evandenberg committed Sep 9, 2024
1 parent 26bff92 commit 4489b1a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 38 deletions.
2 changes: 1 addition & 1 deletion tests/integ/modin/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def sql_counter():
# extending the sql counts in the future.
#
# The following line must be commented out when merged into main.
@pytest.fixture(autouse=True)
# @pytest.fixture(autouse=True)
def auto_annotate_sql_counter(request):
counter = SqlCounter()

Expand Down
53 changes: 20 additions & 33 deletions tests/integ/modin/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import numpy as np
import pandas as native_pd
import pytest
from tests.sql_counter import sql_count_checker
from pytest import param

import snowflake.snowpark.modin.plugin # noqa: F401
Expand Down Expand Up @@ -66,7 +65,6 @@
"data, func, return_type", BASIC_DATA_FUNC_PYTHON_RETURN_TYPE_MAP
)
@pytest.mark.modin_sp_precommit
@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
def test_axis_1_basic_types_without_type_hints(data, func, return_type):
# this test processes functions without type hints and invokes the UDTF solution.
native_df = native_pd.DataFrame(data, columns=["A", "b"])
Expand All @@ -79,7 +77,6 @@ def test_axis_1_basic_types_without_type_hints(data, func, return_type):
"data, func, return_type", BASIC_DATA_FUNC_PYTHON_RETURN_TYPE_MAP
)
@pytest.mark.modin_sp_precommit
@sql_count_checker(query_count=4, udf_count=1, select_count=2, describe_count=4)
def test_axis_1_basic_types_with_type_hints(data, func, return_type):
# explicitly create UDFs with type hints for the supported Python types and process them via vUDF.
native_df = native_pd.DataFrame(data, columns=["A", "b"])
Expand Down Expand Up @@ -111,7 +108,6 @@ def test_axis_1_basic_types_with_type_hints(data, func, return_type):
),
],
)
@sql_count_checker(query_count=4, udf_count=1, select_count=2, describe_count=4)
def test_axis_1_index_passed_as_name(df, row_label):
# when using apply(axis=1), the original index of the dataframe is passed as name.
# test this here for both the regular index and the multi-index scenario.
Expand All @@ -124,7 +120,7 @@ def foo(row) -> str:

snow_df = pd.DataFrame(df)
# Invoking a single UDF typically requires 3 queries (package management, code upload, UDF registration) upfront.
with SqlCounter(query_count=4, join_count=0, udtf_count=0, describe_count=0):
with SqlCounter(query_count=4, join_count=0, udtf_count=0):
eval_snowpark_pandas_result(snow_df, df, lambda x: x.apply(foo, axis=1))


Expand Down Expand Up @@ -205,14 +201,14 @@ def foo(row) -> str:
],
],
)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_date_time_timestamp_type(data, func, expected_result):
snow_df = pd.DataFrame(data)
result = snow_df.apply(func, axis=1)
assert_snowpark_pandas_equal_to_pandas(result, expected_result)


@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_return_list():
snow_df = pd.DataFrame([[1, 2], [3, 4]])
native_df = native_pd.DataFrame([[1, 2], [3, 4]])
Expand All @@ -221,7 +217,7 @@ def test_axis_1_return_list():
)


@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_return_series():
snow_df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "b"])
native_df = native_pd.DataFrame([[1, 2], [3, 4]], columns=["A", "b"])
Expand All @@ -232,7 +228,7 @@ def test_axis_1_return_series():
)


@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_return_series_with_different_label_results():
df = native_pd.DataFrame([[1, 2], [3, 4]], columns=["A", "b"])
snow_df = pd.DataFrame(df)
Expand Down Expand Up @@ -267,13 +263,13 @@ def test_axis_1_return_series_with_different_label_results():
),
],
)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_column_labels(native_df, func):
snow_df = pd.DataFrame(native_df)
eval_snowpark_pandas_result(snow_df, native_df, lambda x: x.apply(func, axis=1))


@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_raw():
snow_df = pd.DataFrame([[1, 2], [3, 4]])
native_df = native_pd.DataFrame([[1, 2], [3, 4]])
Expand All @@ -282,7 +278,7 @@ def test_axis_1_raw():
)


@sql_count_checker(query_count=6, select_count=2, describe_count=14)
@sql_count_checker(query_count=6)
def test_axis_1_return_not_json_serializable_label():
snow_df = pd.DataFrame([1])
with pytest.raises(
Expand All @@ -300,7 +296,6 @@ def test_axis_1_return_not_json_serializable_label():
snow_df.apply(lambda x: native_pd.DataFrame([1, 2]), axis=1).to_pandas()


@sql_count_checker(query_count=14, udf_count=2, select_count=6, describe_count=10)
def test_axis_1_apply_args_kwargs():
def f(x, y, z=1) -> int:
return x.sum() + y + z
Expand Down Expand Up @@ -345,22 +340,22 @@ class TestNotImplemented:
@pytest.mark.parametrize(
"data, func, return_type", BASIC_DATA_FUNC_PYTHON_RETURN_TYPE_MAP
)
@sql_count_checker(describe_count=1)
@sql_count_checker(query_count=0)
def test_axis_0(self, data, func, return_type):
snow_df = pd.DataFrame(data)
msg = "Snowpark pandas apply API doesn't yet support axis == 0"
with pytest.raises(NotImplementedError, match=msg):
snow_df.apply(func)

@pytest.mark.parametrize("result_type", ["reduce", "expand", "broadcast"])
@sql_count_checker(describe_count=1)
@sql_count_checker(query_count=0)
def test_result_type(self, result_type):
snow_df = pd.DataFrame([[1, 2], [3, 4]])
msg = "Snowpark pandas apply API doesn't yet support 'result_type' parameter"
with pytest.raises(NotImplementedError, match=msg):
snow_df.apply(lambda x: [1, 2], axis=1, result_type=result_type)

@sql_count_checker(describe_count=3)
@sql_count_checker(query_count=0)
def test_axis_1_apply_args_kwargs_with_snowpandas_object(self):
def f(x, y=None) -> native_pd.Series:
return x + (y if y is not None else 0)
Expand Down Expand Up @@ -420,7 +415,7 @@ def f(x, y=None) -> native_pd.Series:
lambda x: native_pd.Series([1, 2], index=TEST_INDEX_WITH_NULL_1),
],
)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_multi_index_column_labels(apply_func):
data = [[i + j for j in range(0, 4)] for i in range(0, 4)]

Expand All @@ -432,7 +427,7 @@ def test_axis_1_multi_index_column_labels(apply_func):
)


@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_multi_index_column_labels_with_different_results():
data = [[i + j for j in range(0, 4)] for i in range(0, 4)]

Expand All @@ -448,7 +443,6 @@ def test_axis_1_multi_index_column_labels_with_different_results():
eval_snowpark_pandas_result(snow_df, df, lambda df: df.apply(apply_func, axis=1))


@sql_count_checker(query_count=10, join_count=4, udtf_count=2, select_count=4, describe_count=31)
def test_axis_1_multi_index_column_labels_none_names():
data = [[i + j for j in range(0, 4)] for i in range(0, 4)]

Expand Down Expand Up @@ -476,7 +470,7 @@ def test_axis_1_multi_index_column_labels_none_names():
)


@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
@sql_count_checker(query_count=5, join_count=2, udtf_count=1)
def test_axis_1_multi_index_column_labels_different_lengths():
data = [[i + j for j in range(0, 4)] for i in range(0, 4)]

Expand All @@ -492,7 +486,7 @@ def test_axis_1_multi_index_column_labels_different_lengths():
eval_snowpark_pandas_result(snow_df, df, lambda df: df.apply(apply_func, axis=1))


@sql_count_checker(query_count=3, select_count=1, describe_count=8)
@sql_count_checker(query_count=3)
def test_axis_1_multi_index_column_labels_different_levels_negative():
data = [[i + j for j in range(0, 4)] for i in range(0, 4)]

Expand Down Expand Up @@ -522,7 +516,6 @@ def test_axis_1_multi_index_column_labels_different_levels_negative():
)


@sql_count_checker(query_count=10, join_count=4, udtf_count=2, select_count=4, describe_count=31)
def test_apply_variant_json_null():
# series -> scalar
def f(v):
Expand Down Expand Up @@ -582,7 +575,7 @@ def g(v):

@pytest.mark.modin_sp_precommit
@pytest.mark.parametrize("data, apply_func", TRANSFORM_DATA_FUNC_MAP)
@sql_count_checker(describe_count=1)
@sql_count_checker(query_count=0)
def test_basic_dataframe_transform(data, apply_func):
msg = "Snowpark pandas apply API doesn't yet support axis == 0"
with pytest.raises(NotImplementedError, match=msg):
Expand All @@ -602,7 +595,7 @@ def test_basic_dataframe_transform(data, apply_func):


@pytest.mark.parametrize("func", AGGREGATION_FUNCTIONS)
@sql_count_checker(describe_count=1)
@sql_count_checker(query_count=0)
def test_dataframe_transform_aggregation_negative(func):
snow_df = pd.DataFrame([[0, 1, 2], [1, 2, 3]])
with pytest.raises(
Expand All @@ -612,7 +605,7 @@ def test_dataframe_transform_aggregation_negative(func):
snow_df.transform(func)


@sql_count_checker(describe_count=1)
@sql_count_checker(query_count=0)
def test_dataframe_transform_invalid_function_name_negative(session):
snow_df = pd.DataFrame([[0, 1, 2], [1, 2, 3]])
with pytest.raises(
Expand All @@ -632,7 +625,7 @@ def test_dataframe_transform_invalid_function_name_negative(session):


@pytest.mark.parametrize("func", INVALID_TYPES_FOR_TRANSFORM)
@sql_count_checker(describe_count=1)
@sql_count_checker(query_count=0)
def test_dataframe_transform_invalid_types_negative(func):
snow_df = pd.DataFrame([[0, 1, 2], [1, 2, 3]])
with pytest.raises(
Expand All @@ -651,7 +644,6 @@ def test_dataframe_transform_invalid_types_negative(func):
join_count=0,
)
@pytest.mark.parametrize("is_sorted", [True, False])
@sql_count_checker(query_count=8, udf_count=2, select_count=4, describe_count=11)
def test_fix_1001470(is_sorted):
test_df = pd.DataFrame({"income": [5000, 15000, 30000], "col": [3, 2, 1]})
if is_sorted:
Expand All @@ -672,7 +664,6 @@ def foo(row) -> float:
assert np.array_equal(ans1, ans2)


@sql_count_checker(query_count=9, join_count=3, udtf_count=1, select_count=3, describe_count=22)
def test_bug_SNOW_1172448():
# This test case checks that reading a table + apply work together. Before SNOW-1172448 there
# was a bug where the join within _apply_with_udtf_and_dynamic_pivot_along_axis_1 would fail
Expand Down Expand Up @@ -717,7 +708,6 @@ def foo(row):
high_count_expected=True,
high_count_reason="upload of larger data, udtf registration, additional temp table creation till bugfix in SNOW-1060191 is in",
)
@sql_count_checker(query_count=13, join_count=3, udtf_count=1, select_count=2, describe_count=19)
def test_dataframe_relative_to_default_partition_size_with_apply_udtf(data):
# test here that a DataFrame whose size is less than, equal to, or greater than the default UDTF partition size gets processed correctly.
df = pd.DataFrame(data, columns=["A", "B", "C"])
Expand All @@ -731,7 +721,6 @@ def foo(row):
assert len(df) == len(data)


@sql_count_checker(query_count=3, select_count=1, describe_count=8)
def test_with_duplicates_negative():
df = native_pd.DataFrame([[1, 2], [3, 4]])
snow_df = pd.DataFrame(df)
Expand Down Expand Up @@ -760,7 +749,6 @@ def foo(x):

@pytest.mark.parametrize("partition_size", [1, 2])
@pytest.mark.parametrize("data", [{"a": [1], "b": [2]}, {"a": [2], "b": [3]}])
@sql_count_checker(query_count=5, join_count=2, udtf_count=1, select_count=2, describe_count=16)
def test_apply_axis_1_with_if_where_duplicates_not_executed(partition_size, data):
df = native_pd.DataFrame(data)
snow_df = pd.DataFrame(df)
Expand Down Expand Up @@ -817,6 +805,7 @@ def test_numpy_integers_in_return_values_snow_1227264(return_value):
np.nan,
],
)
@sql_count_checker(query_count=5, udtf_count=1, join_count=2)
def test_apply_axis_1_frame_with_column_of_all_nulls_snow_1233832(null_value):
eval_snowpark_pandas_result(
*create_test_dfs({"null_col": [null_value], "int_col": [1]}),
Expand All @@ -837,7 +826,6 @@ def test_apply_axis_1_frame_with_column_of_all_nulls_snow_1233832(null_value):
# [scipy, np], 9),
],
)
@sql_count_checker(query_count=7, udf_count=1, select_count=3, describe_count=5)
def test_apply_axis1_with_3rd_party_libraries_and_decorator(
packages, expected_query_count
):
Expand Down Expand Up @@ -879,7 +867,6 @@ def func(row):
@pytest.mark.xfail(
reason="TODO: SNOW-1261830 need to support PandasSeriesType annotation."
)
@sql_count_checker(query_count=6, select_count=2, describe_count=5)
def test_apply_axis1_with_dynamic_pivot_and_with_3rd_party_libraries_and_decorator(
packages, expected_query_count
):
Expand Down
8 changes: 4 additions & 4 deletions tests/integ/modin/test_sql_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,22 +80,22 @@ def test_sql_counter_with_context_manager_inside_loop():

@sql_count_checker(no_check=True)
def test_sql_counter_with_multiple_checks():
with SqlCounter(query_count=1):
with SqlCounter(query_count=1, describe_count=1):
df = pd.DataFrame({"a": [1, 2, 3]})
assert len(df) == 3

with SqlCounter(query_count=1):
with SqlCounter(query_count=1, describe_count=1):
df = pd.DataFrame({"b": [4, 5, 6]})
assert len(df) == 3

with SqlCounter(query_count=1):
with SqlCounter(query_count=1, describe_count=1):
df = pd.DataFrame({"c": [7, 8, 9]})
assert len(df) == 3


@sql_count_checker(no_check=True)
def test_sql_counter_with_context_manager_outside_loop():
sc = SqlCounter(query_count=3)
sc = SqlCounter(query_count=3, describe_count=3)
sc.__enter__()
for _ in range(3):
df = pd.DataFrame({"a": [1, 2, 3]})
Expand Down

0 comments on commit 4489b1a

Please sign in to comment.