Commit 7751757: fix
sfc-gh-jdu committed Oct 22, 2024
1 parent cf91b8d
Showing 19 changed files with 111 additions and 101 deletions.
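Every change in this commit adjusts expected counts in SqlCounter assertions, evidently after a change that reduced the number of joins and unions the generated SQL contains. As a minimal sketch of the two usage patterns that recur in the diffs below (the SqlCounter import path is an assumption based on the test-suite layout; it is not shown in this commit):

# Hedged illustration only; import paths below are assumptions.
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401  # registers the Snowpark pandas backend
from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker  # assumed path

# Context-manager form: assert the block issues exactly 1 query containing 3 joins.
with SqlCounter(query_count=1, join_count=3):
    df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"])
    df = df.assign(new_col=[10, 11, 12])

# Decorator form: the same expectations attached to a whole test.
@sql_count_checker(query_count=1, join_count=3)
def test_example():
    ...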
tests/integ/modin/crosstab/test_crosstab.py (22 changes: 11 additions & 11 deletions)
@@ -319,7 +319,7 @@ def test_margins(self, dropna, a, b, c):
     @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"])
     def test_normalize(self, dropna, normalize, a, b, c):
         query_count = 1 if normalize in (0, "index") else 2
-        join_count = 3 if normalize in (0, "index") else 2
+        join_count = 3 if normalize in (0, "index") and dropna else 2
         if dropna:
             join_count -= 2

@@ -340,9 +340,9 @@ def test_normalize(self, dropna, normalize, a, b, c):
     @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"])
     def test_normalize_and_margins(self, dropna, normalize, a, b, c):
         counts = {
-            "columns": [3, 5 if dropna else 9, 4],
-            "index": [1, 5 if dropna else 8, 3],
-            "all": [3, 12 if dropna else 19, 7],
+            "columns": [3, 4 if dropna else 7, 3],
+            "index": [1, 3 if dropna else 4, 1],
+            "all": [3, 7 if dropna else 10, 3],
         }
         counts[0] = counts["index"]
         counts[1] = counts["columns"]
@@ -374,8 +374,8 @@ def test_normalize_and_margins(self, dropna, normalize, a, b, c):
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_normalize_margins_and_values(self, dropna, normalize, aggfunc, a, b, c):
         counts = {
-            "columns": [3, 29 if dropna else 41, 4],
-            "index": [1, 23 if dropna else 32, 3],
+            "columns": [3, 10 if dropna else 13, 3],
+            "index": [1, 5 if dropna else 6, 1],
             "all": [3, 54 if dropna else 75, 7],
         }
         counts[0] = counts["index"]
@@ -451,9 +451,9 @@ def eval_func(lib):
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_normalize_and_values(self, dropna, normalize, aggfunc, a, b, c):
         counts = {
-            "columns": [2, 4 if dropna else 10],
-            "index": [1, 5 if dropna else 11],
-            "all": [2, 4 if dropna else 10],
+            "columns": [2, 4 if dropna else 6],
+            "index": [1, 3 if dropna else 4],
+            "all": [2, 4 if dropna else 6],
         }
         counts[0] = counts["index"]
         counts[1] = counts["columns"]
@@ -520,7 +520,7 @@ def test_normalize_margins_and_values_not_supported(
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_values(self, dropna, aggfunc, basic_crosstab_dfs):
         query_count = 1
-        join_count = 2 if dropna else 5
+        join_count = 2 if dropna else 3
         native_df = basic_crosstab_dfs[0]

         with SqlCounter(query_count=query_count, join_count=join_count):
@@ -539,7 +539,7 @@ def test_values(self, dropna, aggfunc, basic_crosstab_dfs):
     @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"])
     def test_values_series_like(self, dropna, aggfunc, basic_crosstab_dfs):
         query_count = 5
-        join_count = 2 if dropna else 5
+        join_count = 2 if dropna else 3
         native_df, snow_df = basic_crosstab_dfs

         def eval_func(df):
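For orientation, a minimal sketch of the crosstab call shape these tests measure; the a and b arrays here are invented stand-ins for the fixtures of the same names:

import numpy as np
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

# Invented data; the real tests use fixtures a, b, c.
a = np.array(["foo", "foo", "bar", "bar"])
b = np.array(["one", "two", "one", "two"])
# normalize, margins, and dropna together determine the join and query
# counts asserted above.
table = pd.crosstab(a, b, normalize="index", margins=True, dropna=False)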
tests/integ/modin/frame/test_assign.py (8 changes: 4 additions & 4 deletions)
@@ -60,7 +60,7 @@ def assign_func(df):

 @pytest.mark.parametrize("new_col_value", [2, [10, 11, 12], "x"])
 def test_assign_basic_non_pandas_object(new_col_value):
-    join_count = 4 if isinstance(new_col_value, list) else 1
+    join_count = 3 if isinstance(new_col_value, list) else 1
     with SqlCounter(query_count=1, join_count=join_count):
         snow_df, native_df = create_test_dfs(
             [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
@@ -74,7 +74,7 @@ def test_assign_basic_non_pandas_object(new_col_value):
         )


-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_assign_invalid_long_column_length_negative():
     # pandas errors out in this test, since we are attempting to assign a column of length 5 to a DataFrame with length 3.
     # Snowpark pandas on the other hand, just truncates the last element of the new column so that it is the correct length. If we wanted
@@ -98,7 +98,7 @@ def test_assign_invalid_long_column_length_negative():
     assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(snow_df, native_df)


-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_assign_invalid_short_column_length_negative():
     # pandas errors out in this test, since we are attempting to assign a column of length 2 to a DataFrame with length 3.
     # Snowpark pandas on the other hand, just broadcasts the last element of the new column so that it is filled. If we wanted
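A hedged illustration of the length-mismatch semantics the two comments above describe (values invented; behavior as stated in those comments, not re-verified here):

import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"])
df["long"] = [10, 11, 12, 13, 14]  # length 5 vs. 3 rows: trailing values are truncated
df["short"] = [20, 21]             # length 2 vs. 3 rows: the last element is broadcast to fill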
@@ -226,7 +226,7 @@ def test_assign_self_columns():
     )


-@sql_count_checker(query_count=1, join_count=4)
+@sql_count_checker(query_count=1, join_count=3)
 def test_overwrite_columns_via_assign():
     snow_df, native_df = create_test_dfs(
         [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
tests/integ/modin/frame/test_cache_result.py (4 changes: 2 additions & 2 deletions)
@@ -130,7 +130,7 @@ def test_cache_result_simple(self, inplace):
         native_df = perform_chained_operations(
             native_pd.DataFrame(np.arange(15).reshape((3, 5))), native_pd
         )
-        with SqlCounter(query_count=1, union_count=29):
+        with SqlCounter(query_count=1, union_count=11):
             snow_df = perform_chained_operations(snow_df, pd)
             assert_snowpark_pandas_equals_to_pandas_without_dtypecheck(
                 snow_df, native_df
@@ -213,7 +213,7 @@ def test_cache_result_post_applymap(self, inplace, simple_test_data):
         with SqlCounter(
             query_count=11,
             union_count=9,
-            udf_count=2,
+            udf_count=1,
             high_count_expected=True,
             high_count_reason="applymap requires additional queries to setup the UDF.",
         ):
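Since the high_count_reason above attributes the extra queries to UDF setup, here is a minimal assumed sketch of the applymap pattern being measured:

import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
# The lambda executes server-side as a UDF; registering it costs additional
# queries, which is why the counter tolerates a high query_count.
result = df.applymap(lambda x: x + 1)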
tests/integ/modin/frame/test_describe.py (26 changes: 13 additions & 13 deletions)
@@ -49,19 +49,19 @@ def test_describe_numeric_only(data):
     # In total, we thus have 2 + 2 * (N - 1 + N) + 1 = 4N + 1 UNIONs for an N-column frame.
     [
         # If there are multiple modes, return the value that appears first
-        ({"a": ["k", "j", "j", "k"], "b": ["y", "y", "y", "z"]}, 9),
+        ({"a": ["k", "j", "j", "k"], "b": ["y", "y", "y", "z"]}, 5),
         # Empty columns are numeric by default (df constructor must explicitly specify object dtype)
-        ({"a": [], "b": []}, 9),
+        ({"a": [], "b": []}, 5),
         # Heterogeneous data is considered non-numeric
-        ({2: ["string", 0, None], -1: [1.1, 2.2, "hello"], 0: [None, None, None]}, 13),
+        ({2: ["string", 0, None], -1: [1.1, 2.2, "hello"], 0: [None, None, None]}, 6),
         (
             [
                 [None, "quick", None],
                 ["fox", "quick", "lazy"],
                 ["dog", "dog", "lazy"],
                 [None, None, None],
             ],
-            13,
+            6,
         ),
     ],
 )
@@ -107,7 +107,7 @@ def test_describe_empty_cols():
         # 4K-1 UNIONs to compute top/freq for K object-dtype columns (see comment on
         # test_describe_obj_only for reasoning).
         # Since we have K=2 object columns, the result is 9 + (4 * 2 - 1) = 16 UNIONs.
-        ([int, object], None, None, 16),
+        ([int, object], None, None, 12),
         (np.number, [], None, 7),
         # Including only datetimes has 7 statistics since std is not computed.
         # Since there is only 1 column, all quantiles are computed in a single QC.
@@ -127,8 +127,8 @@ def test_describe_empty_cols():
         # include and exclude cannot directly overlap
         ([int, "O"], [float, "O"], ValueError, 0),
         # Like select_dtypes, a dtype in include/exclude can be a subtype of a dtype in the other
-        ([int, "O"], [float, np.number, np.datetime64], None, 9),
-        ("O", None, None, 9),
+        ([int, "O"], [float, np.number, np.datetime64], None, 5),
+        ("O", None, None, 5),
     ],
 )
 def test_describe_include_exclude(
@@ -285,9 +285,9 @@ def timestamp_describe_comparator(snow_res, native_res):
     # Don't need to test all permutations of include/exclude with MultiIndex -- this is covered by
     # tests for select_dtypes, as well as other tests in this file
     [
-        ("all", 16),
+        ("all", 12),
         (np.number, 7),
-        (object, 9),
+        (object, 5),
     ],
 )
 def test_describe_multiindex(index, columns, include, expected_union_count):
@@ -312,10 +312,10 @@ def test_describe_multiindex(index, columns, include, expected_union_count):
     "include, exclude, expected_union_count",
     [
         (None, None, 7),
-        ("all", None, 12),
+        ("all", None, 11),
         (np.number, None, 7),
-        (None, float, 10),
-        (object, None, 5),
+        (None, float, 9),
+        (object, None, 4),
         (None, object, 7),
         (int, float, 5),
         (float, int, 5),
@@ -350,7 +350,7 @@ def helper(df):

 @sql_count_checker(
     query_count=3,
-    union_count=21,
+    union_count=8,
 )
 # SNOW-1320296 - pd.concat SQL Compilation ambiguous __row_position__ issue
 def test_describe_object_file(resources_path):
tests/integ/modin/frame/test_loc.py (16 changes: 8 additions & 8 deletions)
@@ -811,7 +811,7 @@ def loc_set_helper(df):
         else:
             df.loc[pd.Series(row_key), :] = pd.DataFrame(item)

-    expected_join_count = 4 if not row_key.dtype == bool else 2
+    expected_join_count = 3 if not row_key.dtype == bool else 2

     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(pd.DataFrame(df), df, loc_set_helper, inplace=True)
@@ -851,7 +851,7 @@ def loc_set_helper(df):
             df.loc[row_key, col_key] = item

     expected_join_count = (
-        6 if isinstance(col_key, str) and isinstance(item, list) else 1
+        4 if isinstance(col_key, str) and isinstance(item, list) else 1
     )

     with SqlCounter(query_count=1, join_count=expected_join_count):
@@ -914,7 +914,7 @@ def test_df_loc_set_list_like_row_key(row_key, key_type):
     )

     expected_join_count = (
-        2 if all(isinstance(i, bool) for i in row_key) and len(row_key) > 0 else 4
+        2 if all(isinstance(i, bool) for i in row_key) and len(row_key) > 0 else 3
     )

     # test case for df.loc[row_key] = item
@@ -1220,7 +1220,7 @@ def loc_set_helper(df):
         # otherwise, pandas raise ValueError: cannot reindex on an axis with duplicate labels
         or (df.columns.equals(df.columns.union(col_key)))
     ):
-        query_count, join_count, expect_exception = 1, 4, False
+        query_count, join_count, expect_exception = 1, 3, False
         if isinstance(col_key, native_pd.Series):
             query_count += 1

@@ -2696,7 +2696,7 @@ def test_empty_df_loc_set_series_and_list(native_item):
         else native_item
     )

-    expected_join_count = 2 if isinstance(native_item, native_pd.Series) else 4
+    expected_join_count = 2 if isinstance(native_item, native_pd.Series) else 3

    def setitem_op(df):
        item = native_item if isinstance(df, native_pd.DataFrame) else snow_item
@@ -2761,7 +2761,7 @@ def set_loc_helper(df):
         else:
             df.loc[key] = native_item_df

-    expected_join_count = 1 if key == slice(None, None, None) else 4
+    expected_join_count = 1 if key == slice(None, None, None) else 3
     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_df, native_df, set_loc_helper, inplace=True)

@@ -3010,7 +3010,7 @@ def loc_set_helper(df):
             len(row_key) - len(native_item)
         )

-    expected_join_count = 4 if len(item) > 1 else 2
+    expected_join_count = 3 if len(item) > 1 else 2
     # 4 extra queries for index, 1 for converting to native pandas in loc_set_helper, 2 for iter and 1 for tolist
     with SqlCounter(
         query_count=5 if item_type_name == "index" else 1,
@@ -3340,7 +3340,7 @@ def loc_set_helper(df):
         else:
             df.loc[snow_indexers] = item

-    expected_join_count = 4
+    expected_join_count = 3
     if isinstance(indexer[0], slice):
         expected_join_count = 1

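As context for the join counts above, a sketch of the loc-set shape these helpers exercise; the keys, item, and data are illustrative assumptions:

import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
# A list-like row key with a list item is materialized and aligned with the
# frame row by row, which is roughly where the expected joins come from.
df.loc[[0, 2], "b"] = [40, 60]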
tests/integ/modin/frame/test_name.py (2 changes: 1 addition & 1 deletion)
@@ -39,7 +39,7 @@ def test_create_dataframe_from_object_with_name(sample):
     )


-@sql_count_checker(query_count=1, join_count=2, union_count=1)
+@sql_count_checker(query_count=1, join_count=1, union_count=1)
 def test_create_dataframe_from_snowpark_pandas_series():
     df = pd.DataFrame([[2, 3, 4], [5, 6, 7]], columns=["X", "Y", "Z"])
     df = pd.DataFrame([df.X, df.iloc[:, 2]])
tests/integ/modin/frame/test_setitem.py (8 changes: 4 additions & 4 deletions)
@@ -145,7 +145,7 @@ def setitem(df):
         else:
             df[key] = val

-    expected_join_count = 3 if isinstance(key.start, int) else 4
+    expected_join_count = 3

     with SqlCounter(query_count=1, join_count=expected_join_count):
         eval_snowpark_pandas_result(snow_df, native_df, setitem, inplace=True)
@@ -246,7 +246,7 @@ def func_insert_new_column(df):


 # matching_item_row_by_label is False here.
-@sql_count_checker(query_count=2, join_count=8)
+@sql_count_checker(query_count=2, join_count=6)
 def test_df_setitem_array_value():
     # Case: setting an array as a new column (df[col] = arr) copies that data
     data = {"a": [1, 2, 3], "b": [4, 5, 6]}
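A small invented illustration of the copy semantics described in the comment above:

import numpy as np
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
arr = np.array([7, 8, 9])
df["c"] = arr  # the array's data is copied into the frame
arr[0] = 100   # mutating the source afterwards leaves df["c"] unchanged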
@@ -376,7 +376,7 @@ def func_insert_new_column(df, column):
     elif isinstance(column, native_pd.Index) and not isinstance(
         column, native_pd.DatetimeIndex
     ):
-        expected_join_count = 4
+        expected_join_count = 3

     if (
         key == "a"
@@ -672,7 +672,7 @@ def helper(df):
     def helper(df):
         df["x"] = df.loc[df.b < 0, "b"]

-    with SqlCounter(query_count=1, join_count=3):
+    with SqlCounter(query_count=1, join_count=2):
         eval_snowpark_pandas_result(snow_df, native_df, helper, inplace=True)

tests/integ/modin/groupby/test_groupby_transform.py (2 changes: 1 addition & 1 deletion)
@@ -134,7 +134,7 @@ def test_dataframe_groupby_transform_conflicting_labels_negative():

 @sql_count_checker(
     query_count=11,
-    join_count=10,
+    join_count=8,
     udtf_count=2,
     high_count_expected=True,
     high_count_reason="performing two groupby transform operations that use UDTFs and compare with pandas",
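Because the reason string above ties the high counts to UDTF-backed transforms, a minimal assumed sketch of that operation:

import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

df = pd.DataFrame({"grp": ["x", "x", "y"], "val": [1, 2, 3]})
# The callable is shipped to Snowflake as a UDTF applied per group, which is
# what drives the udtf_count and join_count expectations above.
out = df.groupby("grp").transform(lambda s: s - s.mean())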
tests/integ/modin/pivot/test_pivot_table_dropna.py (22 changes: 11 additions & 11 deletions)
@@ -32,11 +32,11 @@ def test_pivot_table_single_value_with_dropna(df_data_with_nulls, dropna, column
 @pytest.mark.parametrize(
     "aggfunc, expected_join_count",
     [
-        ("mean", 5),
-        ({"D": "max", "E": "sum"}, 3),
-        ({"D": ["count", "max"], "E": ["mean", "sum"]}, 7),
-        ({"D": "min", "E": ["mean"]}, 3),
-        (["min", "max"], 11),
+        ("mean", 3),
+        ({"D": "max", "E": "sum"}, 2),
+        ({"D": ["count", "max"], "E": ["mean", "sum"]}, 4),
+        ({"D": "min", "E": ["mean"]}, 2),
+        (["min", "max"], 6),
     ],
 )
 def test_pivot_table_multiple_values_dropna_nonnull_data(
@@ -60,11 +60,11 @@ def test_pivot_table_multiple_values_dropna_nonnull_data(
 @pytest.mark.parametrize(
     "aggfunc, expected_join_count",
     [
-        ({"E": "count", "F": ["mean", "sum"]}, 5),
-        ({"E": ["min", "max"], "F": ["mean", "sum"]}, 7),
-        (["min", "max"], 7),
-        ({"E": "min", "F": "mean"}, 3),
-        ({"E": "max", "F": "max"}, 3),
+        ({"E": "count", "F": ["mean", "sum"]}, 3),
+        ({"E": ["min", "max"], "F": ["mean", "sum"]}, 4),
+        (["min", "max"], 4),
+        ({"E": "min", "F": "mean"}, 2),
+        ({"E": "max", "F": "max"}, 2),
     ],
 )
 def test_pivot_table_multiple_pivot_values_dropna_null_data(
@@ -106,7 +106,7 @@ def test_pivot_table_single_all_aggfuncs_dropna_and_null_data(
     df_data_with_nulls_2,
     values,
 ):
-    expected_join_count = 19 if len(values) > 1 else 9
+    expected_join_count = 10 if len(values) > 1 else 5
     with SqlCounter(query_count=1, join_count=expected_join_count):
         pivot_table_test_helper(
             df_data_with_nulls_2,
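To ground the parametrizations above, a sketch of the pivot_table call shape being counted; the column names and data are assumptions mirroring the D/E value columns in the fixtures:

import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

df = pd.DataFrame({
    "A": ["x", "x", "y", "y"],
    "B": ["p", "q", "p", "q"],
    "D": [1, 2, 3, 4],
    "E": [5.0, None, 7.0, 8.0],
})
# Each (value column, aggfunc) pair contributes joins, which is why the
# expected_join_count grows with the aggfunc parametrizations above;
# dropna is parametrized in the tests and also changes the count.
table = df.pivot_table(index="A", columns="B",
                       aggfunc={"D": "max", "E": "sum"}, dropna=False)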