-
Notifications
You must be signed in to change notification settings - Fork 110
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SNOW-1747461 [x] I acknowledge that I have ensured my changes are thread-safe. This PR adds support for the `on` parameter with `Resampler` and does some small cleanup, removing repeated calls to utility functions for Resample methods. --------- Signed-off-by: Naren Krishna <naren.krishna@snowflake.com>
- Loading branch information
1 parent
50a9dcf
commit 47cadd2
Showing
6 changed files
with
159 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# | ||
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. | ||
# | ||
|
||
import modin.pandas as pd | ||
import numpy as np | ||
import pandas as native_pd | ||
import pytest | ||
|
||
import snowflake.snowpark.modin.plugin # noqa: F401 | ||
from snowflake.snowpark.modin.plugin._internal.resample_utils import ( | ||
IMPLEMENTED_AGG_METHODS, | ||
IMPLEMENTED_DATEOFFSET_STRINGS, | ||
) | ||
from tests.integ.modin.utils import create_test_dfs, eval_snowpark_pandas_result | ||
from tests.integ.utils.sql_counter import sql_count_checker | ||
|
||
# Parametrize over every implemented aggregation method except "indices".
_AGG_METHODS_UNDER_TEST = [
    method for method in IMPLEMENTED_AGG_METHODS if method != "indices"
]
agg_func = pytest.mark.parametrize("agg_func", _AGG_METHODS_UNDER_TEST)

# Parametrize over every implemented date-offset string.
freq = pytest.mark.parametrize("freq", IMPLEMENTED_DATEOFFSET_STRINGS)
|
||
|
||
@freq
@agg_func
# One extra query to convert index to native pandas for dataframe constructor
@sql_count_checker(query_count=3, join_count=1)
def test_resample_on(freq, agg_func):
    """Resample on column "B" for each parametrized frequency and aggregation.

    The frames are built with ``create_test_dfs`` and the resample operation is
    handed to ``eval_snowpark_pandas_result`` with ``check_freq=False``.
    """
    rule = f"2{freq}"
    step = f"1{freq}"

    # Note that supplying 'on' to Resampler replaces the existing index of the
    # DataFrame with the 'on' column
    columns = {
        "A": np.random.randn(15),
        "B": native_pd.date_range("2020-01-01", periods=15, freq=step),
    }
    original_index = native_pd.date_range("2020-10-01", periods=15, freq=step)
    test_dfs = create_test_dfs(columns, index=original_index)

    def resample_and_aggregate(df):
        # Look up the aggregation by name on the resampler and invoke it.
        resampler = df.resample(rule=rule, on="B", closed="left")
        return getattr(resampler, agg_func)()

    eval_snowpark_pandas_result(
        *test_dfs,
        resample_and_aggregate,
        check_freq=False,
    )
|
||
|
||
# One extra query to convert index to native pandas for dataframe constructor
@sql_count_checker(query_count=3, join_count=1)
def test_resample_hashable_on():
    """Resample with ``on`` given as a non-string hashable label (the int 1)."""
    columns = {
        "A": np.random.randn(15),
        1: native_pd.date_range("2020-01-01", periods=15, freq="1s"),
    }
    original_index = native_pd.date_range("2020-10-01", periods=15, freq="1s")
    test_dfs = create_test_dfs(columns, index=original_index)

    def resample_min(df):
        return df.resample(rule="2s", on=1, closed="left").min()

    eval_snowpark_pandas_result(
        *test_dfs,
        resample_min,
        check_freq=False,
    )
|
||
|
||
@sql_count_checker(query_count=0)
def test_resample_non_datetime_on():
    """Resampling on the non-datetime column "A" raises ``TypeError``.

    Native pandas and Snowpark pandas are checked separately because the
    expected error messages differ between the two.
    """
    columns = {
        "A": np.random.randn(15),
        "B": native_pd.date_range("2020-01-01", periods=15, freq="1s"),
    }
    original_index = native_pd.date_range("2020-10-01", periods=15, freq="1s")
    native_df = native_pd.DataFrame(data=columns, index=original_index)
    snow_df = pd.DataFrame(native_df)

    native_message = "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'"
    snow_message = "Only valid with DatetimeIndex or TimedeltaIndex"

    with pytest.raises(TypeError, match=native_message):
        native_df.resample(rule="2s", on="A").min()

    with pytest.raises(TypeError, match=snow_message):
        snow_df.resample(rule="2s", on="A").min().to_pandas()
|
||
|
||
# One query to get the Modin frame data column pandas labels
@sql_count_checker(query_count=1)
def test_resample_invalid_on():
    """Resampling on a column label that does not exist expects ``KeyError``."""
    columns = {
        "A": np.random.randn(15),
        "B": native_pd.date_range("2020-01-01", periods=15, freq="1s"),
    }
    original_index = native_pd.date_range("2020-10-01", periods=15, freq="1s")
    test_dfs = create_test_dfs(columns, index=original_index)

    def resample_on_missing_column(df):
        return df.resample(rule="2s", on="invalid", closed="left").min()

    eval_snowpark_pandas_result(
        *test_dfs,
        resample_on_missing_column,
        expect_exception=True,
        expect_exception_type=KeyError,
        expect_exception_match="invalid",
    )