SNOW-1707419: Improve error message for unknown timezone in tz_convert and tz_localize (#2429)

<!---
Please answer these questions before creating your pull request. Thanks!
--->

1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue to your PR.

   <!---
   In this section, please add a Snowflake Jira issue number.

   Note that if a corresponding GitHub issue exists, you should still include
   the Snowflake Jira issue number. For example, for GitHub issue #1400, you
   should add "SNOW-1335071" here.
   --->

   Fixes SNOW-1707419

2. Fill out the following pre-review checklist:

   - [x] I am adding a new automated test(s) to verify correctness of my new code
   - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
   - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes.

3. Please describe how your code solves the related issue.

Improves the error message raised when an unknown timezone is passed to `tz_convert` and `tz_localize` in `Series`, `DataFrame`, `Series.dt`, and `DatetimeIndex`; see the illustrative sketch below.
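
As an illustrative sketch of the intended user-facing behavior (assuming Snowpark pandas is installed and an active Snowflake session is configured; the error text shown is taken from the new check in this PR):

```python
import modin.pandas as pd
import pandas as native_pd
import snowflake.snowpark.modin.plugin  # noqa: F401 -- registers the Snowpark pandas backend

# "UTC+09:00" is a fixed-offset string that pytz does not list among its
# timezone names, so the new validation rejects it up front with a clear error.
native_ser = native_pd.Series(
    native_pd.DatetimeIndex(["2024-01-01 00:00:00"], tz="US/Eastern")
)
snow_ser = pd.Series(native_ser)

try:
    snow_ser.dt.tz_convert("UTC+09:00")
except NotImplementedError as err:
    print(err)  # Snowpark pandas method 'Series.dt.tz_convert' doesn't support 'tz=UTC+09:00'
```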
sfc-gh-helmeleegy authored Oct 15, 2024
1 parent 2d61a4b commit 74769aa
Showing 8 changed files with 142 additions and 66 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -72,6 +72,7 @@
- Improved warning messages for operations that lead to materialization with inadvertent slowness.
- Removed unnecessary warning message about `convert_dtype` in `Series.apply`.
- Improved generated SQL query for `head` and `iloc` when the row key is a slice.
- Improved error message when passing an unknown timezone to `tz_convert` and `tz_localize` in `Series`, `DataFrame`, `Series.dt`, and `DatetimeIndex`.

#### Bug Fixes

@@ -23,6 +23,7 @@
import pandas.core.resample
import pandas.io.parsers
import pandas.io.parsers.readers
import pytz # type: ignore
from modin.core.storage_formats import BaseQueryCompiler # type: ignore
from pandas import Timedelta
from pandas._libs import lib
@@ -17323,6 +17324,10 @@ def dt_tz_localize(
ErrorMessage.parameter_not_implemented_error("ambiguous", method_name)
if not isinstance(nonexistent, str) or nonexistent != "raise":
ErrorMessage.parameter_not_implemented_error("nonexistent", method_name)
if isinstance(tz, str) and tz not in pytz.all_timezones:
ErrorMessage.not_implemented(
f"Snowpark pandas method '{method_name}' doesn't support 'tz={tz}'"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
@@ -17346,6 +17351,15 @@ def dt_tz_convert(
Returns:
A new QueryCompiler containing values with converted time zone.
"""
if not include_index:
method_name = "Series.dt.tz_convert"
else:
method_name = "DatetimeIndex.tz_convert"
if isinstance(tz, str) and tz not in pytz.all_timezones:
ErrorMessage.not_implemented(
f"Snowpark pandas method '{method_name}' doesn't support 'tz={tz}'"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
lambda column: tz_convert_column(column, tz),
@@ -18897,6 +18911,10 @@ def tz_convert(
ErrorMessage.not_implemented(
"Snowpark pandas 'tz_convert' method doesn't support 'copy=False'"
)
if isinstance(tz, str) and tz not in pytz.all_timezones:
ErrorMessage.not_implemented(
f"Snowpark pandas 'tz_convert' method doesn't support 'tz={tz}'"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
@@ -18969,6 +18987,10 @@ def tz_localize(
ErrorMessage.not_implemented(
"Snowpark pandas 'tz_localize' method doesn't yet support the 'nonexistent' parameter"
)
if isinstance(tz, str) and tz not in pytz.all_timezones:
ErrorMessage.not_implemented(
f"Snowpark pandas 'tz_localize' method doesn't support 'tz={tz}'"
)

return SnowflakeQueryCompiler(
self._modin_frame.apply_snowpark_function_to_columns(
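
The guard added to `dt_tz_localize`, `dt_tz_convert`, `tz_convert`, and `tz_localize` above follows one pattern: reject any timezone string that `pytz` does not recognize before building a query. A simplified standalone sketch of that check (the `validate_tz` helper name is illustrative; the real checks are inlined in each method and raise through the internal `ErrorMessage` helper rather than a plain `NotImplementedError`):

```python
from typing import Optional

import pytz  # the same timezone database the query compiler consults


def validate_tz(tz: Optional[str], method_name: str) -> None:
    # pytz.all_timezones lists IANA names such as "UTC" and "US/Eastern";
    # fixed-offset strings like "UTC+09:00" are absent, so they are rejected
    # with a message that names the method and the offending value.
    if isinstance(tz, str) and tz not in pytz.all_timezones:
        raise NotImplementedError(
            f"Snowpark pandas method '{method_name}' doesn't support 'tz={tz}'"
        )


validate_tz("US/Eastern", "Series.dt.tz_convert")  # accepted: known IANA name
try:
    validate_tz("UTC+09:00", "Series.dt.tz_convert")
except NotImplementedError as err:
    print(err)
```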
15 changes: 8 additions & 7 deletions tests/integ/modin/frame/test_tz_convert.py
@@ -76,16 +76,17 @@ def test_tz_convert(tz):


@pytest.mark.parametrize(
"axis, level, copy, exception",
"axis, level, copy, tz, exception",
[
(1, None, None, NotImplementedError),
("columns", None, None, NotImplementedError),
(0, 1, None, NotImplementedError),
(0, None, False, NotImplementedError),
(1, None, None, "UTC", NotImplementedError),
("columns", None, None, "UTC", NotImplementedError),
(0, 1, None, "UTC", NotImplementedError),
(0, None, False, "UTC", NotImplementedError),
(0, None, None, "UTC+09:00", NotImplementedError),
],
)
@sql_count_checker(query_count=0)
def test_tz_convert_negative(axis, level, copy, exception):
def test_tz_convert_negative(axis, level, copy, tz, exception):
datetime_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
@@ -101,7 +102,7 @@ def test_tz_convert_negative(axis, level, copy, exception):

with pytest.raises(exception):
snow_df.tz_convert(
tz="UTC",
tz=tz,
axis=axis,
level=level,
copy=copy,
39 changes: 24 additions & 15 deletions tests/integ/modin/frame/test_tz_localize.py
@@ -76,24 +76,33 @@ def test_tz_localize(tz):


@pytest.mark.parametrize(
"axis, level, copy, ambiguous, nonexistent, exception",
"tz, axis, level, copy, ambiguous, nonexistent, exception",
[
(1, None, None, "raise", "raise", NotImplementedError),
("columns", None, None, "raise", "raise", NotImplementedError),
(0, 1, None, "raise", "raise", NotImplementedError),
(0, None, False, "raise", "raise", NotImplementedError),
(0, None, True, "infer", "raise", NotImplementedError),
(0, None, True, "NaT", "raise", NotImplementedError),
(0, None, True, np.array([True, True, False]), "raise", NotImplementedError),
(0, None, True, "raise", "shift_forward", NotImplementedError),
(0, None, True, "raise", "shift_backward", NotImplementedError),
(0, None, True, "raise", "NaT", NotImplementedError),
(0, None, True, "raise", pd.Timedelta("1h"), NotImplementedError),
(0, None, True, "infer", "shift_forward", NotImplementedError),
("UTC", 1, None, None, "raise", "raise", NotImplementedError),
("UTC", "columns", None, None, "raise", "raise", NotImplementedError),
("UTC", 0, 1, None, "raise", "raise", NotImplementedError),
("UTC", 0, None, False, "raise", "raise", NotImplementedError),
("UTC", 0, None, True, "infer", "raise", NotImplementedError),
("UTC", 0, None, True, "NaT", "raise", NotImplementedError),
(
"UTC",
0,
None,
True,
np.array([True, True, False]),
"raise",
NotImplementedError,
),
("UTC", 0, None, True, "raise", "shift_forward", NotImplementedError),
("UTC", 0, None, True, "raise", "shift_backward", NotImplementedError),
("UTC", 0, None, True, "raise", "NaT", NotImplementedError),
("UTC", 0, None, True, "raise", pd.Timedelta("1h"), NotImplementedError),
("UTC", 0, None, True, "infer", "shift_forward", NotImplementedError),
("UTC+09:00", 0, None, None, "raise", "raise", NotImplementedError),
],
)
@sql_count_checker(query_count=0)
def test_tz_localize_negative(axis, level, copy, ambiguous, nonexistent, exception):
def test_tz_localize_negative(tz, axis, level, copy, ambiguous, nonexistent, exception):
datetime_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
@@ -108,7 +117,7 @@ def test_tz_localize_negative(axis, level, copy, ambiguous, nonexistent, excepti

with pytest.raises(exception):
snow_df.tz_localize(
tz="UTC",
tz=tz,
axis=axis,
level=level,
copy=copy,
36 changes: 25 additions & 11 deletions tests/integ/modin/index/test_datetime_index_methods.py
@@ -293,6 +293,19 @@ def test_tz_convert(tz):
)


@sql_count_checker(query_count=0)
def test_tz_convert_negative():
native_index = native_pd.date_range(
start="2021-01-01", periods=5, freq="7h", tz="US/Eastern"
)
native_index = native_index.append(
native_pd.DatetimeIndex([pd.NaT], tz="US/Eastern")
)
snow_index = pd.DatetimeIndex(native_index)
with pytest.raises(NotImplementedError):
snow_index.tz_convert(tz="UTC+09:00")


@sql_count_checker(query_count=1, join_count=1)
@timezones
def test_tz_localize(tz):
@@ -316,20 +329,21 @@ def test_tz_localize(tz):


@pytest.mark.parametrize(
"ambiguous, nonexistent",
"tz, ambiguous, nonexistent",
[
("infer", "raise"),
("NaT", "raise"),
(np.array([True, True, False]), "raise"),
("raise", "shift_forward"),
("raise", "shift_backward"),
("raise", "NaT"),
("raise", pd.Timedelta("1h")),
("infer", "shift_forward"),
(None, "infer", "raise"),
(None, "NaT", "raise"),
(None, np.array([True, True, False]), "raise"),
(None, "raise", "shift_forward"),
(None, "raise", "shift_backward"),
(None, "raise", "NaT"),
(None, "raise", pd.Timedelta("1h")),
(None, "infer", "shift_forward"),
("UTC+09:00", "raise", "raise"),
],
)
@sql_count_checker(query_count=0)
def test_tz_localize_negative(ambiguous, nonexistent):
def test_tz_localize_negative(tz, ambiguous, nonexistent):
native_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
@@ -341,7 +355,7 @@ def test_tz_localize_negative(ambiguous, nonexistent):
)
snow_index = pd.DatetimeIndex(native_index)
with pytest.raises(NotImplementedError):
snow_index.tz_localize(tz=None, ambiguous=ambiguous, nonexistent=nonexistent)
snow_index.tz_localize(tz=tz, ambiguous=ambiguous, nonexistent=nonexistent)


@pytest.mark.parametrize(
41 changes: 30 additions & 11 deletions tests/integ/modin/series/test_dt_accessor.py
@@ -249,6 +249,24 @@ def test_tz_convert(tz):
)


@sql_count_checker(query_count=0)
def test_tz_convert_negative():
datetime_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
"2014-07-18 21:24:02.000000002",
"2015-11-22 22:14:03.000000003",
"2015-11-23 20:12:04.1234567890",
pd.NaT,
],
tz="US/Eastern",
)
native_ser = native_pd.Series(datetime_index)
snow_ser = pd.Series(native_ser)
with pytest.raises(NotImplementedError):
snow_ser.dt.tz_convert(tz="UTC+09:00")


@sql_count_checker(query_count=1)
@timezones
def test_tz_localize(tz):
@@ -271,20 +289,21 @@ def test_tz_localize(tz):


@pytest.mark.parametrize(
"ambiguous, nonexistent",
"tz, ambiguous, nonexistent",
[
("infer", "raise"),
("NaT", "raise"),
(np.array([True, True, False]), "raise"),
("raise", "shift_forward"),
("raise", "shift_backward"),
("raise", "NaT"),
("raise", pd.Timedelta("1h")),
("infer", "shift_forward"),
(None, "infer", "raise"),
(None, "NaT", "raise"),
(None, np.array([True, True, False]), "raise"),
(None, "raise", "shift_forward"),
(None, "raise", "shift_backward"),
(None, "raise", "NaT"),
(None, "raise", pd.Timedelta("1h")),
(None, "infer", "shift_forward"),
("UTC+09:00", "raise", "raise"),
],
)
@sql_count_checker(query_count=0)
def test_tz_localize_negative(ambiguous, nonexistent):
def test_tz_localize_negative(tz, ambiguous, nonexistent):
datetime_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
@@ -297,7 +316,7 @@ def test_tz_localize_negative(ambiguous, nonexistent):
native_ser = native_pd.Series(datetime_index)
snow_ser = pd.Series(native_ser)
with pytest.raises(NotImplementedError):
snow_ser.dt.tz_localize(tz=None, ambiguous=ambiguous, nonexistent=nonexistent)
snow_ser.dt.tz_localize(tz=tz, ambiguous=ambiguous, nonexistent=nonexistent)


@pytest.mark.parametrize("name", [None, "hello"])
15 changes: 8 additions & 7 deletions tests/integ/modin/series/test_tz_convert.py
@@ -74,16 +74,17 @@ def test_tz_convert(tz):


@pytest.mark.parametrize(
"axis, level, copy, exception",
"tz, axis, level, copy, exception",
[
(1, None, None, ValueError),
("columns", None, None, ValueError),
(0, 1, None, NotImplementedError),
(0, None, False, NotImplementedError),
("UTC", 1, None, None, ValueError),
("UTC", "columns", None, None, ValueError),
("UTC", 0, 1, None, NotImplementedError),
("UTC", 0, None, False, NotImplementedError),
("UTC+09:00", 0, None, None, NotImplementedError),
],
)
@sql_count_checker(query_count=0)
def test_tz_convert_negative(axis, level, copy, exception):
def test_tz_convert_negative(tz, axis, level, copy, exception):
datetime_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
@@ -97,7 +98,7 @@ def test_tz_convert_negative(axis, level, copy, exception):

with pytest.raises(exception):
snow_ser.tz_convert(
tz="UTC",
tz=tz,
axis=axis,
level=level,
copy=copy,
39 changes: 24 additions & 15 deletions tests/integ/modin/series/test_tz_localize.py
@@ -74,24 +74,33 @@ def test_tz_localize(tz):


@pytest.mark.parametrize(
"axis, level, copy, ambiguous, nonexistent, exception",
"tz, axis, level, copy, ambiguous, nonexistent, exception",
[
(1, None, True, "raise", "raise", ValueError),
("columns", None, True, "raise", "raise", ValueError),
(0, 1, None, "raise", "raise", NotImplementedError),
(0, None, False, "raise", "raise", NotImplementedError),
(0, None, True, "infer", "raise", NotImplementedError),
(0, None, True, "NaT", "raise", NotImplementedError),
(0, None, True, np.array([True, True, False]), "raise", NotImplementedError),
(0, None, True, "raise", "shift_forward", NotImplementedError),
(0, None, True, "raise", "shift_backward", NotImplementedError),
(0, None, True, "raise", "NaT", NotImplementedError),
(0, None, True, "raise", pd.Timedelta("1h"), NotImplementedError),
(0, None, True, "infer", "shift_forward", NotImplementedError),
("UTC", 1, None, True, "raise", "raise", ValueError),
("UTC", "columns", None, True, "raise", "raise", ValueError),
("UTC", 0, 1, None, "raise", "raise", NotImplementedError),
("UTC", 0, None, False, "raise", "raise", NotImplementedError),
("UTC", 0, None, True, "infer", "raise", NotImplementedError),
("UTC", 0, None, True, "NaT", "raise", NotImplementedError),
(
"UTC",
0,
None,
True,
np.array([True, True, False]),
"raise",
NotImplementedError,
),
("UTC", 0, None, True, "raise", "shift_forward", NotImplementedError),
("UTC", 0, None, True, "raise", "shift_backward", NotImplementedError),
("UTC", 0, None, True, "raise", "NaT", NotImplementedError),
("UTC", 0, None, True, "raise", pd.Timedelta("1h"), NotImplementedError),
("UTC", 0, None, True, "infer", "shift_forward", NotImplementedError),
("UTC+09:00", 0, None, True, "raise", "raise", NotImplementedError),
],
)
@sql_count_checker(query_count=0)
def test_tz_localize_negative(axis, level, copy, ambiguous, nonexistent, exception):
def test_tz_localize_negative(tz, axis, level, copy, ambiguous, nonexistent, exception):
datetime_index = native_pd.DatetimeIndex(
[
"2014-04-04 23:56:01.000000001",
@@ -104,7 +113,7 @@ def test_tz_localize_negative(axis, level, copy, ambiguous, nonexistent, excepti

with pytest.raises(exception):
snow_ser.tz_localize(
tz="UTC",
tz=tz,
axis=axis,
level=level,
copy=copy,
