diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index a09bdc65e6..2e9f4939c6 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -2335,7 +2335,13 @@ def dt(self): # noqa: RT01, D200 if not is_datetime64_any_dtype(current_dtype): raise AttributeError("Can only use .dt accessor with datetimelike values") - from .series_utils import DatetimeProperties + from modin.pandas.series_utils import DatetimeProperties + + if DatetimeProperties._Series is not Series: + del ( + DatetimeProperties._Series + ) # Replace modin's Series class with Snowpark pandas Series + DatetimeProperties._Series = Series return DatetimeProperties(self) @@ -2468,7 +2474,13 @@ def str(self): # noqa: RT01, D200 if not is_string_dtype(current_dtype): raise AttributeError("Can only use .str accessor with string values!") - from .series_utils import StringMethods + from modin.pandas.series_utils import StringMethods + + if StringMethods._Series is not Series: + del ( + StringMethods._Series + ) # Replace modin's Series class with Snowpark pandas Series + StringMethods._Series = Series return StringMethods(self) diff --git a/src/snowflake/snowpark/modin/pandas/series_utils.py b/src/snowflake/snowpark/modin/pandas/series_utils.py index b9836025c2..e889ca0355 100644 --- a/src/snowflake/snowpark/modin/pandas/series_utils.py +++ b/src/snowflake/snowpark/modin/pandas/series_utils.py @@ -20,35 +20,12 @@ # Version 2.0. """ -Implement Series's accessors public API as pandas does. - -Accessors: `Series.cat`, `Series.str`, `Series.dt` +Overrides the `Series.cat` accessor. `Series.str` and `Series.dt` are inherited from modin. """ -import re -import sys -from typing import TYPE_CHECKING, Callable, Optional, Union - -import numpy as np -import pandas as native_pd - -from snowflake.snowpark.modin.pandas import DataFrame, Series - -if sys.version_info[0] == 3 and sys.version_info[1] >= 7: - # Python >= 3.7 - from re import Pattern as _pattern_type -else: - # Python <= 3.6 - from re import _pattern_type - -if TYPE_CHECKING: - from datetime import tzinfo - - from pandas._typing import npt # add this line to enable doc tests to run from snowflake.snowpark.modin import pandas as pd # noqa: F401 from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage -from snowflake.snowpark.modin.utils import _inherit_docstrings class CategoryMethods: @@ -101,572 +78,3 @@ def as_ordered(self, inplace=False): def as_unordered(self, inplace=False): ErrorMessage.not_implemented(self.category_not_supported_message) - - -@_inherit_docstrings(native_pd.core.strings.accessor.StringMethods) -class StringMethods: - def __init__(self, series) -> None: - # Check if dtypes is objects - - self._series = series - self._query_compiler = series._query_compiler - - def casefold(self): - ErrorMessage.method_not_implemented_error("casefold", "Series.str") - return Series(query_compiler=self._query_compiler.str_casefold()) - - def cat(self, others=None, sep=None, na_rep=None, join=None): - ErrorMessage.method_not_implemented_error("cat", "Series.str") - compiler_result = self._query_compiler.str_cat( - others=others, sep=sep, na_rep=na_rep, join=join - ) - # if others is None, result is a string. otherwise, it's a series. - return ( - compiler_result.to_pandas().squeeze() - if others is None - else Series(query_compiler=compiler_result) - ) - - def decode(self, encoding, errors="strict"): - ErrorMessage.method_not_implemented_error("decode", "Series.str") - return Series( - query_compiler=self._query_compiler.str_decode(encoding, errors=errors) - ) - - def split( - self, - pat: Optional[str] = None, - n: int = -1, - expand: bool = False, - regex: Optional[bool] = None, - ) -> Series: - if not pat and pat is not None: - raise ValueError("split() requires a non-empty pattern match.") - - else: - return Series( - query_compiler=self._query_compiler.str_split( - pat=pat, n=n, expand=expand, regex=regex - ) - ) - - def rsplit(self, pat=None, n=-1, expand=False): - ErrorMessage.method_not_implemented_error("rsplit", "Series.str") - - if not pat and pat is not None: - raise ValueError("rsplit() requires a non-empty pattern match.") - - else: - return Series( - query_compiler=self._query_compiler.str_rsplit( - pat=pat, n=n, expand=expand - ) - ) - - def get(self, i): - ErrorMessage.method_not_implemented_error("get", "Series.str") - return Series(query_compiler=self._query_compiler.str_get(i)) - - def join(self, sep): - ErrorMessage.method_not_implemented_error("join", "Series.str") - if sep is None: - raise AttributeError("'NoneType' object has no attribute 'join'") - return Series(query_compiler=self._query_compiler.str_join(sep)) - - def get_dummies(self, sep="|"): - ErrorMessage.method_not_implemented_error("get_dummies", "Series.str") - return DataFrame(query_compiler=self._query_compiler.str_get_dummies(sep)) - - def contains( - self, - pat: str, - case: bool = True, - flags: int = 0, - na: object = None, - regex: bool = True, - ): - return Series( - query_compiler=self._query_compiler.str_contains( - pat, case=case, flags=flags, na=na, regex=regex - ) - ) - - def replace( - self, - pat: str, - repl: Union[str, Callable], - n: int = -1, - case: Optional[bool] = None, - flags: int = 0, - regex: bool = True, - ) -> Series: - if not (isinstance(repl, str) or callable(repl)): - raise TypeError("repl must be a string or callable") - return Series( - query_compiler=self._query_compiler.str_replace( - pat, repl, n=n, case=case, flags=flags, regex=regex - ) - ) - - def pad(self, width, side="left", fillchar=" "): - ErrorMessage.method_not_implemented_error("pad", "Series.str") - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - return Series( - query_compiler=self._query_compiler.str_pad( - width, side=side, fillchar=fillchar - ) - ) - - def center(self, width, fillchar=" "): - ErrorMessage.method_not_implemented_error("center", "Series.str") - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - return Series( - query_compiler=self._query_compiler.str_center(width, fillchar=fillchar) - ) - - def ljust(self, width, fillchar=" "): - ErrorMessage.method_not_implemented_error("ljust", "Series.str") - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - return Series( - query_compiler=self._query_compiler.str_ljust(width, fillchar=fillchar) - ) - - def rjust(self, width, fillchar=" "): - ErrorMessage.method_not_implemented_error("rjust", "Series.str") - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - return Series( - query_compiler=self._query_compiler.str_rjust(width, fillchar=fillchar) - ) - - def zfill(self, width): - ErrorMessage.method_not_implemented_error("zfill", "Series.str") - return Series(query_compiler=self._query_compiler.str_zfill(width)) - - def wrap(self, width, **kwargs): - ErrorMessage.method_not_implemented_error("wrap", "Series.str") - if width <= 0: - raise ValueError(f"invalid width {width} (must be > 0)") - return Series(query_compiler=self._query_compiler.str_wrap(width, **kwargs)) - - def slice( - self, - start: Optional[int] = None, - stop: Optional[int] = None, - step: Optional[int] = None, - ): - if step == 0: - raise ValueError("slice step cannot be zero") - return Series( - query_compiler=self._query_compiler.str_slice( - start=start, stop=stop, step=step - ) - ) - - def slice_replace(self, start=None, stop=None, repl=None): - ErrorMessage.method_not_implemented_error("slice_replace", "Series.str") - return Series( - query_compiler=self._query_compiler.str_slice_replace( - start=start, stop=stop, repl=repl - ) - ) - - def count(self, pat: str, flags: int = 0, **kwargs): - if not isinstance(pat, (str, _pattern_type)): - raise TypeError("first argument must be string or compiled pattern") - return Series( - query_compiler=self._query_compiler.str_count(pat, flags=flags, **kwargs) - ) - - def startswith(self, pat, na=np.NaN): - return Series(query_compiler=self._query_compiler.str_startswith(pat, na=na)) - - def encode(self, encoding, errors="strict"): - ErrorMessage.method_not_implemented_error("encode", "Series.str") - return Series( - query_compiler=self._query_compiler.str_encode(encoding, errors=errors) - ) - - def endswith(self, pat, na=np.NaN): - return Series(query_compiler=self._query_compiler.str_endswith(pat, na=na)) - - def findall(self, pat, flags=0, **kwargs): - ErrorMessage.method_not_implemented_error("findall", "Series.str") - if not isinstance(pat, (str, _pattern_type)): - raise TypeError("first argument must be string or compiled pattern") - return Series( - query_compiler=self._query_compiler.str_findall(pat, flags=flags, **kwargs) - ) - - def fullmatch(self, pat, case=True, flags=0, na=None): - ErrorMessage.method_not_implemented_error("fullmatch", "Series.str") - if not isinstance(pat, (str, re.Pattern)): - raise TypeError("first argument must be string or compiled pattern") - return self._Series( - query_compiler=self._query_compiler.str_fullmatch( - pat, case=case, flags=flags, na=na - ) - ) - - def match(self, pat, case=True, flags=0, na=np.NaN): - ErrorMessage.method_not_implemented_error("match", "Series.str") - if not isinstance(pat, (str, _pattern_type)): - raise TypeError("first argument must be string or compiled pattern") - return Series( - query_compiler=self._query_compiler.str_match(pat, flags=flags, na=na) - ) - - def extract(self, pat, flags=0, expand=True): - ErrorMessage.method_not_implemented_error("extract", "Series.str") - query_compiler = self._query_compiler.str_extract( - pat, flags=flags, expand=expand - ) - return ( - DataFrame(query_compiler=query_compiler) - if expand or re.compile(pat).groups > 1 - else Series(query_compiler=query_compiler) - ) - - def extractall(self, pat, flags=0): - ErrorMessage.method_not_implemented_error("extractall", "Series.str") - return Series(query_compiler=self._query_compiler.str_extractall(pat, flags)) - - def len(self): - return Series(query_compiler=self._query_compiler.str_len()) - - def strip(self, to_strip: str = None) -> Series: - return Series(query_compiler=self._query_compiler.str_strip(to_strip=to_strip)) - - def rstrip(self, to_strip=None): - ErrorMessage.method_not_implemented_error("rstrip", "Series.str") - return Series(query_compiler=self._query_compiler.str_rstrip(to_strip=to_strip)) - - def lstrip(self, to_strip=None): - ErrorMessage.method_not_implemented_error("lstrip", "Series.str") - return Series(query_compiler=self._query_compiler.str_lstrip(to_strip=to_strip)) - - def partition(self, sep=" ", expand=True): - ErrorMessage.method_not_implemented_error("partition", "Series.str") - if sep is not None and len(sep) == 0: - raise ValueError("empty separator") - - return (DataFrame if expand else Series)( - query_compiler=self._query_compiler.str_partition(sep=sep, expand=expand) - ) - - def removeprefix(self, prefix): - ErrorMessage.method_not_implemented_error("removeprefix", "Series.str") - return Series(query_compiler=self._query_compiler.str_removeprefix(prefix)) - - def removesuffix(self, suffix): - ErrorMessage.method_not_implemented_error("removesuffix", "Series.str") - return Series(query_compiler=self._query_compiler.str_removesuffix(suffix)) - - def repeat(self, repeats): - ErrorMessage.method_not_implemented_error("repeat", "Series.str") - return Series(query_compiler=self._query_compiler.str_repeat(repeats)) - - def rpartition(self, sep=" ", expand=True): - ErrorMessage.method_not_implemented_error("rpartition", "Series.str") - if sep is not None and len(sep) == 0: - raise ValueError("empty separator") - - else: - return Series( - query_compiler=self._query_compiler.str_rpartition( - sep=sep, expand=expand - ) - ) - - def lower(self): - return Series(query_compiler=self._query_compiler.str_lower()) - - def upper(self): - return Series(query_compiler=self._query_compiler.str_upper()) - - def title(self): - return Series(query_compiler=self._query_compiler.str_title()) - - def find(self, sub, start=0, end=None): - ErrorMessage.method_not_implemented_error("find", "Series.str") - if not isinstance(sub, str): - raise TypeError(f"expected a string object, not {type(sub).__name__}") - return Series( - query_compiler=self._query_compiler.str_find(sub, start=start, end=end) - ) - - def rfind(self, sub, start=0, end=None): - ErrorMessage.method_not_implemented_error("rfind", "Series.str") - if not isinstance(sub, str): - raise TypeError(f"expected a string object, not {type(sub).__name__}") - return Series( - query_compiler=self._query_compiler.str_rfind(sub, start=start, end=end) - ) - - def index(self, sub, start=0, end=None): - ErrorMessage.method_not_implemented_error("index", "Series.str") - if not isinstance(sub, str): - raise TypeError(f"expected a string object, not {type(sub).__name__}") - return Series( - query_compiler=self._query_compiler.str_index(sub, start=start, end=end) - ) - - def rindex(self, sub, start=0, end=None): - ErrorMessage.method_not_implemented_error("rindex", "Series.str") - if not isinstance(sub, str): - raise TypeError(f"expected a string object, not {type(sub).__name__}") - return Series( - query_compiler=self._query_compiler.str_rindex(sub, start=start, end=end) - ) - - def capitalize(self): - return Series(query_compiler=self._query_compiler.str_capitalize()) - - def swapcase(self): - ErrorMessage.method_not_implemented_error("swapcase", "Series.str") - return Series(query_compiler=self._query_compiler.str_swapcase()) - - def normalize(self, form): - ErrorMessage.method_not_implemented_error("normalize", "Series.str") - return Series(query_compiler=self._query_compiler.str_normalize(form)) - - def translate(self, table): - ErrorMessage.method_not_implemented_error("translate", "Series.str") - return Series(query_compiler=self._query_compiler.str_translate(table)) - - def isalnum(self): - ErrorMessage.method_not_implemented_error("isalnum", "Series.str") - return Series(query_compiler=self._query_compiler.str_isalnum()) - - def isalpha(self): - ErrorMessage.method_not_implemented_error("isalpha", "Series.str") - return Series(query_compiler=self._query_compiler.str_isalpha()) - - def isdigit(self): - return Series(query_compiler=self._query_compiler.str_isdigit()) - - def isspace(self): - ErrorMessage.method_not_implemented_error("isspace", "Series.str") - return Series(query_compiler=self._query_compiler.str_isspace()) - - def islower(self): - return Series(query_compiler=self._query_compiler.str_islower()) - - def isupper(self): - return Series(query_compiler=self._query_compiler.str_isupper()) - - def istitle(self): - return Series(query_compiler=self._query_compiler.str_istitle()) - - def isnumeric(self): - ErrorMessage.method_not_implemented_error("isnumeric", "Series.str") - return Series(query_compiler=self._query_compiler.str_isnumeric()) - - def isdecimal(self): - ErrorMessage.method_not_implemented_error("isdecimal", "Series.str") - return Series(query_compiler=self._query_compiler.str_isdecimal()) - - -@_inherit_docstrings(native_pd.core.indexes.accessors.CombinedDatetimelikeProperties) -class DatetimeProperties: - def __init__(self, series) -> None: - self._series = series - self._query_compiler = series._query_compiler - - @property - def date(self): - return Series(query_compiler=self._query_compiler.dt_property("date")) - - @property - def time(self): - return Series(query_compiler=self._query_compiler.dt_property("time")) - - @property - def timetz(self): - return Series(query_compiler=self._query_compiler.dt_property("timetz")) - - @property - def year(self): - return Series(query_compiler=self._query_compiler.dt_property("year")) - - @property - def month(self): - return Series(query_compiler=self._query_compiler.dt_property("month")) - - @property - def day(self): - return Series(query_compiler=self._query_compiler.dt_property("day")) - - @property - def hour(self): - return Series(query_compiler=self._query_compiler.dt_property("hour")) - - @property - def minute(self): - return Series(query_compiler=self._query_compiler.dt_property("minute")) - - @property - def second(self): - return Series(query_compiler=self._query_compiler.dt_property("second")) - - @property - def microsecond(self): - return Series(query_compiler=self._query_compiler.dt_property("microsecond")) - - @property - def nanosecond(self): - return Series(query_compiler=self._query_compiler.dt_property("nanosecond")) - - @property - def dayofweek(self): - return Series(query_compiler=self._query_compiler.dt_property("dayofweek")) - - @property - def weekday(self): - return Series(query_compiler=self._query_compiler.dt_property("weekday")) - - @property - def dayofyear(self): - return Series(query_compiler=self._query_compiler.dt_property("dayofyear")) - - @property - def quarter(self): - return Series(query_compiler=self._query_compiler.dt_property("quarter")) - - @property - def is_month_start(self): - return Series(query_compiler=self._query_compiler.dt_property("is_month_start")) - - @property - def is_month_end(self): - return Series(query_compiler=self._query_compiler.dt_property("is_month_end")) - - @property - def is_quarter_start(self): - return Series( - query_compiler=self._query_compiler.dt_property("is_quarter_start") - ) - - @property - def is_quarter_end(self): - return Series(query_compiler=self._query_compiler.dt_property("is_quarter_end")) - - @property - def is_year_start(self): - return Series(query_compiler=self._query_compiler.dt_property("is_year_start")) - - @property - def is_year_end(self): - return Series(query_compiler=self._query_compiler.dt_property("is_year_end")) - - @property - def is_leap_year(self): - return Series(query_compiler=self._query_compiler.dt_property("is_leap_year")) - - @property - def daysinmonth(self): - return Series(query_compiler=self._query_compiler.dt_property("daysinmonth")) - - @property - def days_in_month(self): - return Series(query_compiler=self._query_compiler.dt_property("days_in_month")) - - @property - def tz(self) -> "tzinfo | None": - dtype = self._series.dtype - if isinstance(dtype, np.dtype): - return None - return dtype.tz - - @property - def freq(self): - return self._query_compiler.dt_property("freq").to_pandas().squeeze() - - def to_period(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_to_period(*args, **kwargs)) - - def to_pydatetime(self): - return Series(query_compiler=self._query_compiler.dt_to_pydatetime()).to_numpy() - - def tz_localize(self, *args, **kwargs): - return Series( - query_compiler=self._query_compiler.dt_tz_localize(*args, **kwargs) - ) - - def tz_convert(self, *args, **kwargs): - return Series( - query_compiler=self._query_compiler.dt_tz_convert(*args, **kwargs) - ) - - def normalize(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_normalize(*args, **kwargs)) - - def strftime(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_strftime(*args, **kwargs)) - - def round(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_round(*args, **kwargs)) - - def floor(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_floor(*args, **kwargs)) - - def ceil(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_ceil(*args, **kwargs)) - - def month_name(self, *args, **kwargs): - return Series( - query_compiler=self._query_compiler.dt_month_name(*args, **kwargs) - ) - - def day_name(self, *args, **kwargs): - return Series(query_compiler=self._query_compiler.dt_day_name(*args, **kwargs)) - - def total_seconds(self, *args, **kwargs): - return Series( - query_compiler=self._query_compiler.dt_total_seconds(*args, **kwargs) - ) - - def to_pytimedelta(self) -> "npt.NDArray[np.object_]": - res = self._query_compiler.dt_to_pytimedelta() - return res.to_numpy()[:, 0] - - @property - def seconds(self): - return Series(query_compiler=self._query_compiler.dt_property("seconds")) - - @property - def days(self): - return Series(query_compiler=self._query_compiler.dt_property("days")) - - @property - def microseconds(self): - return Series(query_compiler=self._query_compiler.dt_property("microseconds")) - - @property - def nanoseconds(self): - return Series(query_compiler=self._query_compiler.dt_property("nanoseconds")) - - @property - def components(self): - - return DataFrame(query_compiler=self._query_compiler.dt_property("components")) - - @property - def qyear(self): - return Series(query_compiler=self._query_compiler.dt_property("qyear")) - - @property - def start_time(self): - return Series(query_compiler=self._query_compiler.dt_property("start_time")) - - @property - def end_time(self): - return Series(query_compiler=self._query_compiler.dt_property("end_time")) - - def to_timestamp(self, *args, **kwargs): - return Series( - query_compiler=self._query_compiler.dt_to_timestamp(*args, **kwargs) - ) diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index ed48017e0d..a2d421def9 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -8482,6 +8482,105 @@ def columnarize(self) -> "SnowflakeQueryCompiler": return self + def dt_date(self) -> "SnowflakeQueryCompiler": + return self.dt_property("date") + + def dt_time(self) -> "SnowflakeQueryCompiler": + return self.dt_property("time") + + def dt_timetz(self) -> "SnowflakeQueryCompiler": + return self.dt_property("timetz") + + def dt_year(self) -> "SnowflakeQueryCompiler": + return self.dt_property("year") + + def dt_month(self) -> "SnowflakeQueryCompiler": + return self.dt_property("month") + + def dt_day(self) -> "SnowflakeQueryCompiler": + return self.dt_property("day") + + def dt_hour(self) -> "SnowflakeQueryCompiler": + return self.dt_property("hour") + + def dt_minute(self) -> "SnowflakeQueryCompiler": + return self.dt_property("minute") + + def dt_second(self) -> "SnowflakeQueryCompiler": + return self.dt_property("second") + + def dt_microsecond(self) -> "SnowflakeQueryCompiler": + return self.dt_property("microsecond") + + def dt_nanosecond(self) -> "SnowflakeQueryCompiler": + return self.dt_property("nanosecond") + + def dt_dayofweek(self) -> "SnowflakeQueryCompiler": + return self.dt_property("dayofweek") + + def dt_weekday(self) -> "SnowflakeQueryCompiler": + return self.dt_property("weekday") + + def dt_dayofyear(self) -> "SnowflakeQueryCompiler": + return self.dt_property("dayofyear") + + def dt_quarter(self) -> "SnowflakeQueryCompiler": + return self.dt_property("quarter") + + def dt_is_month_start(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_month_start") + + def dt_is_month_end(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_month_end") + + def dt_is_quarter_start(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_quarter_start") + + def dt_is_quarter_end(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_quarter_end") + + def dt_is_year_start(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_year_start") + + def dt_is_year_end(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_year_end") + + def dt_is_leap_year(self) -> "SnowflakeQueryCompiler": + return self.dt_property("is_leap_year") + + def dt_daysinmonth(self) -> "SnowflakeQueryCompiler": + return self.dt_property("daysinmonth") + + def dt_days_in_month(self) -> "SnowflakeQueryCompiler": + return self.dt_property("days_in_month") + + def dt_freq(self) -> "SnowflakeQueryCompiler": + return self.dt_property("freq") + + def dt_seconds(self) -> "SnowflakeQueryCompiler": + return self.dt_property("seconds") + + def dt_days(self) -> "SnowflakeQueryCompiler": + return self.dt_property("days") + + def dt_microseconds(self) -> "SnowflakeQueryCompiler": + return self.dt_property("microseconds") + + def dt_nanoseconds(self) -> "SnowflakeQueryCompiler": + return self.dt_property("nanoseconds") + + def dt_components(self) -> "SnowflakeQueryCompiler": + return self.dt_property("components") + + def dt_qyear(self) -> "SnowflakeQueryCompiler": + return self.dt_property("qyear") + + def dt_start_time(self) -> "SnowflakeQueryCompiler": + return self.dt_property("start_time") + + def dt_end_time(self) -> "SnowflakeQueryCompiler": + return self.dt_property("end_time") + def dt_property(self, property_name: str) -> "SnowflakeQueryCompiler": """ Extracts the specified date or time part from the timestamp. @@ -11581,6 +11680,21 @@ def output_col( return SnowflakeQueryCompiler(new_internal_frame) + def str_cat( + self, + others: ListLike, + sep: Optional[str] = None, + na_rep: Optional[str] = None, + join: Literal["left", "right", "outer", "inner"] = "left", + ) -> None: + ErrorMessage.method_not_implemented_error("cat", "Series.str") + + def str_decode(self, encoding: str, errors: str) -> None: + ErrorMessage.method_not_implemented_error("decode", "Series.str") + + def str_encode(self, encoding: str, errors: str) -> None: + ErrorMessage.method_not_implemented_error("encode", "Series.str") + def str_startswith( self, pat: Union[str, tuple], na: object = None ) -> "SnowflakeQueryCompiler": @@ -11619,6 +11733,37 @@ def str_endswith( """ return self._str_startswith_endswith(pat, na, is_startswith=False) + def str_find(self, sub: str, start: int = 0, end: Optional[int] = None) -> None: + ErrorMessage.method_not_implemented_error("find", "Series.str") + + def str_rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> None: + ErrorMessage.method_not_implemented_error("rfind", "Series.str") + + def str_findall(self, pat: str, flags: int = 0) -> None: + ErrorMessage.method_not_implemented_error("findall", "Series.str") + + def str_index(self, sub: str, start: int = 0, end: Optional[int] = None) -> None: + ErrorMessage.method_not_implemented_error("index", "Series.str") + + def str_rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> None: + ErrorMessage.method_not_implemented_error("rindex", "Series.str") + + def str_fullmatch( + self, pat: str, case: bool = True, flags: int = 0, na: object = None + ) -> None: + ErrorMessage.method_not_implemented_error("fullmatch", "Series.str") + + def str_match( + self, pat: str, case: bool = True, flags: int = 0, na: object = None + ) -> None: + ErrorMessage.method_not_implemented_error("match", "Series.str") + + def str_extract(self, pat: str, flags: int = 0, expand: bool = True) -> None: + ErrorMessage.method_not_implemented_error("extract", "Series.str") + + def str_extractall(self, pat: str, flags: int = 0, expand: bool = True) -> None: + ErrorMessage.method_not_implemented_error("extractall", "Series.str") + def str_capitalize(self) -> "SnowflakeQueryCompiler": """ Capitalize the string @@ -11637,6 +11782,12 @@ def str_capitalize(self) -> "SnowflakeQueryCompiler": ) return SnowflakeQueryCompiler(new_internal_frame) + def str_isalnum(self) -> None: + ErrorMessage.method_not_implemented_error("isalnum", "Series.str") + + def str_isalpha(self) -> None: + ErrorMessage.method_not_implemented_error("isalpha", "Series.str") + def str_isdigit(self) -> "SnowflakeQueryCompiler": """ Check whether all characters in each string are digits. @@ -11652,6 +11803,9 @@ def str_isdigit(self) -> "SnowflakeQueryCompiler": ) return SnowflakeQueryCompiler(new_internal_frame) + def str_isspace(self) -> None: + ErrorMessage.method_not_implemented_error("isspace", "Series.str") + def str_islower(self) -> "SnowflakeQueryCompiler": """ Check whether all characters in each string are lowercase. @@ -11712,6 +11866,12 @@ def str_istitle(self) -> "SnowflakeQueryCompiler": ) return SnowflakeQueryCompiler(new_internal_frame) + def str_isnumeric(self) -> None: + ErrorMessage.method_not_implemented_error("isnumeric", "Series.str") + + def str_isdecimal(self) -> None: + ErrorMessage.method_not_implemented_error("isdecimal", "Series.str") + def str_lower(self) -> "SnowflakeQueryCompiler": """ Convert strings to lowercase. @@ -11779,6 +11939,9 @@ def _get_regex_params(self, flags: int = 0) -> str: params = params + "s" return params + def str_center(self, width: int, fillchar: str = " ") -> None: + ErrorMessage.method_not_implemented_error("center", "Series.str") + def str_contains( self, pat: str, @@ -11876,6 +12039,29 @@ def output_col(col_name: ColumnOrName) -> SnowparkColumn: ) return SnowflakeQueryCompiler(new_internal_frame) + def str_get(self, i: int) -> None: + ErrorMessage.method_not_implemented_error("get", "Series.str") + + def str_get_dummies(self, sep: str) -> None: + ErrorMessage.method_not_implemented_error("get_dummies", "Series.str") + + def str_join(self, sep: str) -> None: + ErrorMessage.method_not_implemented_error("join", "Series.str") + + def str_pad( + self, + width: int, + side: Literal["left", "right", "both"] = "left", + fillchar: str = " ", + ) -> None: + ErrorMessage.method_not_implemented_error("pad", "Series.str") + + def str_partition(self, sep: str = " ", expand: bool = True) -> None: + ErrorMessage.method_not_implemented_error("partition", "Series.str") + + def str_rpartition(self, sep: str = " ", expand: bool = True) -> None: + ErrorMessage.method_not_implemented_error("rpartition", "Series.str") + def str_len(self, **kwargs: Any) -> "SnowflakeQueryCompiler": """ Compute the length of each element in the Series/Index @@ -11896,6 +12082,15 @@ def str_len(self, **kwargs: Any) -> "SnowflakeQueryCompiler": ) ) + def str_ljust(self, width: int, fillchar: str = " ") -> None: + ErrorMessage.method_not_implemented_error("ljust", "Series.str") + + def str_rjust(self, width: int, fillchar: str = " ") -> None: + ErrorMessage.method_not_implemented_error("rjust", "Series.str") + + def str_normalize(self, form: Literal["NFC", "NFKC", "NFD", "NFKD"]) -> None: + ErrorMessage.method_not_implemented_error("normalize", "Series.str") + def str_slice( self, start: Optional[int] = None, @@ -12031,6 +12226,14 @@ def output_col( ) return SnowflakeQueryCompiler(new_internal_frame) + def str_slice_replace( + self, + start: Optional[int] = None, + stop: Optional[int] = None, + repl: Optional[Union[str, Callable]] = None, + ) -> None: + ErrorMessage.method_not_implemented_error("slice_replace", "Series.str") + def str_split( self, pat: Optional[str] = None, @@ -12076,6 +12279,8 @@ def str_split( ) if pandas.isnull(regex): regex = False + if not pat and pat is not None: + raise ValueError("split() requires a non-empty pattern match.") if n is None: n = -1 @@ -12165,6 +12370,11 @@ def output_col( ) return SnowflakeQueryCompiler(new_internal_frame) + def str_rsplit( + self, pat: Optional[str] = None, *, n: int = -1, expand: bool = False + ) -> None: + ErrorMessage.method_not_implemented_error("rsplit", "Series.str") + def str_replace( self, pat: str, @@ -12290,6 +12500,15 @@ def output_col( ) return SnowflakeQueryCompiler(new_internal_frame) + def str_repeat(self, repeats: int) -> None: + ErrorMessage.method_not_implemented_error("repeat", "Series.str") + + def str_removeprefix(self, prefix: str) -> None: + ErrorMessage.method_not_implemented_error("removeprefix", "Series.str") + + def str_removesuffix(self, prefix: str) -> None: + ErrorMessage.method_not_implemented_error("removesuffix", "Series.str") + def str_strip(self, to_strip: Union[str, None] = None) -> "SnowflakeQueryCompiler": """ Remove leading and trailing characters. @@ -12321,6 +12540,24 @@ def output_col(col_name: ColumnOrName) -> SnowparkColumn: ) return SnowflakeQueryCompiler(new_internal_frame) + def str_lstrip(self, to_strip: Union[str, None] = None) -> None: + ErrorMessage.method_not_implemented_error("lstrip", "Series.str") + + def str_rstrip(self, to_strip: Union[str, None] = None) -> None: + ErrorMessage.method_not_implemented_error("rstrip", "Series.str") + + def str_swapcase(self) -> None: + ErrorMessage.method_not_implemented_error("swapcase", "Series.str") + + def str_translate(self, table: dict) -> None: + ErrorMessage.method_not_implemented_error("translate", "Series.str") + + def str_wrap(self, width: int, **kwargs: Any) -> None: + ErrorMessage.method_not_implemented_error("wrap", "Series.str") + + def str_zfill(self, width: int) -> None: + ErrorMessage.method_not_implemented_error("zfill", "Series.str") + def qcut( self, q: Union[int, ListLike], @@ -12727,13 +12964,7 @@ def cut( return bins, SnowflakeQueryCompiler(ret_frame) def str_casefold(self) -> None: - """ - Returns: - New query compiler with updated values. - """ - ErrorMessage.not_implemented( - "Snowpark pandas doesn't yet support casefold method" - ) + ErrorMessage.method_not_implemented_error("casefold", "Series.str") def dt_to_period(self, freq: Optional[str] = None) -> None: """ diff --git a/tests/integ/modin/pandas_api_coverage.py b/tests/integ/modin/pandas_api_coverage.py index 23bd19a5df..9dbf477f50 100644 --- a/tests/integ/modin/pandas_api_coverage.py +++ b/tests/integ/modin/pandas_api_coverage.py @@ -8,6 +8,8 @@ from datetime import datetime from functools import update_wrapper +from modin.pandas.series_utils import DatetimeProperties as DatetimePropertiesClazz + # once we are fully off vendored modin we can make these the # upstream modin classes import snowflake.snowpark.modin.pandas as pdi @@ -22,9 +24,6 @@ ) from snowflake.snowpark.modin.pandas.plotting import Plotting as PlottingClazz from snowflake.snowpark.modin.pandas.resample import Resampler as ResamplerClazz -from snowflake.snowpark.modin.pandas.series_utils import ( - DatetimeProperties as DatetimePropertiesClazz, -) # Not in current version of Modin # from modin.pandas.window import Expanding as ExpandingClazz