Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…or DataFrameGroupBy/SeriesGroupBy.bfill/ffill
  • Loading branch information
sfc-gh-helmeleegy committed Oct 14, 2024
1 parent 7c22750 commit 81a1d88
Show file tree
Hide file tree
Showing 6 changed files with 449 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
- Added support for applying Snowpark Python functions (e.g., `sin`) in `Series.map`, `Series.apply`, `DataFrame.apply` and `DataFrame.applymap`.
- Added support for `np.subtract`, `np.multiply`, `np.divide`, and `np.true_divide`.
- Added support for tracking usages of `__array_ufunc__`.
- Added support for `DataFrameGroupBy.bfill`, `SeriesGroupBy.bfill`, `DataFrameGroupBy.ffill`, and `SeriesGroupBy.ffill`.

#### Improvements

Expand Down
6 changes: 4 additions & 2 deletions docs/source/modin/supported/groupby_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ Computations/descriptive stats
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``any`` | P | ``N`` for non-integer/boolean types |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``bfill`` | N | |
| ``bfill`` | P | When GroupBy axis is 1, ``N``; |
| | | GroupBy axis = 0 is fully supported. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``corr`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand All @@ -104,7 +105,8 @@ Computations/descriptive stats
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``diff`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ffill`` | N | |
| ``ffill`` | P | When GroupBy axis is 1, ``N``; |
| | | GroupBy axis = 0 is fully supported. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``fillna`` | P | GroupBy axis = 0 is supported. |
| | | Does not support ``downcast`` parameter |
Expand Down
171 changes: 169 additions & 2 deletions src/snowflake/snowpark/modin/plugin/docstrings/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,97 @@ def skew():
pass

def ffill():
pass
"""
Forward fill the values.
Parameters
----------
limit : int, optional
Limit of how many values to fill.
Returns
-------
Series or DataFrame
Object with missing values filled.
See also
--------
Series.ffill
Forward fill the missing values in the dataset.
DataFrame.ffill
Object with missing values filled or None if inplace=True.
Series.fillna
Fill NaN values of a Series.
DataFrame.fillna
Fill NaN values of a DataFrame.
Examples
--------
For SeriesGroupBy:
>>> key = [0, 0, 1, 1]
>>> ser = pd.Series([np.nan, 2, 3, np.nan], index=key)
>>> ser
0 NaN
0 2.0
1 3.0
1 NaN
dtype: float64
>>> ser.groupby(level=0).ffill()
0 NaN
0 2.0
1 3.0
1 3.0
dtype: float64
For DataFrameGroupBy:
>>> df = pd.DataFrame(
... {
... "key": [0, 0, 1, 1, 1],
... "A": [np.nan, 2, np.nan, 3, np.nan],
... "B": [2, 3, np.nan, np.nan, np.nan],
... "C": [np.nan, np.nan, 2, np.nan, np.nan],
... }
... )
>>> df
key A B C
0 0 NaN 2.0 NaN
1 0 2.0 3.0 NaN
2 1 NaN NaN 2.0
3 1 3.0 NaN NaN
4 1 NaN NaN NaN
Propagate non-null values forward within each group along columns.
>>> df.groupby("key").ffill()
A B C
0 NaN 2.0 NaN
1 2.0 3.0 NaN
2 NaN NaN 2.0
3 3.0 NaN 2.0
4 3.0 NaN 2.0
Propagate non-null values forward within each group along rows.
>>> df.T.groupby(np.array([0, 0, 1, 1])).ffill().T
key A B C
0 0.0 0.0 2.0 2.0
1 0.0 2.0 3.0 3.0
2 1.0 1.0 NaN 2.0
3 1.0 3.0 NaN NaN
4 1.0 1.0 NaN NaN
Only replace the first NaN element within a group along columns.
>>> df.groupby("key").ffill(limit=1)
A B C
0 NaN 2.0 NaN
1 2.0 3.0 NaN
2 NaN NaN 2.0
3 3.0 NaN 2.0
4 3.0 NaN NaN
"""

def sem():
pass
Expand Down Expand Up @@ -1086,7 +1176,84 @@ def cummin():
"""

def bfill():
pass
"""
Backward fill the values.
Parameters
----------
limit : int, optional
Limit of how many values to fill.
Returns
-------
Series or DataFrame
Object with missing values filled.
See also
--------
Series.bfill
Backward fill the missing values in the dataset.
DataFrame.bfill
Backward fill the missing values in the dataset.
Series.fillna
Fill NaN values of a Series.
DataFrame.fillna
Fill NaN values of a DataFrame.
Examples
--------
With Series:
>>> index = ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot']
>>> s = pd.Series([None, 1, None, None, 3], index=index)
>>> s
Falcon NaN
Falcon 1.0
Parrot NaN
Parrot NaN
Parrot 3.0
dtype: float64
>>> s.groupby(level=0).bfill()
Falcon 1.0
Falcon 1.0
Parrot 3.0
Parrot 3.0
Parrot 3.0
dtype: float64
>>> s.groupby(level=0).bfill(limit=1)
Falcon 1.0
Falcon 1.0
Parrot NaN
Parrot 3.0
Parrot 3.0
dtype: float64
With DataFrame:
>>> df = pd.DataFrame({'A': [1, None, None, None, 4],
... 'B': [None, None, 5, None, 7]}, index=index)
>>> df
A B
Falcon 1.0 NaN
Falcon NaN NaN
Parrot NaN 5.0
Parrot NaN NaN
Parrot 4.0 7.0
>>> df.groupby(level=0).bfill()
A B
Falcon 1.0 NaN
Falcon NaN NaN
Parrot 4.0 5.0
Parrot 4.0 7.0
Parrot 4.0 7.0
>>> df.groupby(level=0).bfill(limit=1)
A B
Falcon 1.0 NaN
Falcon NaN NaN
Parrot NaN 5.0
Parrot 4.0 7.0
Parrot 4.0 7.0
"""

def prod():
pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,25 @@ def any(self, skipna: bool = True):
)

def bfill(self, limit=None):
    # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
    #
    # Backward-fill missing values within each group.
    #
    # `limit` (int, optional) caps how many values are filled; None means no cap
    # is passed on. The work is delegated to the query compiler's
    # `groupby_fillna` with method="bfill"; every other fillna option is pinned
    # to its neutral value (no scalar `value`, no `fill_axis`, not in place,
    # no `downcast`).
    #
    # NOTE(review): user-facing documentation for this method lives in the
    # docstrings module (docstrings/groupby.py); it is kept out of this body
    # on purpose, like the neighbouring methods — confirm it is attached there.

    # A one-dimensional groupby is a SeriesGroupBy; the result must be wrapped
    # in the matching container type.
    is_series_groupby = self.ndim == 1

    query_compiler = self._query_compiler.groupby_fillna(
        self._by,
        self._axis,
        self._kwargs,
        value=None,
        method="bfill",
        fill_axis=None,
        inplace=False,
        limit=limit,
        downcast=None,
    )
    return (
        pd.Series(query_compiler=query_compiler)
        if is_series_groupby
        else pd.DataFrame(query_compiler=query_compiler)
    )

def corr(self, **kwargs):
# TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
Expand Down Expand Up @@ -507,7 +525,25 @@ def diff(self):
ErrorMessage.method_not_implemented_error(name="diff", class_="GroupBy")

def ffill(self, limit=None):
    # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
    #
    # Forward-fill missing values within each group.
    #
    # `limit` (int, optional) caps how many values are filled; None means no cap
    # is passed on. The work is delegated to the query compiler's
    # `groupby_fillna` with method="ffill"; every other fillna option is pinned
    # to its neutral value (no scalar `value`, no `fill_axis`, not in place,
    # no `downcast`).
    #
    # NOTE(review): user-facing documentation for this method lives in the
    # docstrings module (docstrings/groupby.py); it is kept out of this body
    # on purpose, like the neighbouring methods — confirm it is attached there.

    # A one-dimensional groupby is a SeriesGroupBy; the result must be wrapped
    # in the matching container type.
    is_series_groupby = self.ndim == 1

    query_compiler = self._query_compiler.groupby_fillna(
        self._by,
        self._axis,
        self._kwargs,
        value=None,
        method="ffill",
        fill_axis=None,
        inplace=False,
        limit=limit,
        downcast=None,
    )
    return (
        pd.Series(query_compiler=query_compiler)
        if is_series_groupby
        else pd.DataFrame(query_compiler=query_compiler)
    )

def fillna(
self,
Expand Down
Loading

0 comments on commit 81a1d88

Please sign in to comment.