Skip to content

Commit

Permalink
SNOW-1692063 - More functions for numpy compatibility (#2433)
Browse files Browse the repository at this point in the history
- comparison functions (`greater`, `greater_equal`, `less`, `less_equal`, `not_equal`, `equal`)
- a few more math functions
- modified the chained-ufunc test to work with operations that result in
mixed types
  • Loading branch information
sfc-gh-jkew authored Oct 11, 2024
1 parent e8f024e commit 078cdf6
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 18 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
### Snowpark pandas API Updates

#### New Features
- Added numpy compatibility support for `np.floor_divide`, `np.float_power`, `np.mod`, `np.remainder`, `np.greater`, `np.greater_equal`, `np.less`, `np.less_equal`, `np.not_equal`, and `np.equal`.

## 1.23.0 (2024-10-09)

Expand Down Expand Up @@ -54,8 +55,8 @@
- Added support for `DataFrame.tz_localize` and `Series.tz_localize`.
- Added support for `DataFrame.tz_convert` and `Series.tz_convert`.
- Added support for applying Snowpark Python functions (e.g., `sin`) in `Series.map`, `Series.apply`, `DataFrame.apply` and `DataFrame.applymap`.
- Added support for `np.subtract`, `np.multiply`, `np.divide`, `np.true_divide`
- Added support for tracking usages of `__array_ufunc__`
- Added support for `np.subtract`, `np.multiply`, `np.divide`, and `np.true_divide`.
- Added support for tracking usages of `__array_ufunc__`.

#### Improvements

Expand Down
18 changes: 18 additions & 0 deletions docs/source/modin/numpy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,24 @@ NumPy ufuncs called with Snowpark pandas arguments will ignore kwargs.
+-----------------------------+----------------------------------------------------+
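| ``np.true_divide`` | Mapped to df.__truediv__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.floor_divide`` | Mapped to df.__floordiv__(df2) |
+-----------------------------+----------------------------------------------------+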
| ``np.float_power`` | Mapped to df.__pow__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.mod`` | Mapped to df.__mod__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.remainder`` | Mapped to df.__mod__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.greater`` | Mapped to df > df2 |
+-----------------------------+----------------------------------------------------+
| ``np.greater_equal`` | Mapped to df >= df2 |
+-----------------------------+----------------------------------------------------+
| ``np.less`` | Mapped to df < df2 |
+-----------------------------+----------------------------------------------------+
| ``np.less_equal`` | Mapped to df <= df2 |
+-----------------------------+----------------------------------------------------+
| ``np.not_equal`` | Mapped to df != df2 |
+-----------------------------+----------------------------------------------------+
| ``np.equal`` | Mapped to df == df2 |
+-----------------------------+----------------------------------------------------+
| ``np.logical_and`` | Mapped to df.__and__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.logical_or`` | Mapped to df.__or__(df2) |
Expand Down
24 changes: 12 additions & 12 deletions src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,14 @@ def map_to_bools(inputs: Any) -> Any:
"divide": lambda obj, inputs: obj.__truediv__(*inputs), # same as true_divide
"logaddexp": NotImplemented,
"logaddexp2": NotImplemented,
"true_divide": lambda obj, inputs, kwargs: obj.__truediv__(*inputs),
"floor_divide": NotImplemented,
"true_divide": lambda obj, inputs: obj.__truediv__(*inputs),
"floor_divide": lambda obj, inputs: obj.__floordiv__(*inputs),
"negative": NotImplemented,
"positive": NotImplemented,
"power": NotImplemented,
"float_power": NotImplemented,
"remainder": NotImplemented,
"mod": NotImplemented,
"power": NotImplemented, # cannot use obj.__pow__
"float_power": lambda obj, inputs: obj.__pow__(*inputs),
"remainder": lambda obj, inputs: obj.__mod__(*inputs),
"mod": lambda obj, inputs: obj.__mod__(*inputs),
"fmod": NotImplemented,
"divmod": NotImplemented,
"absolute": NotImplemented,
Expand Down Expand Up @@ -192,12 +192,12 @@ def map_to_bools(inputs: Any) -> Any:
"left_shift": NotImplemented,
"right_shift": NotImplemented,
# comparison functions
"greater": NotImplemented,
"greater_equal": NotImplemented,
"less": NotImplemented,
"less_equal": NotImplemented,
"not_equal": NotImplemented,
"equal": NotImplemented,
"greater": lambda obj, inputs: obj > inputs[0],
"greater_equal": lambda obj, inputs: obj >= inputs[0],
"less": lambda obj, inputs: obj < inputs[0],
"less_equal": lambda obj, inputs: obj <= inputs[0],
"not_equal": lambda obj, inputs: obj != inputs[0],
"equal": lambda obj, inputs: obj == inputs[0],
"logical_and": lambda obj, inputs: obj.astype("bool").__and__(
*map_to_bools(inputs)
),
Expand Down
29 changes: 25 additions & 4 deletions tests/integ/modin/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,25 @@ def test_logical_operators():


@pytest.mark.parametrize(
"np_ufunc", [np.add, np.subtract, np.divide, np.multiply, np.true_divide]
"np_ufunc",
[
np.add,
np.subtract,
np.divide,
np.multiply,
np.true_divide,
np.float_power,
np.mod,
np.remainder,
np.greater,
np.greater_equal,
np.less,
np.less_equal,
np.not_equal,
np.equal,
],
)
def test_np_ufunc_arithmetic_operators(np_ufunc):
def test_np_ufunc_binop_operators(np_ufunc):
data = {
"A": [3, 1, 2, 2, 1, 2, 5, 1, 2],
"B": [1, 2, 3, 4, 1, 2, 3, 4, 1],
Expand All @@ -93,8 +109,13 @@ def test_np_ufunc_arithmetic_operators(np_ufunc):

with SqlCounter(query_count=1):
# Test chained numpy ufuncs
snow_result = np_ufunc(snow_df["A"], np_ufunc(snow_df["A"], 1))
pandas_result = np_ufunc(pandas_df["A"], np_ufunc(pandas_df["A"], 1))
snow_result = np_ufunc(
np_ufunc(snow_df["A"], snow_df["A"]), np_ufunc(snow_df["A"], snow_df["A"])
)
pandas_result = np_ufunc(
np_ufunc(pandas_df["A"], pandas_df["A"]),
np_ufunc(pandas_df["A"], pandas_df["A"]),
)
assert_array_equal(np.array(snow_result), np.array(pandas_result))


Expand Down

0 comments on commit 078cdf6

Please sign in to comment.