Commit

resolve conflicts
Signed-off-by: Labanya Mukhopadhyay <labanya.mukhopadhyay@snowflake.com>
sfc-gh-lmukhopadhyay committed Oct 22, 2024
2 parents b9ae187 + a9a2eef commit ab42ecf
Showing 74 changed files with 1,335 additions and 571 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,11 @@

- Added support for 'Service' domain to `session.lineage.trace` API.
- Added support for `copy_grants` parameter when registering UDxF and stored procedures.
- Added support for the following methods in `DataFrameWriter` to support daisy-chaining:
- `option`
- `options`
- `partition_by`
- Added support for `snowflake_cortex_summarize`.
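The daisy-chaining these `DataFrameWriter` methods enable is the standard fluent-builder pattern, where each configuration method returns the writer itself. A minimal stand-in sketch of that pattern (`ToyWriter` is hypothetical illustration code, not Snowpark's implementation):

```python
class ToyWriter:
    """Hypothetical stand-in showing the fluent (daisy-chaining) style."""

    def __init__(self):
        self._options = {}
        self._partition_cols = []

    def option(self, key, value):
        self._options[key] = value
        return self  # returning self is what makes chaining possible

    def options(self, **kwargs):
        self._options.update(kwargs)
        return self

    def partition_by(self, *cols):
        self._partition_cols = list(cols)
        return self

# Each call returns the writer, so configuration reads as one expression.
writer = ToyWriter().option("header", True).options(sep=",").partition_by("year")
```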

#### Improvements

@@ -27,17 +32,23 @@
- Added support for `np.subtract`, `np.multiply`, `np.divide`, and `np.true_divide`.
- Added support for tracking usages of `__array_ufunc__`.
- Added numpy compatibility support for `np.float_power`, `np.mod`, `np.remainder`, `np.greater`, `np.greater_equal`, `np.less`, `np.less_equal`, `np.not_equal`, and `np.equal`.
- Added numpy compatibility support for `np.log`, `np.log2`, and `np.log10`.
- Added support for `DataFrameGroupBy.bfill`, `SeriesGroupBy.bfill`, `DataFrameGroupBy.ffill`, and `SeriesGroupBy.ffill`.
- Added support for `on` parameter with `Resampler`.
- Added support for timedelta inputs in `value_counts()`.
- Added support for applying Snowpark Python function `snowflake_cortex_summarize`.
- Added support for `DataFrame.attrs` and `Series.attrs`.
- Added support for `DataFrame.align` and `Series.align` for `axis=0`.
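The `DataFrameGroupBy`/`SeriesGroupBy` `ffill` and `bfill` additions mirror vanilla pandas semantics, where fills are applied within each group rather than across group boundaries. For reference (plain pandas, not Snowpark pandas):

```python
import pandas as pd

df = pd.DataFrame({
    "grp": ["a", "a", "a", "b", "b"],
    "val": [1.0, None, 3.0, None, 5.0],
})

# Forward-fill within each group: the NaN in group "b" has no earlier
# value in its own group, so it stays NaN.
ffilled = df.groupby("grp")["val"].ffill()

# Backward-fill within each group.
bfilled = df.groupby("grp")["val"].bfill()
```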

#### Improvements

- Improved generated SQL query for `head` and `iloc` when the row key is a slice.
- Improved error message when passing an unknown timezone to `tz_convert` and `tz_localize` in `Series`, `DataFrame`, `Series.dt`, and `DatetimeIndex`.
- Improved documentation for `tz_convert` and `tz_localize` in `Series`, `DataFrame`, `Series.dt`, and `DatetimeIndex` to specify the supported timezone formats.
- Added support for additional kwargs in `df.apply` and `series.apply` (as well as `map` and `applymap`) when using Snowpark functions. This allows some position-independent compatibility between `apply` and functions whose first argument is not a pandas object.
- Improved generated SQL query for `iloc` and `iat` when the row key is a scalar.
- Removed all joins in `iterrows`.
- Improved documentation for `Series.map` to reflect the unsupported features.
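The kwargs forwarding described in the `apply` improvement matches plain pandas `DataFrame.apply`, where extra keyword arguments are passed through to the applied function. An illustration in vanilla pandas:

```python
import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3]})

def scale(col, factor=1):
    # `col` is a column Series; `factor` arrives via apply's kwargs.
    return col * factor

# Keyword arguments after the function are forwarded to `scale`.
out = df.apply(scale, factor=10)
```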

#### Bug Fixes

@@ -46,6 +57,9 @@
- Fixed a bug where `DataFrame` and `Series` `round()` would raise `AssertionError` for `Timedelta` columns; `round()` on `Timedelta` now raises `NotImplementedError` instead.
- Fixed a bug where `reindex` fails when the new index is a Series with non-overlapping types from the original index.
- Fixed a bug where calling `__getitem__` on a DataFrameGroupBy object always returned a DataFrameGroupBy object if `as_index=False`.
- Fixed a bug where inserting timedelta values into an existing column would silently convert the values to integers instead of raising `NotImplementedError`.
- Fixed a bug where `DataFrame.shift()` on axis=0 and axis=1 would fail to propagate timedelta types.
- `DataFrame.abs()`, `DataFrame.__neg__()`, `DataFrame.stack()`, and `DataFrame.unstack()` now raise `NotImplementedError` for timedelta inputs instead of failing to propagate timedelta types.
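For reference, the dtype propagation that the `shift` fix restores matches vanilla pandas, where shifting a timedelta column keeps its `timedelta64[ns]` dtype rather than degrading it to integers:

```python
import pandas as pd

df = pd.DataFrame({"t": pd.to_timedelta([1, 2, 3], unit="D")})

# Shift down by one row; the first row becomes NaT and the dtype is preserved.
shifted = df.shift(1)
```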

### Snowpark Local Testing Updates

6 changes: 6 additions & 0 deletions docs/source/modin/numpy.rst
@@ -37,6 +37,12 @@ NumPy ufuncs called with Snowpark pandas arguments will ignore kwargs.
+-----------------------------+----------------------------------------------------+
| ``np.float_power`` | Mapped to df.__pow__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.log`` | Mapped to df.apply(snowpark.functions.ln) |
+-----------------------------+----------------------------------------------------+
| ``np.log2`` | Mapped to df.apply(snowpark.functions.log, base=2) |
+-----------------------------+----------------------------------------------------+
| ``np.log10`` | Mapped to df.apply(snowpark.functions.log, base=10)|
+-----------------------------+----------------------------------------------------+
| ``np.mod`` | Mapped to df.__mod__(df2) |
+-----------------------------+----------------------------------------------------+
| ``np.remainder`` | Mapped to df.__mod__(df2) |
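The `np.log2` mapping documented above has the same observable behavior as NumPy's ufunc dispatch on vanilla pandas objects, where the ufunc is applied elementwise through `__array_ufunc__`. A minimal illustration (plain pandas/NumPy, not Snowpark pandas):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"x": [1.0, 2.0, 8.0]})

# NumPy dispatches the ufunc to the DataFrame, applying log2 elementwise.
result = np.log2(df)
```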
2 changes: 1 addition & 1 deletion docs/source/modin/supported/dataframe_supported.rst
@@ -19,7 +19,7 @@ Attributes
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``at`` | P | ``N`` for set with MultiIndex |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``attrs`` | N | |
| ``attrs`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``axes`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
6 changes: 2 additions & 4 deletions docs/source/modin/supported/series_supported.rst
@@ -21,9 +21,7 @@ Attributes
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``at`` | P | ``N`` for set with MultiIndex |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``attrs`` | N | Reading ``attrs`` always returns an empty dict, |
| | | and attempting to modify or set ``attrs`` will |
| | | fail. |
| ``attrs`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``axes`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
@@ -261,7 +259,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``lt`` | P | ``level`` | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``map`` | P | | See ``apply`` |
| ``map`` | P | ``na_action`` | ``N`` if ``func`` is not callable |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``mask`` | P | | ``N`` if given ``axis`` or ``level`` parameters, |
| | | | ``N`` if ``cond`` or ``other`` is Callable |
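The updated `map` row above (partial support, `N` if `func` is not callable) corresponds to the callable form shown here in vanilla pandas, including `na_action="ignore"` passing missing values through untouched:

```python
import numpy as np
import pandas as pd

s = pd.Series(["cat", "dog", np.nan])

# A callable func is the supported case; na_action="ignore" skips NaN
# instead of passing it to the callable.
mapped = s.map("I am a {}".format, na_action="ignore")
```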
1 change: 1 addition & 0 deletions docs/source/snowpark/functions.rst
@@ -258,6 +258,7 @@ Functions
sin
sinh
skew
snowflake_cortex_summarize
sort_array
soundex
split
93 changes: 3 additions & 90 deletions src/snowflake/snowpark/_internal/analyzer/select_statement.py
@@ -22,10 +22,6 @@
)

import snowflake.snowpark._internal.utils
from snowflake.snowpark._internal.analyzer.cte_utils import (
encode_node_id_with_query,
encoded_query_id,
)
from snowflake.snowpark._internal.analyzer.query_plan_analysis_utils import (
PlanNodeCategory,
PlanState,
@@ -38,6 +34,7 @@
TableFunctionRelation,
)
from snowflake.snowpark._internal.analyzer.window_expression import WindowExpression
from snowflake.snowpark._internal.compiler.cte_utils import encode_node_id_with_query
from snowflake.snowpark._internal.error_message import SnowparkClientExceptionMessages
from snowflake.snowpark.types import DataType

@@ -248,12 +245,6 @@ def sql_query(self) -> str:
"""Returns the sql query of this Selectable logical plan."""
pass

@property
@abstractmethod
def placeholder_query(self) -> Optional[str]:
"""Returns the placeholder query of this Selectable logical plan."""
pass

@cached_property
def encoded_node_id_with_query(self) -> str:
"""
@@ -265,11 +256,6 @@
"""
return encode_node_id_with_query(self)

@cached_property
def encoded_query_id(self) -> Optional[str]:
"""Returns an encoded id of the queries for this Selectable logical plan."""
return encoded_query_id(self)

@property
@abstractmethod
def query_params(self) -> Optional[Sequence[Any]]:
@@ -321,7 +307,6 @@ def get_snowflake_plan(self, skip_schema_query) -> SnowflakePlan:
expr_to_alias=self.expr_to_alias,
df_aliased_col_name_to_real_col_name=self.df_aliased_col_name_to_real_col_name,
source_plan=self,
placeholder_query=self.placeholder_query,
referenced_ctes=self.referenced_ctes,
)
# set api_calls to self._snowflake_plan outside of the above constructor
@@ -419,10 +404,6 @@ def __deepcopy__(self, memodict={}) -> "SelectableEntity": # noqa: B006
def sql_query(self) -> str:
return f"{analyzer_utils.SELECT}{analyzer_utils.STAR}{analyzer_utils.FROM}{self.entity.name}"

@property
def placeholder_query(self) -> Optional[str]:
return None

@property
def sql_in_subquery(self) -> str:
return self.entity.name
@@ -505,10 +486,6 @@ def __deepcopy__(self, memodict={}) -> "SelectSQL": # noqa: B006
def sql_query(self) -> str:
return self._sql_query

@property
def placeholder_query(self) -> Optional[str]:
return None

@property
def query_params(self) -> Optional[Sequence[Any]]:
return self._query_param
@@ -582,14 +559,6 @@ def snowflake_plan(self):
def sql_query(self) -> str:
return self._snowflake_plan.queries[-1].sql

@property
def placeholder_query(self) -> Optional[str]:
return self._snowflake_plan.placeholder_query

@cached_property
def encoded_query_id(self) -> Optional[str]:
return self._snowflake_plan.encoded_query_id

@property
def schema_query(self) -> Optional[str]:
return self.snowflake_plan.schema_query
@@ -659,7 +628,6 @@ def __init__(
self.api_calls = (
self.from_.api_calls.copy() if self.from_.api_calls is not None else None
) # will be replaced by new api calls if any operation.
self._placeholder_query = None
# indicate whether we should try to merge the projection complexity of the current
# SelectStatement with the projection complexity of from_ during the calculation of
# node complexity. For example:
@@ -787,46 +755,6 @@ def sql_query(self) -> str:
self._sql_query = self.from_.sql_query
return self._sql_query
from_clause = self.from_.sql_in_subquery
if (
self.analyzer.session._cte_optimization_enabled
and (not self.analyzer.session._query_compilation_stage_enabled)
and self.from_.encoded_query_id
):
placeholder = f"{analyzer_utils.LEFT_PARENTHESIS}{self.from_.encoded_query_id}{analyzer_utils.RIGHT_PARENTHESIS}"
self._sql_query = self.placeholder_query.replace(placeholder, from_clause)
else:
where_clause = (
f"{analyzer_utils.WHERE}{self.analyzer.analyze(self.where, self.df_aliased_col_name_to_real_col_name)}"
if self.where is not None
else snowflake.snowpark._internal.utils.EMPTY_STRING
)
order_by_clause = (
f"{analyzer_utils.ORDER_BY}{analyzer_utils.COMMA.join(self.analyzer.analyze(x, self.df_aliased_col_name_to_real_col_name) for x in self.order_by)}"
if self.order_by
else snowflake.snowpark._internal.utils.EMPTY_STRING
)
limit_clause = (
f"{analyzer_utils.LIMIT}{self.limit_}"
if self.limit_ is not None
else snowflake.snowpark._internal.utils.EMPTY_STRING
)
offset_clause = (
f"{analyzer_utils.OFFSET}{self.offset}"
if self.offset
else snowflake.snowpark._internal.utils.EMPTY_STRING
)
self._sql_query = f"{analyzer_utils.SELECT}{self.projection_in_str}{analyzer_utils.FROM}{from_clause}{where_clause}{order_by_clause}{limit_clause}{offset_clause}"
return self._sql_query

@property
def placeholder_query(self) -> str:
if self._placeholder_query:
return self._placeholder_query
from_clause = f"{analyzer_utils.LEFT_PARENTHESIS}{self.from_.encoded_query_id}{analyzer_utils.RIGHT_PARENTHESIS}"
if not self.has_clause and not self.projection:
self._placeholder_query = from_clause
return self._placeholder_query

where_clause = (
f"{analyzer_utils.WHERE}{self.analyzer.analyze(self.where, self.df_aliased_col_name_to_real_col_name)}"
if self.where is not None
@@ -847,8 +775,8 @@ def placeholder_query(self) -> str:
if self.offset
else snowflake.snowpark._internal.utils.EMPTY_STRING
)
self._placeholder_query = f"{analyzer_utils.SELECT}{self.projection_in_str}{analyzer_utils.FROM}{from_clause}{where_clause}{order_by_clause}{limit_clause}{offset_clause}"
return self._placeholder_query
self._sql_query = f"{analyzer_utils.SELECT}{self.projection_in_str}{analyzer_utils.FROM}{from_clause}{where_clause}{order_by_clause}{limit_clause}{offset_clause}"
return self._sql_query

@property
def query_params(self) -> Optional[Sequence[Any]]:
@@ -1354,10 +1282,6 @@ def snowflake_plan(self):
def sql_query(self) -> str:
return self._snowflake_plan.queries[-1].sql

@property
def placeholder_query(self) -> Optional[str]:
return self._snowflake_plan.placeholder_query

@property
def schema_query(self) -> Optional[str]:
return self._snowflake_plan.schema_query
@@ -1402,7 +1326,6 @@ class SetStatement(Selectable):
def __init__(self, *set_operands: SetOperand, analyzer: "Analyzer") -> None:
super().__init__(analyzer=analyzer)
self._sql_query = None
self._placeholder_query = None
self.set_operands = set_operands
self._nodes = []
for operand in set_operands:
@@ -1425,7 +1348,6 @@ def __deepcopy__(self, memodict={}) -> "SetStatement": # noqa: B006
*deepcopy(self.set_operands, memodict), analyzer=self.analyzer
)
_deepcopy_selectable_fields(from_selectable=self, to_selectable=copied)
copied._placeholder_query = self._placeholder_query
copied._sql_query = self._sql_query

return copied
@@ -1439,15 +1361,6 @@ def sql_query(self) -> str:
self._sql_query = sql
return self._sql_query

@property
def placeholder_query(self) -> Optional[str]:
if not self._placeholder_query:
sql = f"({self.set_operands[0].selectable.encoded_query_id})"
for i in range(1, len(self.set_operands)):
sql = f"{sql}{self.set_operands[i].operator}({self.set_operands[i].selectable.encoded_query_id})"
self._placeholder_query = sql
return self._placeholder_query

@property
def schema_query(self) -> str:
"""The first operand decide the column attributes of a query with set operations.
