snowflakedb · sfc-gh-stan · Jul 31, 2023 · Jul 21, 2023 · Jul 28, 2023 · Jul 28, 2023
@@ -217,6 +217,7 @@ Functions
     object_keys
     object_pick
     pandas_udf
+    pandas_udtf
     parse_json
     parse_xml
     percent_rank

@@ -7034,6 +7034,30 @@ def pandas_udtf(
         - :func:`udtf`
         - :meth:`UDTFRegistration.register() <snowflake.snowpark.udf.UDTFRegistration.register>`
 
+    Compared to the default row-by-row processing pattern of a normal UDTF, which sometimes is
+    inefficient, vectorized Python UDTFs (user-defined table functions) enable seamless partition-by-partition processing
+    by operating on partitions as
+    `Pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
+    and returning results as
+    `Pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
+    or lists of `Pandas arrays <https://pandas.pydata.org/docs/reference/api/pandas.array.html>`_
+    or `Pandas Series <https://pandas.pydata.org/docs/reference/series.html>`_.
+
+    In addition, vectorized Python UDTFs allow for easy integration with libraries that operate on pandas DataFrames or pandas arrays.
+
+    A vectorized UDTF handler class:
+    - defines an :code:`end_partition` method that takes in a DataFrame argument and returns a :code:`pandas.DataFrame` or a tuple of :code:`pandas.Series` or :code:`pandas.arrays` where each array is a column.
+    - does NOT define a :code:`process` method.
+    - optionally defines an :code:`__init__` method, which will be invoked before processing each partition.
+
+    You can use :func:`~snowflake.snowpark.functions.udtf`, :meth:`register` or
+    :func:`~snowflake.snowpark.functions.pandas_udtf` to create a vectorized UDTF by providing
+    appropriate return and input types. If you would like to use :meth:`register_from_file` to
+    create a vectorized UDTF, you need to explicitly mark the handler method as vectorized using
+    either the decorator ``@vectorized(input=pandas.DataFrame)`` or the attribute assignment ``<class>.end_partition._sf_vectorized_input = pandas.DataFrame``.
+
+    Note: A vectorized UDTF must be called with :meth:`~snowflake.snowpark.Window.partition_by` to build the partitions.
+
     Example::
         >>> from snowflake.snowpark.types import PandasSeriesType, PandasDataFrameType, IntegerType
         >>> class multiply:

@@ -1845,6 +1845,11 @@ def create_dataframe(
             >>> import pandas as pd
             >>> session.create_dataframe(pd.DataFrame([(1, 2, 3, 4)], columns=["a", "b", "c", "d"])).collect()
             [Row(a=1, b=2, c=3, d=4)]
+
+        Note:
+            When `data` is a pandas DataFrame, `snowflake.connector.pandas_tools.write_pandas` is called, which
+            requires permission to (1) CREATE STAGE (2) CREATE TABLE and (3) CREATE FILE FORMAT under the current
+            database and schema.
         """
         if data is None:
             raise ValueError("data cannot be None.")

@@ -3,7 +3,17 @@
 # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
 #
 
-"""User-defined functions (UDFs) in Snowpark. Refer to :class:`~snowflake.snowpark.udf.UDFRegistration` for details and sample code."""
+"""User-defined functions (UDFs) in Snowpark. Please see `Python UDFs <https://docs.snowflake.com/en/developer-guide/snowpark/python/creating-udfs>`_ for details.
+Snowpark also supports vectorized UDFs (see `Python UDF Batch API <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-batch.html>`__ for details). Compared to the default row-by-row processing pattern of a normal UDF, which sometimes is
+inefficient, a vectorized UDF allows vectorized operations on a dataframe, with the input as a `Pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ or `Pandas Series <https://pandas.pydata.org/docs/reference/api/pandas.Series.html>`_. In a
+vectorized UDF, you can operate on batches of rows by handling Pandas DataFrame or Pandas Series.
+
+In brief, the advantages of a vectorized UDF include
+    - The potential for better performance if your Python code operates efficiently on batches of rows.
+    - Less transformation logic is required if you are calling into libraries that operate on Pandas DataFrames or Pandas arrays.
+
+Refer to :class:`~snowflake.snowpark.udf.UDFRegistration` for sample code on how to create and use regular and vectorized UDFs using the Snowpark Python API.
+"""
 import sys
 from types import ModuleType
 from typing import Callable, Dict, List, Optional, Tuple, Union
@@ -122,16 +132,6 @@ class UDFRegistration:
     permanently. The methods that register a UDF return a :class:`UserDefinedFunction` object,
     which you can also use in :class:`~snowflake.snowpark.Column` expressions.
 
-    Note:
-        Before creating a UDF, think about whether you want to create a vectorized UDF (also referred to as `Python UDF Batch API`) or a regular UDF.
-        The advantages of a vectorized UDF are:
-
-          - The potential for better performance if your Python code operates efficiently on batches of rows.
-          - Less transformation logic is required if you are calling into libraries that operate on Pandas DataFrames or Pandas arrays.
-
-        Refer to `Python UDF Batch API <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-batch.html>`__ for more details.
-        The following text explains how to create a regular UDF and a vectorized UDF by using the Snowpark Python APIs.
-
     There are two ways to register a UDF with Snowpark:
 
         - Use :func:`~snowflake.snowpark.functions.udf` or :meth:`register`. By pointing to a
@@ -200,18 +200,13 @@ class UDFRegistration:
           Therefore, this approach is useful and efficient when all your Python code is already in
           source files.
 
-    Compared to the default row-by-row processing pattern of a normal UDF, which sometimes is
-    inefficient, a vectorized UDF allows vectorized operations on a dataframe, with the input as a
-    `Pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_
-    or `Pandas Series <https://pandas.pydata.org/docs/reference/api/pandas.Series.html>`_. In a
-    vectorized UDF, you can operate on a batches of rows by handling Pandas DataFrame or Pandas
-    Series. You can use :func:`~snowflake.snowpark.functions.udf`, :meth:`register` or
-    :func:`~snowflake.snowpark.functions.pandas_udf` to create a vectorized UDF by providing
-    appropriate return and input types. If you would like to use :meth:`register_from_file` to
-    create a vectorized UDF, you would need to explicitly mark the handler function as vectorized using
-    either the `vectorized` Decorator or a function attribute. Please see
-    `Python UDF Batch API <https://docs.snowflake.com/en/developer-guide/udf/python/udf-python-batch.html>`
-    for examples.
+
+    For a vectorized UDF:
+        You can use :func:`~snowflake.snowpark.functions.udf`, :meth:`register` or
+        :func:`~snowflake.snowpark.functions.pandas_udf` to create a vectorized UDF by providing
+        appropriate return and input types. If you would like to use :meth:`register_from_file` to
+        create a vectorized UDF, you need to explicitly mark the handler function as vectorized using
+        either the `vectorized` Decorator or a function attribute.
 
     Snowflake supports the following data types for the parameters for a UDF:
 

@@ -3,7 +3,23 @@
 # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
 #
 
-"""User-defined table functions (UDTFs) in Snowpark. Refer to :class:`~snowflake.snowpark.udtf.UDTFRegistration` for details and sample code."""
+"""User-defined table functions (UDTFs) in Snowpark. Please see `Python UDTF <https://docs.snowflake.com/en/developer-guide/snowpark/python/creating-udtfs>`_ for details.
+Snowpark also supports vectorized UDTFs. Compared to the default row-by-row processing pattern of a normal UDTF, which sometimes is inefficient, vectorized Python UDTFs (user-defined table functions) enable seamless partition-by-partition processing
+by operating on partitions as `Pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and returning results as `Pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ or lists of
+`Pandas arrays <https://pandas.pydata.org/docs/reference/api/pandas.array.html>`_ or `Pandas Series <https://pandas.pydata.org/docs/reference/series.html>`_.
+
+In addition, vectorized Python UDTFs allow for easy integration with libraries that operate on pandas DataFrames or pandas arrays.
+
+A vectorized UDTF handler class:
+    - defines an :code:`end_partition` method that takes in a DataFrame argument and returns a :code:`pandas.DataFrame` or a tuple of :code:`pandas.Series` or :code:`pandas.arrays` where each array is a column.
+    - does NOT define a :code:`process` method.
+    - optionally defines an :code:`__init__` method, which will be invoked before processing each partition.
+
+Note:
+    A vectorized UDTF must be called with :meth:`~snowflake.snowpark.Window.partition_by` to build the partitions.
+
+Refer to :class:`~snowflake.snowpark.udtf.UDTFRegistration` for details and sample code on how to create regular and vectorized UDTFs using Snowpark Python API.
+"""
 import sys
 from types import ModuleType
 from typing import Callable, Dict, List, Optional, Tuple, Type, Union
@@ -298,11 +314,7 @@ class UDTFRegistration:
         - :meth:`~snowflake.snowpark.Session.table_function`
         - :meth:`~snowflake.snowpark.DataFrame.join_table_function`
 
-    Compared to the default row-by-row processing pattern of a normal UDTF, which sometimes is
-    inefficient, a vectorized UDTF allows vectorized operations on a dataframe, with the input as a
-    `Pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_. In a
-    vectorized UDTF, you can operate on a batches of rows by handling Pandas DataFrame or Pandas
-    Series. You can use :func:`~snowflake.snowpark.functions.udtf`, :meth:`register` or
+    You can use :func:`~snowflake.snowpark.functions.udtf`, :meth:`register` or
     :func:`~snowflake.snowpark.functions.pandas_udtf` to create a vectorized UDTF by providing
     appropriate return and input types. If you would like to use :meth:`register_from_file` to
     create a vectorized UDTF, you would need to explicitly mark the handler method as vectorized using