update docs

dtenedor · Nov 13, 2023 · 0ac4f1a · 0ac4f1a
1 parent 5b91a0d
commit 0ac4f1a
Showing 1 changed file with 21 additions and 10 deletions.
diff --git a/python/docs/source/user_guide/sql/python_udtf.rst b/python/docs/source/user_guide/sql/python_udtf.rst
@@ -65,8 +65,8 @@ To implement a Python UDTF, you first need to define a class implementing the me
 
         def analyze(self, *args: Any) -> AnalyzeResult:
             """
-            Computes the output schema of a particular call to this function in response to the
-            arguments provided.
+            Static method to compute the output schema of a particular call to this function in
+            response to the arguments provided.
 
             This method is optional and only needed if the registration of the UDTF did not provide
             a static output schema to be used for all calls to the function. In this context,
@@ -101,20 +101,29 @@ To implement a Python UDTF, you first need to define a class implementing the me
                 partitionBy: Sequence[PartitioningColumn] = field(default_factory=tuple)
                 orderBy: Sequence[OrderingColumn] = field(default_factory=tuple)
 
+            Notes
+            -----
+            - It is possible for the `analyze` method to accept the exact arguments expected,
+              mapping 1:1 with the arguments provided to the UDTF call.
+            - The `analyze` method can instead choose to accept positional arguments if desired
+              (with `*args`) or keyword arguments (with `**kwargs`).
+
             Examples
             --------
-            analyze implementation that returns one output column for each word in the input string
-            argument.
+            This is an `analyze` implementation that returns one output column for each word in the
+            input string argument.
 
-            >>> def analyze(self, text: str) -> AnalyzeResult:
+            >>> @staticmethod
+            ... def analyze(text: str) -> AnalyzeResult:
             ...     schema = StructType()
             ...     for index, word in enumerate(text.split(" ")):
             ...         schema = schema.add(f"word_{index}")
             ...     return AnalyzeResult(schema=schema)
 
             Same as above, but using *args to accept the arguments.
 
-            >>> def analyze(self, *args) -> AnalyzeResult:
+            >>> @staticmethod
+            ... def analyze(*args) -> AnalyzeResult:
             ...     assert len(args) == 1, "This function accepts one argument only"
             ...     assert args[0].dataType == StringType(), "Only string arguments are supported"
             ...     text = args[0]
@@ -125,7 +134,8 @@ To implement a Python UDTF, you first need to define a class implementing the me
 
             Same as above, but using **kwargs to accept the arguments.
 
-            >>> def analyze(self, **kwargs) -> AnalyzeResult:
+            >>> @staticmethod
+            ... def analyze(**kwargs) -> AnalyzeResult:
             ...     assert len(kwargs) == 1, "This function accepts one argument only"
             ...     assert "text" in kwargs, "An argument named 'text' is required"
             ...     assert kwargs["text"].dataType == StringType(), "Only strings are supported"
@@ -135,10 +145,11 @@ To implement a Python UDTF, you first need to define a class implementing the me
             ...         schema = schema.add(f"word_{index}")
             ...     return AnalyzeResult(schema=schema)
 
-            analyze implementation that returns a constant output schema, but add custom information
-            in the result metadata to be consumed by future __init__ method calls:
+            An `analyze` implementation that returns a constant output schema, but adds custom
+            information in the result metadata to be consumed by future __init__ method calls:
 
-            >>> def analyze(self, text: str) -> AnalyzeResult:
+            >>> @staticmethod
+            ... def analyze(text: str) -> AnalyzeResult:
             ...     @dataclass
             ...     class AnalyzeResultWithOtherMetadata(AnalyzeResult):
             ...         num_words: int