diff --git a/python/docs/source/user_guide/sql/python_udtf.rst b/python/docs/source/user_guide/sql/python_udtf.rst index 7061453961930..d816801e15b7e 100644 --- a/python/docs/source/user_guide/sql/python_udtf.rst +++ b/python/docs/source/user_guide/sql/python_udtf.rst @@ -65,8 +65,8 @@ To implement a Python UDTF, you first need to define a class implementing the me def analyze(self, *args: Any) -> AnalyzeResult: """ - Computes the output schema of a particular call to this function in response to the - arguments provided. + Static method to compute the output schema of a particular call to this function in + response to the arguments provided. This method is optional and only needed if the registration of the UDTF did not provide a static output schema to be use for all calls to the function. In this context, @@ -101,12 +101,20 @@ To implement a Python UDTF, you first need to define a class implementing the me partitionBy: Sequence[PartitioningColumn] = field(default_factory=tuple) orderBy: Sequence[OrderingColumn] = field(default_factory=tuple) + Notes + ----- + - It is possible for the `analyze` method to accept the exact arguments expected, + mapping 1:1 with the arguments provided to the UDTF call. + - The `analyze` method can instead choose to accept positional arguments if desired + (with `*args`) or keyword arguments (with `**kwargs`). + Examples -------- - analyze implementation that returns one output column for each word in the input string - argument. + This is an `analyze` implementation that returns one output column for each word in the + input string argument. - >>> def analyze(self, text: str) -> AnalyzeResult: + >>> @staticmethod + ... def analyze(text: str) -> AnalyzeResult: ... schema = StructType() ... for index, word in enumerate(text.split(" ")): ... schema = schema.add(f"word_{index}") @@ -114,7 +122,8 @@ To implement a Python UDTF, you first need to define a class implementing the me Same as above, but using *args to accept the arguments.
- >>> def analyze(self, *args) -> AnalyzeResult: + >>> @staticmethod + ... def analyze(*args) -> AnalyzeResult: ... assert len(args) == 1, "This function accepts one argument only" ... assert args[0].dataType == StringType(), "Only string arguments are supported" ... text = args[0] @@ -125,7 +134,8 @@ To implement a Python UDTF, you first need to define a class implementing the me Same as above, but using **kwargs to accept the arguments. - >>> def analyze(self, **kwargs) -> AnalyzeResult: + >>> @staticmethod + ... def analyze(**kwargs) -> AnalyzeResult: ... assert len(kwargs) == 1, "This function accepts one argument only" ... assert "text" in kwargs, "An argument named 'text' is required" ... assert kwargs["text"].dataType == StringType(), "Only strings are supported" @@ -135,10 +145,11 @@ To implement a Python UDTF, you first need to define a class implementing the me ... schema = schema.add(f"word_{index}") ... return AnalyzeResult(schema=schema) - analyze implementation that returns a constant output schema, but add custom information - in the result metadata to be consumed by future __init__ method calls: + An `analyze` implementation that returns a constant output schema, but adds custom + information in the result metadata to be consumed by future __init__ method calls: - >>> def analyze(self, text: str) -> AnalyzeResult: + >>> @staticmethod + ... def analyze(text: str) -> AnalyzeResult: ... @dataclass ... class AnalyzeResultWithOtherMetadata(AnalyzeResult): ... num_words: int