diff --git a/python/gresearch/spark/__init__.py b/python/gresearch/spark/__init__.py index 15cb4bed..37994ef9 100644 --- a/python/gresearch/spark/__init__.py +++ b/python/gresearch/spark/__init__.py @@ -29,6 +29,7 @@ from pyspark.files import SparkFiles from pyspark.sql import DataFrame from pyspark.sql.column import Column, _to_java_column +from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame from pyspark.sql.context import SQLContext from pyspark.sql.functions import col, count, lit, when from pyspark.sql.session import SparkSession @@ -292,6 +293,7 @@ def histogram(self: DataFrame, DataFrame.histogram = histogram +ConnectDataFrame.histogram = histogram class UnpersistHandle: @@ -343,6 +345,7 @@ def session_or_ctx(self: DataFrame) -> Union[SparkSession, SQLContext]: DataFrame.with_row_numbers = with_row_numbers +ConnectDataFrame.with_row_numbers = with_row_numbers DataFrame.session_or_ctx = session_or_ctx diff --git a/python/gresearch/spark/diff/__init__.py b/python/gresearch/spark/diff/__init__.py index 7776a743..174d0724 100644 --- a/python/gresearch/spark/diff/__init__.py +++ b/python/gresearch/spark/diff/__init__.py @@ -18,6 +18,7 @@ from py4j.java_gateway import JavaObject, JVMView from pyspark.sql import DataFrame +from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame from pyspark.sql.types import DataType from gresearch.spark import _to_seq, _to_map @@ -492,3 +493,9 @@ def diffwith_with_options(self: DataFrame, other: DataFrame, options: DiffOption DataFrame.diff_with_options = diff_with_options DataFrame.diffwith_with_options = diffwith_with_options + +ConnectDataFrame.diff = diff +ConnectDataFrame.diffwith = diffwith + +ConnectDataFrame.diff_with_options = diff_with_options +ConnectDataFrame.diffwith_with_options = diffwith_with_options diff --git a/python/gresearch/spark/parquet/__init__.py b/python/gresearch/spark/parquet/__init__.py index f6dd137f..dc480ec9 100644 --- a/python/gresearch/spark/parquet/__init__.py +++ b/python/gresearch/spark/parquet/__init__.py @@ -16,6 +16,7 @@ from py4j.java_gateway import JavaObject from pyspark.sql import DataFrameReader, DataFrame +from pyspark.sql.connect.readwriter import DataFrameReader as ConnectDataFrameReader from gresearch.spark import _to_seq @@ -204,3 +205,9 @@ def parquet_partitions(self: DataFrameReader, *paths: str, parallelism: Optional DataFrameReader.parquet_blocks = parquet_blocks DataFrameReader.parquet_block_columns = parquet_block_columns DataFrameReader.parquet_partitions = parquet_partitions + +ConnectDataFrameReader.parquet_metadata = parquet_metadata +ConnectDataFrameReader.parquet_schema = parquet_schema +ConnectDataFrameReader.parquet_blocks = parquet_blocks +ConnectDataFrameReader.parquet_block_columns = parquet_block_columns +ConnectDataFrameReader.parquet_partitions = parquet_partitions