Fix cache save keys #21
64 errors, 461 pass in 5h 5m 14s

559 files    559 suites    5h 5m 14s ⏱️
525 tests    461 ✅ passed    0 💤 skipped    0 ❌ failed    64 🔥 errors
18 328 runs    17 881 ✅ passed    358 💤 skipped    25 ❌ failed    64 🔥 errors
Results for commit 22659c5.
Annotations
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_check_schema (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 6m 44s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:14:30.423358483+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_check_schema>
def test_check_schema(self):
    @contextlib.contextmanager
    def test_requirement(error_message: str):
        with self.assertRaises(ValueError) as e:
            yield
        self.assertEqual((error_message, ), e.exception.args)
    with self.subTest("duplicate columns"):
        with test_requirement("The datasets have duplicate columns.\n"
                              "Left column names: id, id\nRight column names: id, id"):
>           self.left_df.select("id", "id").diff(self.right_df.select("id", "id"), "id")
python/test/test_diff.py:200:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:689: in diff
return Differ().diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:14:30.423358483+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
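The test_requirement helper quoted in the frame above wraps assertRaises in a context manager so each schema requirement can assert on the exact ValueError message. A minimal, self-contained sketch of the same pattern follows; the class name and message are illustrative, not from the test suite:

import contextlib
import unittest

class RequirementTest(unittest.TestCase):
    @contextlib.contextmanager
    def requirement(self, error_message: str):
        # Assert that the wrapped block raises ValueError with exactly this message.
        with self.assertRaises(ValueError) as e:
            yield
        self.assertEqual((error_message,), e.exception.args)

    def test_example(self):
        with self.requirement("boom"):
            raise ValueError("boom")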
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_check_schema (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 30m 36s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
    super(DiffTest, cls).setUpClass()
    value_row = Row('id', 'val', 'label')
>   cls.left_df = cls.spark.createDataFrame([
        value_row(1, 1.0, 'one'),
        value_row(2, 2.0, 'two'),
        value_row(3, 3.0, 'three'),
        value_row(4, None, None),
        value_row(5, 5.0, 'five'),
        value_row(7, 7.0, 'seven'),
    ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
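The setUpClass fixture above never gets past createDataFrame: schema inference issues a config RPC (_inferSchemaFromList -> get_configs) against the unreachable Spark Connect server. For reference, the same fixture runs as plain PySpark against a classic local session; the local[2] master is an assumption, not what CI used:

from pyspark.sql import Row, SparkSession

# Classic local session; the failing runs used a Spark Connect session
# on port 15002 that never became reachable.
spark = SparkSession.builder.master("local[2]").getOrCreate()

value_row = Row('id', 'val', 'label')
left_df = spark.createDataFrame([
    value_row(1, 1.0, 'one'),
    value_row(2, 2.0, 'two'),
    value_row(3, 3.0, 'three'),
    value_row(4, None, None),
    value_row(5, 5.0, 'five'),
    value_row(7, 7.0, 'seven'),
])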
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 4m 15s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:18:45.64635398+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff>
def test_dataframe_diff(self):
>   diff = self.left_df.diff(self.right_df, 'id').orderBy('id').collect()
python/test/test_diff.py:484:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:689: in diff
return Differ().diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...t to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:18:45.64635398+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:18:45.64635398+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
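Here _do_diff fails on its first session interaction, reading spark.sql.caseSensitive. Under Spark Connect that one-line read is a gRPC Config request, which is why it is the call that surfaces the transport error; a classic session answers it locally. A minimal sketch under the same local-session assumption as above:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[2]").getOrCreate()

# On Spark Connect this is a Config RPC; on a classic session it is local.
case_sensitive = spark.conf.get("spark.sql.caseSensitive") == "true"
print(case_sensitive)  # False by default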
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
    super(DiffTest, cls).setUpClass()
    value_row = Row('id', 'val', 'label')
>   cls.left_df = cls.spark.createDataFrame([
        value_row(1, 1.0, 'one'),
        value_row(2, 2.0, 'two'),
        value_row(3, 3.0, 'three'),
        value_row(4, None, None),
        value_row(5, 5.0, 'five'),
        value_row(7, 7.0, 'seven'),
    ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
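_handle_rpc_error (quoted in full in each traceback) tries to unpack google.rpc rich-status details from the failed call before falling back to a generic SparkConnectGrpcException. A standalone sketch of that unpacking, assuming the grpcio-status package is available; describe_rpc_error is an illustrative name, not part of PySpark:

from typing import cast

import grpc
from google.rpc import error_details_pb2
from grpc_status import rpc_status

def describe_rpc_error(rpc_error: grpc.RpcError) -> str:
    # A raised RpcError is also a grpc.Call, hence the same cast as above.
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status is None:
        # No rich status attached, e.g. the transport-level UNAVAILABLE
        # timeouts seen throughout this report.
        return str(rpc_error)
    for detail in status.details:
        if detail.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
            info = error_details_pb2.ErrorInfo()
            detail.Unpack(info)
            return f"{info.reason}: {status.message}"
    return status.message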
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_changes (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 6m 36s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:25:21.711862887+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_changes>
def test_dataframe_diff_with_changes(self):
    options = DiffOptions().with_change_column('changes')
>   diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:517:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:25:21.711862887+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:25:21.711862887+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
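For context, diff_with_options and DiffOptions come from the pyspark-extension package under test; the python/gresearch/spark/diff/__init__.py frames show the package attaching these methods to DataFrame. The failing test body boils down to the following sketch, assuming the package is installed and left_df/right_df exist on a working session:

from gresearch.spark.diff import DiffOptions

# Request a 'changes' column listing which value columns differ per row.
options = DiffOptions().with_change_column('changes')
diff = left_df.diff_with_options(right_df, options, 'id').orderBy('id')
diff.show()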
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_changes (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
    super(DiffTest, cls).setUpClass()
    value_row = Row('id', 'val', 'label')
>   cls.left_df = cls.spark.createDataFrame([
        value_row(1, 1.0, 'one'),
        value_row(2, 2.0, 'two'),
        value_row(3, 3.0, 'three'),
        value_row(4, None, None),
        value_row(5, 5.0, 'five'),
        value_row(7, 7.0, 'seven'),
    ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_default_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 6m 14s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:31:36.248989893+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_default_options>
def test_dataframe_diff_with_default_options(self):
>   diff = self.left_df.diff_with_options(self.right_df, DiffOptions(), 'id').orderBy('id').collect()
python/test/test_diff.py:502:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:31:36.248989893+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_default_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
    super(DiffTest, cls).setUpClass()
    value_row = Row('id', 'val', 'label')
>   cls.left_df = cls.spark.createDataFrame([
        value_row(1, 1.0, 'one'),
        value_row(2, 2.0, 'two'),
        value_row(3, 3.0, 'three'),
        value_row(4, None, None),
        value_row(5, 5.0, 'five'),
        value_row(7, 7.0, 'seven'),
    ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
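Every failure in this report is the same transport error: StatusCode.UNAVAILABLE connecting to the Spark Connect endpoint on port 15002 ("Timeout occurred: FD Shutdown"), not an assertion failure. One possible mitigation in the workflow is to probe the endpoint before starting the suite; wait_for_spark_connect below is a hypothetical helper, not part of this repo:

import grpc

def wait_for_spark_connect(address: str = "localhost:15002",
                           timeout: float = 60.0) -> None:
    # Hypothetical pre-test probe: block until the Spark Connect gRPC
    # port accepts connections, or raise grpc.FutureTimeoutError.
    channel = grpc.insecure_channel(address)
    try:
        grpc.channel_ready_future(channel).result(timeout=timeout)
    finally:
        channel.close()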
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_diff_mode_column_by_column (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 4m 36s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:36:12.661556327+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_column_by_column>
def test_dataframe_diff_with_diff_mode_column_by_column(self):
    options = DiffOptions().with_diff_mode(DiffMode.ColumnByColumn)
>   diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:522:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:36:12.661556327+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:36:12.661556327+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
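The remaining annotations only vary the diff layout. A sketch of the modes these tests exercise, under the same assumptions as the earlier sketches (package installed, working session, left_df/right_df defined):

from gresearch.spark.diff import DiffMode, DiffOptions

# The same diff rendered in each layout covered by the failing tests.
for mode in (DiffMode.ColumnByColumn, DiffMode.LeftSide, DiffMode.RightSide):
    options = DiffOptions().with_diff_mode(mode)
    left_df.diff_with_options(right_df, options, 'id').printSchema()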
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_diff_mode_column_by_column (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
    super(DiffTest, cls).setUpClass()
    value_row = Row('id', 'val', 'label')
>   cls.left_df = cls.spark.createDataFrame([
        value_row(1, 1.0, 'one'),
        value_row(2, 2.0, 'two'),
        value_row(3, 3.0, 'three'),
        value_row(4, None, None),
        value_row(5, 5.0, 'five'),
        value_row(7, 7.0, 'seven'),
    ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_diff_mode_left_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 5m 12s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:41:25.078107479+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_left_side>
def test_dataframe_diff_with_diff_mode_left_side(self):
    options = DiffOptions().with_diff_mode(DiffMode.LeftSide)
>   diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:532:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:41:25.078107479+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:41:25.078107479+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_diff_mode_left_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
@classmethod
def setUpClass(cls):
    super(DiffTest, cls).setUpClass()
    value_row = Row('id', 'val', 'label')
>   cls.left_df = cls.spark.createDataFrame([
        value_row(1, 1.0, 'one'),
        value_row(2, 2.0, 'two'),
        value_row(3, 3.0, 'three'),
        value_row(4, None, None),
        value_row(5, 5.0, 'five'),
        value_row(7, 7.0, 'seven'),
    ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_diff_mode_right_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 5m 44s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:47:09.539969737+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_right_side>
def test_dataframe_diff_with_diff_mode_right_side(self):
    options = DiffOptions().with_diff_mode(DiffMode.RightSide)
>   diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:537:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:47:09.539969737+00:00"}"
>
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.
    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.
    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.
    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if status:
        for d in status.details:
            if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
                info = error_details_pb2.ErrorInfo()
                d.Unpack(info)
                raise convert_exception(info, status.message) from None
        raise SparkConnectGrpcException(status.message) from None
    else:
>       raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:47:09.539969737+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_diff_mode_right_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
cls = <class 'test.test_diff.DiffTest'>
    @classmethod
    def setUpClass(cls):
        super(DiffTest, cls).setUpClass()
        value_row = Row('id', 'val', 'label')
>       cls.left_df = cls.spark.createDataFrame([
            value_row(1, 1.0, 'one'),
            value_row(2, 2.0, 'two'),
            value_row(3, 3.0, 'three'),
            value_row(4, None, None),
            value_row(5, 5.0, 'five'),
            value_row(7, 7.0, 'seven'),
        ])
python/test/test_diff.py:36:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:500: in createDataFrame
_schema = self._inferSchemaFromList(_data, _cols)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/session.py:316: in _inferSchemaFromList
) = self._client.get_configs(
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1337: in get_configs
configs = dict(self.config(op).pairs)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1378: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1502: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.core.SparkConnectClient object at 0x7f720e7b0e20>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.8.18/x64/lib/python3.8/site-packages/pyspark/sql/connect/client/core.py:1542: SparkConnectGrpcException
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_diff_mode_side_by_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 5m 43s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:52:53.005722359+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_diff_mode_side_by_side>
    def test_dataframe_diff_with_diff_mode_side_by_side(self):
        options = DiffOptions().with_diff_mode(DiffMode.SideBySide)
>       diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:527:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:52:53.005722359+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_diff_mode_side_by_side (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
(setup traceback identical to the test_dataframe_diff_with_diff_mode_right_side setup error above: setUpClass at python/test/test_diff.py:36 failing in _handle_rpc_error at client/core.py:1542)
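Neither diff-mode test above ever reaches the actual comparison. For reference, outside the suite the same calls look like the sketch below. It assumes the `pyspark-extension` package is installed (importing `gresearch.spark.diff` attaches `diff_with_options` to `DataFrame`) and uses illustrative data; the mode semantics in the comments are my reading of the option names, not something this run demonstrates:

```python
from pyspark.sql import SparkSession
from gresearch.spark.diff import DiffOptions, DiffMode  # pyspark-extension package

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame([(1, 1.0, 'one'), (2, 2.0, 'two')], ['id', 'val', 'label'])
right = spark.createDataFrame([(1, 1.1, 'one'), (3, 3.0, 'three')], ['id', 'val', 'label'])

# RightSide presumably reports only the right-hand values of changed rows,
# while SideBySide keeps the left and right values next to each other.
for mode in (DiffMode.RightSide, DiffMode.SideBySide):
    options = DiffOptions().with_diff_mode(mode)
    left.diff_with_options(right, options, 'id').orderBy('id').show()
```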
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 4m 44s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:57:37.202129066+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_ignored>
    def test_dataframe_diff_with_ignored(self):
>       diff = self.left_df.diff(self.right_df, ['id'], ['label']).orderBy('id').collect()
python/test/test_diff.py:488:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:689: in diff
return Differ().diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:57:37.202129066+00:00"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T08:57:37.202129066+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
(setup traceback identical to the test_dataframe_diff_with_diff_mode_right_side setup error above: setUpClass at python/test/test_diff.py:36 failing in _handle_rpc_error at client/core.py:1542)
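test_dataframe_diff_with_ignored passes separate id and ignore column lists to the patched `DataFrame.diff`. A sketch of that call shape, again assuming the `pyspark-extension` package is installed; that ignored columns are excluded from change detection is my reading of the test name, not something this failed run confirms:

```python
import gresearch.spark.diff  # noqa: F401 — monkey-patches DataFrame.diff (pyspark-extension)
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame([(1, 1.0, 'one'), (2, 2.0, 'two')], ['id', 'val', 'label'])
right = spark.createDataFrame([(1, 1.0, 'uno'), (2, 2.1, 'two')], ['id', 'val', 'label'])

# id columns first, ignored columns second: with 'label' ignored, only the
# 'val' change on id=2 should surface as a change.
left.diff(right, ['id'], ['label']).orderBy('id').show()
```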
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 6m 37s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T09:04:14.756086155+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_options>
    def test_dataframe_diff_with_options(self):
        options = DiffOptions('d', 'l', 'r', 'i', 'c', 'r', 'n', None)
>       diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:507:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T09:04:14.756086155+00:00"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T09:04:14.756086155+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_options (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
(setup traceback identical to the test_dataframe_diff_with_diff_mode_right_side setup error above: setUpClass at python/test/test_diff.py:36 failing in _handle_rpc_error at client/core.py:1542)
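test_dataframe_diff_with_options builds `DiffOptions` positionally. Reading the arguments against the documented defaults ('diff', 'left', 'right', 'I', 'C', 'D', 'N', None), they appear to rename the diff column, the left/right column prefixes, and the four diff action values, with the last argument the optional change column; that mapping is an inference from the defaults, not confirmed anywhere in this log:

```python
from gresearch.spark.diff import DiffOptions

# Same positional call as the test; each comment gives the presumed meaning.
options = DiffOptions('d',    # diff column name (default 'diff')
                      'l',    # left column prefix (default 'left')
                      'r',    # right column prefix (default 'right')
                      'i',    # insert action value (default 'I')
                      'c',    # change action value (default 'C')
                      'r',    # delete action value (default 'D')
                      'n',    # no-change action value (default 'N')
                      None)   # optional change column
left.diff_with_options(right, options, 'id').orderBy('id').show()  # left/right as in the sketches above
```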
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_options_and_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 4m 51s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T09:09:06.046613236+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_options_and_ignored>
    def test_dataframe_diff_with_options_and_ignored(self):
        options = DiffOptions('d', 'l', 'r', 'i', 'c', 'r', 'n', None)
>       diff = self.left_df.diff_with_options(self.right_df, options, ['id'], ['label']).orderBy('id').collect()
python/test/test_diff.py:512:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T09:09:06.046613236+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_options_and_ignored (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
(setup traceback identical to the test_dataframe_diff_with_diff_mode_right_side setup error above: setUpClass at python/test/test_diff.py:36 failing in _handle_rpc_error at client/core.py:1542)
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diff_with_sparse_mode (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 5m 26s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T09:14:32.124690962+00:00"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diff_with_sparse_mode>
    def test_dataframe_diff_with_sparse_mode(self):
        options = DiffOptions().with_sparse_mode(True)
>       diff = self.left_df.diff_with_options(self.right_df, options, 'id').orderBy('id').collect()
python/test/test_diff.py:542:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:746: in diff_with_options
return Differ(options).diff(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:340: in diff
return self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ... to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T09:14:32.124690962+00:00"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown", grpc_status:14, created_time:"2024-11-21T09:14:32.124690962+00:00"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
Check failure on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs with error: test_dataframe_diff_with_sparse_mode (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.5.3 Scala 2.12.18 Python 3.8)/test-results-connect/pytest-1732176455.897051458-4590.xml [took 0s]
Raw output
failed on setup with "pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T08:38:13.490575711+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>"
(setup traceback identical to the test_dataframe_diff_with_diff_mode_right_side setup error above: setUpClass at python/test/test_diff.py:36 failing in _handle_rpc_error at client/core.py:1542)
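test_dataframe_diff_with_sparse_mode only toggles one option on top of the defaults. A sketch continuing the `left`/`right` frames from the sketches above; my understanding of sparse mode is an assumption, not something this failed run got far enough to show:

```python
from gresearch.spark.diff import DiffOptions

# Sparse mode presumably nulls out values that are equal on both sides,
# leaving only the differing values populated (assumption, see lead-in).
options = DiffOptions().with_sparse_mode(True)
left.diff_with_options(right, options, 'id').orderBy('id').show()
```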
Check warning on line 0 in test.test_diff.DiffTest
github-actions / Test Results
1 out of 28 runs failed: test_dataframe_diffwith (test.test_diff.DiffTest)
artifacts/Python Test Results (Spark 3.4.4 Scala 2.12.17 Python 3.9)/test-results-connect/pytest-1732176464.909650409-4396.xml [took 4m 59s]
Raw output
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T09:19:31.657721882+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
self = <test.test_diff.DiffTest testMethod=test_dataframe_diffwith>
    def test_dataframe_diffwith(self):
>       diff = self.left_df.diffwith(self.right_df, 'id').orderBy('id').collect()
python/test/test_diff.py:492:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
python/gresearch/spark/diff/__init__.py:717: in diffwith
return Differ().diffwith(self, other, *id_or_ignore_columns)
python/gresearch/spark/diff/__init__.py:376: in diffwith
diff = self._do_diff(left, right, id_columns, ignore_columns)
python/gresearch/spark/diff/__init__.py:508: in _do_diff
case_sensitive = left.session().conf.get("spark.sql.caseSensitive") == "true"
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/conf.py:61: in get
result = self._client.config(operation)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1039: in config
self._handle_error(error)
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1055: in _handle_error
self._handle_rpc_error(error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fb0d0519ca0>
rpc_error = <_InactiveRpcError of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all ...resses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
>
    def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
        ...  # docstring and body identical to _handle_rpc_error above
>           raise SparkConnectGrpcException(str(rpc_error)) from None
E pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_InactiveRpcError of RPC that terminated with:
E status = StatusCode.UNAVAILABLE
E details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"
E debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-11-21T09:19:31.657721882+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:15002: Failed to connect to remote host: Timeout occurred: FD Shutdown"}"
E >
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/pyspark/sql/connect/client.py:1095: SparkConnectGrpcException
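The last failing test exercises `diffwith`, the row-pairing variant of the diff API. A sketch of that call outside the suite, under the same pyspark-extension assumption; the comment on its return shape is my inference from the Scala `diffWith`, not something this run demonstrates:

```python
import gresearch.spark.diff  # noqa: F401 — monkey-patches DataFrame.diffwith (pyspark-extension)
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame([(1, 1.0, 'one'), (2, 2.0, 'two')], ['id', 'val', 'label'])
right = spark.createDataFrame([(1, 1.1, 'one'), (3, 3.0, 'three')], ['id', 'val', 'label'])

# Same call shape as the test; if diffwith mirrors the Scala diffWith, each
# result row pairs the diff action with the complete left and right rows
# instead of flattened per-column values (assumption, see lead-in).
left.diffwith(right, 'id').orderBy('id').show(truncate=False)
```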