Skip to content

Commit

Permalink
fix: fallback to json if orjson cannot serialize value (#210)
Browse files Browse the repository at this point in the history
Signed-off-by: Artem Inzhyyants <artem.inzhyyants@gmail.com>
Co-authored-by: maxi297 <maxime@airbyte.io>
  • Loading branch information
artem1205 and maxi297 authored Jan 9, 2025
1 parent 3c76ef3 commit cc8ec47
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
14 changes: 13 additions & 1 deletion airbyte_cdk/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import argparse
import importlib
import ipaddress
import json
import logging
import os.path
import socket
Expand Down Expand Up @@ -46,6 +47,7 @@

VALID_URL_SCHEMES = ["https"]
CLOUD_DEPLOYMENT_MODE = "cloud"
# Process-wide flag: flipped to True once the orjson serialization warning has
# been logged, so that the warning is emitted at most once.
_HAS_LOGGED_FOR_SERIALIZATION_ERROR = False


class AirbyteEntrypoint(object):
Expand Down Expand Up @@ -291,7 +293,17 @@ def set_up_secret_filter(config: TConfig, connection_specification: Mapping[str,

@staticmethod
def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str:
    """Serialize an AirbyteMessage to a JSON string.

    The message is first dumped to plain Python data with
    ``AirbyteMessageSerializer`` and then encoded with ``orjson``. If
    ``orjson`` raises (for example on a value it cannot serialize), the
    standard-library ``json`` module is used as a fallback.

    A warning is logged only for the first failure: the module-level flag
    ``_HAS_LOGGED_FOR_SERIALIZATION_ERROR`` is set afterwards so that later
    failures fall back silently.

    :param airbyte_message: the message to serialize.
    :return: the JSON text of the message.
    """
    global _HAS_LOGGED_FOR_SERIALIZATION_ERROR
    serialized_message = AirbyteMessageSerializer.dump(airbyte_message)
    try:
        # Preferred path: orjson returns bytes, hence the decode().
        return orjson.dumps(serialized_message).decode()
    except Exception as exception:
        # Deliberately broad: any orjson failure should degrade to the
        # standard-library encoder rather than abort.
        if not _HAS_LOGGED_FOR_SERIALIZATION_ERROR:
            logger.warning(
                f"There was an error during the serialization of an AirbyteMessage: `{exception}`. This might impact the sync performances."
            )
            _HAS_LOGGED_FOR_SERIALIZATION_ERROR = True
        return json.dumps(serialized_message)

@classmethod
def extract_state(cls, args: List[str]) -> Optional[Any]:
Expand Down
23 changes: 23 additions & 0 deletions unit_tests/test_entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,3 +768,26 @@ def test_handle_record_counts(
assert isinstance(
actual_message.state.sourceStats.recordCount, float
), "recordCount value should be expressed as a float"


def test_given_serialization_error_using_orjson_then_fallback_on_json(
    entrypoint: AirbyteEntrypoint, mocker, spec_mock, config_mock
):
    """Records holding a value orjson rejects must still be emitted via json.

    The record carries an integer that does not fit in 64 bits. Both copies
    of the record are expected in the output, with the integer preserved
    verbatim in the serialized text.
    """
    # Larger than 2**64, so it cannot be represented as a 64-bit integer.
    oversized_int = 7046723166326052303072
    parsed_args = Namespace(
        command="read", config="config_path", state="statepath", catalog="catalogpath"
    )
    record = AirbyteMessage(
        record=AirbyteRecordMessage(
            stream="stream", data={"data": oversized_int}, emitted_at=1
        ),
        type=Type.RECORD,
    )
    mocker.patch.object(MockSource, "read_state", return_value={})
    mocker.patch.object(MockSource, "read_catalog", return_value={})
    mocker.patch.object(MockSource, "read", return_value=[record, record])

    messages = list(entrypoint.run(parsed_args))

    # There will be multiple messages here because the fixture `entrypoint` sets a control message. We only care about records here
    record_messages = [message for message in messages if "RECORD" in message]
    assert len(record_messages) == 2
    # The fallback must keep the oversized value instead of dropping or altering it.
    assert all(str(oversized_int) in message for message in record_messages)

0 comments on commit cc8ec47

Please sign in to comment.