airbytehq · tolik0 · Dec 3, 2024 · Dec 3, 2024 · Dec 4, 2024 · Dec 4, 2024
diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -20,6 +20,9 @@
     ClientSideIncrementalRecordFilterDecorator,
 )
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
+from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
+    PerPartitionWithGlobalCursor,
+)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -300,6 +303,66 @@ def _group_streams(
                             cursor=final_state_cursor,
                         )
                     )
+                elif (
+                    incremental_sync_component_definition
+                    and incremental_sync_component_definition.get("type", "")
+                    == DatetimeBasedCursorModel.__name__
+                    and self._stream_supports_concurrent_partition_processing(
+                        declarative_stream=declarative_stream
+                    )
+                    and hasattr(declarative_stream.retriever, "stream_slicer")
+                    and isinstance(
+                        declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
+                    )
+                ):
+                    stream_state = state_manager.get_stream_state(
+                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
+                    )
+                    # Private attribute: raise a stream-specific error if it is ever missing.
+                    try:
+                        partition_router = declarative_stream.retriever.stream_slicer._partition_router
+                    except AttributeError as e:
+                        raise ValueError(
+                            f"Failed to access partition router for stream {declarative_stream.name}: {e}"
+                        ) from e
+
+                    cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
+                        state_manager=state_manager,
+                        model_type=DatetimeBasedCursorModel,
+                        component_definition=incremental_sync_component_definition,
+                        stream_name=declarative_stream.name,
+                        stream_namespace=declarative_stream.namespace,
+                        config=config or {},
+                        stream_state=stream_state,
+                        partition_router=partition_router,
+                    )
+
+                    partition_generator = StreamSlicerPartitionGenerator(
+                        DeclarativePartitionFactory(
+                            declarative_stream.name,
+                            declarative_stream.get_json_schema(),
+                            self._retriever_factory(
+                                name_to_stream_mapping[declarative_stream.name],
+                                config,
+                                stream_state,
+                            ),
+                            self.message_repository,
+                        ),
+                        cursor,
+                    )
+
+                    concurrent_streams.append(
+                        DefaultStream(
+                            partition_generator=partition_generator,
+                            name=declarative_stream.name,
+                            json_schema=declarative_stream.get_json_schema(),
+                            availability_strategy=AlwaysAvailableAvailabilityStrategy(),
+                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
+                            cursor_field=cursor.cursor_field.cursor_field_key,
+                            logger=self.logger,
+                            cursor=cursor,
+                        )
+                    )
                 else:
                     synchronous_streams.append(declarative_stream)
             else:

diff --git a/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte_cdk/sources/declarative/extractors/record_filter.py
@@ -59,13 +59,11 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
 
     def __init__(
         self,
-        date_time_based_cursor: DatetimeBasedCursor,
-        substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
+        cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
         **kwargs: Any,
     ):
         super().__init__(**kwargs)
-        self._date_time_based_cursor = date_time_based_cursor
-        self._substream_cursor = substream_cursor
+        self._cursor = cursor
 
     def filter_records(
         self,
@@ -77,7 +75,7 @@ def filter_records(
         records = (
             record
             for record in records
-            if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
+            if self._cursor.should_be_synced(
                 # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                 # Record stream name is empty cause it is not used durig the filtering
                 Record(data=record, associated_slice=stream_slice, stream_name="")

diff --git a/airbyte_cdk/sources/declarative/incremental/__init__.py b/airbyte_cdk/sources/declarative/incremental/__init__.py
@@ -2,6 +2,10 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
+from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
+)
 from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
 from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
 from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor
@@ -14,6 +18,8 @@
 
 __all__ = [
     "CursorFactory",
+    "ConcurrentCursorFactory",
+    "ConcurrentPerPartitionCursor",
     "DatetimeBasedCursor",
     "DeclarativeCursor",
     "GlobalSubstreamCursor",