From 6c8605a49203b58c9de760ab8e343a3fe59c6a83 Mon Sep 17 00:00:00 2001
From: Giuseppe Lillo
Date: Fri, 9 Jan 2026 16:20:39 +0100
Subject: [PATCH 1/7] poc

---
 .../bin/test/log4j2.yaml | 38 +
 ...rg.apache.kafka.connect.sink.SinkConnector | 16 +
 ...pache.kafka.connect.source.SourceConnector | 16 +
 connect/file/bin/test/log4j2.yaml | 35 +
 ...pache.kafka.connect.source.SourceConnector | 18 +
 connect/mirror/bin/test/log4j2.yaml | 41 +
 ...policy.ConnectorClientConfigOverridePolicy | 18 +
 ...org.apache.kafka.connect.storage.Converter | 22 +
 ...ache.kafka.connect.storage.HeaderConverter | 22 +
 ...he.kafka.connect.rest.ConnectRestExtension | 16 +
 ...rg.apache.kafka.connect.sink.SinkConnector | 22 +
 ...pache.kafka.connect.source.SourceConnector | 26 +
 ...org.apache.kafka.connect.storage.Converter | 22 +
 ...ache.kafka.connect.storage.HeaderConverter | 22 +
 ...he.kafka.connect.transforms.Transformation | 23 +
 ...ka.connect.transforms.predicates.Predicate | 17 +
 connect/runtime/bin/test/log4j2.yaml | 48 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 ...policy.ConnectorClientConfigOverridePolicy | 16 +
 ...he.kafka.connect.rest.ConnectRestExtension | 16 +
 ...rg.apache.kafka.connect.sink.SinkConnector | 22 +
 ...org.apache.kafka.connect.storage.Converter | 20 +
 ...ache.kafka.connect.storage.HeaderConverter | 17 +
 ...org.apache.kafka.connect.storage.Converter | 17 +
 ...org.apache.kafka.connect.storage.Converter | 17 +
 ...policy.ConnectorClientConfigOverridePolicy | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 .../read-version-from-resource-v1/version | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 .../read-version-from-resource-v2/version | 16 +
 ...afka.common.config.provider.ConfigProvider | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 ...rg.apache.kafka.connect.sink.SinkConnector | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 ...ache.kafka.connect.storage.HeaderConverter | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 .../services/test.plugins.ServiceLoadedClass | 16 +
 ...policy.ConnectorClientConfigOverridePolicy | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 ...org.apache.kafka.connect.storage.Converter | 16 +
 ...ache.kafka.connect.storage.HeaderConverter | 16 +
 ...ka.connect.transforms.predicates.Predicate | 16 +
 ...rg.apache.kafka.connect.sink.SinkConnector | 16 +
 ...pache.kafka.connect.source.SourceConnector | 16 +
 ...he.kafka.connect.transforms.Transformation | 16 +
 ...rg.apache.kafka.connect.sink.SinkConnector | 17 +
 ...pache.kafka.connect.source.SourceConnector | 18 +
 ...he.kafka.connect.transforms.Transformation | 41 +
 ...ka.connect.transforms.predicates.Predicate | 18 +
 .../scala/kafka/server/ConfigHandler.scala | 44 +
 .../scala/kafka/server/ReplicaManager.scala | 96 +-
 .../ClassicToDisklessMigrationTest.java | 255 ++++
 .../inkless/docker-compose.yml | 6 +-
 server-common/bin/test/log4j2.yaml | 35 +
 .../main/common/message/ShareSnapshotKey.json | 31 +
 .../common/message/ShareSnapshotValue.json | 48 +
 .../main/common/message/ShareUpdateKey.json | 31 +
 .../main/common/message/ShareUpdateValue.json | 42 +
 shell/bin/test/log4j2.yaml | 32 +
 .../bin/main/message/ProducerSnapshot.json | 85 ++
 .../RemoteLogSegmentMetadataRecord.json | 142 +++
 ...emoteLogSegmentMetadataSnapshotRecord.json | 108 ++
 .../RemoteLogSegmentMetadataUpdateRecord.json | 90 ++
 .../RemotePartitionDeleteMetadataRecord.json | 68 ++
 storage/bin/test/log4j2.yaml | 57 +
 .../org/apache/kafka/tiered/storage/README.md | 11 +
 .../V10__Covering_index_on_batches.sql | 9 +
 .../V11__Add_diskless_offsets_to_logs.sql | 3 +
 .../main/db/migration/V1__Create_tables.sql | 1035 +++++++++++++++++
 ...eed_up_delete_topic_and_delete_records.sql | 121 ++
 .../main/db/migration/V3__Add_logs_size.sql | 300 +++++
 .../migration/V4__Retention_enforcement.sql | 133 +++
 .../V5__Fix_deadlock_in_delete_records_v1.sql | 90 ++
 ...e_understandability_of_delete_topic_v1.sql | 143 +++
 .../migration/V7__Find_batches_function.sql | 114 ++
 .../V8__Find_batches_function_with_limit.sql | 88 ++
 .../V9__Retention_enforcement_with_limits.sql | 132 +++
 .../inkless/bin/main/message/CacheKey.json | 52 +
 .../inkless/bin/main/message/FileExtent.json | 58 +
 .../net.jqwik.api.providers.ArbitraryProvider | 6 +
 storage/inkless/bin/test/log4j.properties | 7 +
 .../bin/test/test_gcs_credentials.json | 6 +
 .../inkless/control_plane/ControlPlane.java | 4 +
 .../control_plane/GetDisklessLogRequest.java | 24 +
 .../control_plane/GetDisklessLogResponse.java | 42 +
 .../control_plane/InMemoryControlPlane.java | 42 +
 .../InitLogDisklessStartOffsetRequest.java | 27 +
 .../postgres/GetDisklessLogJob.java | 114 ++
 .../InitLogDisklessStartOffsetJob.java | 79 ++
 .../postgres/PostgresControlPlane.java | 23 +
 .../postgres/PostgresControlPlaneMetrics.java | 12 +
 .../TopicsAndPartitionsCreateJob.java | 5 +-
 .../V11__Add_diskless_offsets_to_logs.sql | 3 +
 .../postgres/CommitFileJobTest.java | 48 +-
 .../postgres/DeleteRecordsJobTest.java | 8 +-
 .../postgres/DeleteTopicJobTest.java | 2 +-
 .../InitLogDisklessStartOffsetJobTest.java | 164 +++
 .../TopicsAndPartitionsCreateJobTest.java | 26 +-
 .../storage/internals/log/LogConfig.java | 14 +-
 .../integration-tests/bin/test/log4j2.yaml | 65 ++
 ...inScalaIntegrationTestImplicitSerdes.scala | 175 +++
 .../streams/integration/WordCountTest.scala | 250 ++++
 ...mToTableJoinScalaIntegrationTestBase.scala | 144 +++
 .../utils/StreamToTableJoinTestData.scala | 60 +
 .../scala/FunctionsCompatConversions.scala | 141 +++
 .../streams/scala/ImplicitConversions.scala | 107 ++
 .../kafka/streams/scala/StreamsBuilder.scala | 195 ++++
 .../streams/scala/kstream/Branched.scala | 67 ++
 .../scala/kstream/BranchedKStream.scala | 118 ++
 .../scala/kstream/CogroupedKStream.scala | 113 ++
 .../streams/scala/kstream/Consumed.scala | 115 ++
 .../kafka/streams/scala/kstream/Grouped.scala | 51 +
 .../kafka/streams/scala/kstream/Joined.scala | 66 ++
 .../scala/kstream/KGroupedStream.scala | 190 +++
 .../streams/scala/kstream/KGroupedTable.scala | 145 +++
 .../kafka/streams/scala/kstream/KStream.scala | 877 ++++++++++++++
 .../kafka/streams/scala/kstream/KTable.scala | 806 +++++++++++++
 .../streams/scala/kstream/Materialized.scala | 114 ++
 .../streams/scala/kstream/Produced.scala | 60 +
 .../streams/scala/kstream/Repartitioned.scala | 87 ++
 .../SessionWindowedCogroupedKStream.scala | 64 +
 .../kstream/SessionWindowedKStream.scala | 148 +++
 .../streams/scala/kstream/StreamJoined.scala | 91 ++
 .../TimeWindowedCogroupedKStream.scala | 62 +
 .../scala/kstream/TimeWindowedKStream.scala | 142 +++
 .../kafka/streams/scala/kstream/package.scala | 31 +
 .../apache/kafka/streams/scala/package.scala | 26 +
 .../streams/scala/serialization/Serdes.scala | 89 ++
 streams/streams-scala/bin/test/log4j2.yaml | 32 +
 .../kafka/streams/scala/TopologyTest.scala | 470 ++++++++
 .../streams/scala/kstream/ConsumedTest.scala | 74 ++
 .../streams/scala/kstream/GroupedTest.scala | 46 +
 .../streams/scala/kstream/JoinedTest.scala | 44 +
 .../scala/kstream/KStreamSplitTest.scala | 125 ++
 .../streams/scala/kstream/KStreamTest.scala | 419 +++++++
 .../streams/scala/kstream/KTableTest.scala | 617 ++++++++++
 .../scala/kstream/MaterializedTest.scala | 88 ++
 .../streams/scala/kstream/ProducedTest.scala | 61 +
 .../scala/kstream/RepartitionedTest.scala | 111 ++
 .../scala/kstream/StreamJoinedTest.scala | 85 ++
 .../streams/scala/utils/TestDriver.scala | 41 +
 streams/test-utils/bin/test/log4j2.yaml | 35 +
 .../apache/kafka/common/test/api/README.md | 192 +++
 .../bin/test/log4j2.yaml | 35 +
 .../org.junit.jupiter.api.extension.Extension | 16 +
 .../test-common-runtime/bin/main/log4j2.yaml | 35 +
 .../common/message/TransactionLogKey.json | 26 +
 .../common/message/TransactionLogValue.json | 53 +
 149 files changed, 12250 insertions(+), 62 deletions(-)

 create mode 100644 clients/clients-integration-tests/bin/test/log4j2.yaml
 create mode 100644 connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector
 create mode 100644 connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector
 create mode 100644 connect/file/bin/test/log4j2.yaml
 create mode 100644 connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector
 create mode 100644 connect/mirror/bin/test/log4j2.yaml
 create mode 100644 connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy
 create mode 100644 connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter
 create mode 100644 connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation
 create mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate
 create mode 100644 connect/runtime/bin/test/log4j2.yaml
 create mode 100644 connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter
 create mode 100644 connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter
 create mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy
 create mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension
 create mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector
 create mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter
 create mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter
 create mode 100644
connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy create mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version create mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version create mode 100644 connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider create mode 100644 connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector create mode 100644 connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter create mode 100644 connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass create mode 100644 connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy create mode 100644 connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter create mode 100644 connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter create mode 100644 connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate create mode 100644 connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector create mode 100644 connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector create mode 100644 connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation create mode 100644 connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector create mode 100644 connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector create mode 100644 connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation create mode 100644 connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate 
create mode 100644 core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java create mode 100644 server-common/bin/test/log4j2.yaml create mode 100644 share-coordinator/bin/main/common/message/ShareSnapshotKey.json create mode 100644 share-coordinator/bin/main/common/message/ShareSnapshotValue.json create mode 100644 share-coordinator/bin/main/common/message/ShareUpdateKey.json create mode 100644 share-coordinator/bin/main/common/message/ShareUpdateValue.json create mode 100644 shell/bin/test/log4j2.yaml create mode 100644 storage/bin/main/message/ProducerSnapshot.json create mode 100644 storage/bin/main/message/RemoteLogSegmentMetadataRecord.json create mode 100644 storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json create mode 100644 storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json create mode 100644 storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json create mode 100644 storage/bin/test/log4j2.yaml create mode 100644 storage/bin/test/org/apache/kafka/tiered/storage/README.md create mode 100644 storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql create mode 100644 storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql create mode 100644 storage/inkless/bin/main/db/migration/V1__Create_tables.sql create mode 100644 storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql create mode 100644 storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql create mode 100644 storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql create mode 100644 storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql create mode 100644 storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql create mode 100644 storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql create mode 100644 storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql create mode 100644 storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql create mode 100644 storage/inkless/bin/main/message/CacheKey.json create mode 100644 storage/inkless/bin/main/message/FileExtent.json create mode 100644 storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider create mode 100644 storage/inkless/bin/test/log4j.properties create mode 100644 storage/inkless/bin/test/test_gcs_credentials.json create mode 100644 storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogRequest.java create mode 100644 storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogResponse.java create mode 100644 storage/inkless/src/main/java/io/aiven/inkless/control_plane/InitLogDisklessStartOffsetRequest.java create mode 100644 storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/GetDisklessLogJob.java create mode 100644 storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJob.java create mode 100644 storage/inkless/src/main/resources/db/migration/V11__Add_diskless_offsets_to_logs.sql create mode 100644 storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJobTest.java create mode 100644 streams/integration-tests/bin/test/log4j2.yaml create mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala create mode 100644 
streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala create mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala create mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala create mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala create mode 100644 streams/streams-scala/bin/test/log4j2.yaml create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala create mode 100644 
streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala create mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala create mode 100644 streams/test-utils/bin/test/log4j2.yaml create mode 100644 test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md create mode 100644 test-common/test-common-internal-api/bin/test/log4j2.yaml create mode 100644 test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension create mode 100644 test-common/test-common-runtime/bin/main/log4j2.yaml create mode 100644 transaction-coordinator/bin/main/common/message/TransactionLogKey.json create mode 100644 transaction-coordinator/bin/main/common/message/TransactionLogValue.json diff --git a/clients/clients-integration-tests/bin/test/log4j2.yaml b/clients/clients-integration-tests/bin/test/log4j2.yaml new file mode 100644 index 0000000000..9825ef7c7d --- /dev/null +++ b/clients/clients-integration-tests/bin/test/log4j2.yaml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: OFF + AppenderRef: + - ref: STDOUT + Logger: + - name: kafka + level: WARN + + - name: org.apache.kafka + level: WARN \ No newline at end of file diff --git a/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector new file mode 100644 index 0000000000..4acecd76b5 --- /dev/null +++ b/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. 
+ # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.file.FileStreamSinkConnector \ No newline at end of file diff --git a/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector new file mode 100644 index 0000000000..66a0c5d858 --- /dev/null +++ b/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.file.FileStreamSourceConnector \ No newline at end of file diff --git a/connect/file/bin/test/log4j2.yaml b/connect/file/bin/test/log4j2.yaml new file mode 100644 index 0000000000..1e9f550fa6 --- /dev/null +++ b/connect/file/bin/test/log4j2.yaml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %X{connector.context}%m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: kafka + level: WARN diff --git a/connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector new file mode 100644 index 0000000000..4836e08f3e --- /dev/null +++ b/connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector @@ -0,0 +1,18 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.mirror.MirrorCheckpointConnector +org.apache.kafka.connect.mirror.MirrorHeartbeatConnector +org.apache.kafka.connect.mirror.MirrorSourceConnector \ No newline at end of file diff --git a/connect/mirror/bin/test/log4j2.yaml b/connect/mirror/bin/test/log4j2.yaml new file mode 100644 index 0000000000..b63606d0ba --- /dev/null +++ b/connect/mirror/bin/test/log4j2.yaml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %X{connector.context}%m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: kafka + level: WARN + + - name: state.change.logger + level: "OFF" + + - name: org.apache.kafka.connect + level: DEBUG diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy new file mode 100644 index 0000000000..8b76ce452b --- /dev/null +++ b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy @@ -0,0 +1,18 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.connector.policy.AllConnectorClientConfigOverridePolicy +org.apache.kafka.connect.connector.policy.PrincipalConnectorClientConfigOverridePolicy +org.apache.kafka.connect.connector.policy.NoneConnectorClientConfigOverridePolicy \ No newline at end of file diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..364a2276f1 --- /dev/null +++ b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,22 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +org.apache.kafka.connect.converters.ByteArrayConverter +org.apache.kafka.connect.converters.DoubleConverter +org.apache.kafka.connect.converters.FloatConverter +org.apache.kafka.connect.converters.IntegerConverter +org.apache.kafka.connect.converters.LongConverter +org.apache.kafka.connect.converters.ShortConverter +org.apache.kafka.connect.converters.BooleanConverter diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter new file mode 100644 index 0000000000..364a2276f1 --- /dev/null +++ b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter @@ -0,0 +1,22 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.converters.ByteArrayConverter +org.apache.kafka.connect.converters.DoubleConverter +org.apache.kafka.connect.converters.FloatConverter +org.apache.kafka.connect.converters.IntegerConverter +org.apache.kafka.connect.converters.LongConverter +org.apache.kafka.connect.converters.ShortConverter +org.apache.kafka.connect.converters.BooleanConverter diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension new file mode 100644 index 0000000000..0a1ef88924 --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +org.apache.kafka.connect.runtime.isolation.PluginsTest$TestConnectRestExtension \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector new file mode 100644 index 0000000000..818d09e618 --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector @@ -0,0 +1,22 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.integration.BlockingConnectorTest$BlockingSinkConnector +org.apache.kafka.connect.integration.BlockingConnectorTest$TaskInitializeBlockingSinkConnector +org.apache.kafka.connect.integration.ErrantRecordSinkConnector +org.apache.kafka.connect.integration.MonitorableSinkConnector +org.apache.kafka.connect.integration.TestableSinkConnector +org.apache.kafka.connect.runtime.SampleSinkConnector +org.apache.kafka.connect.integration.ConnectWorkerIntegrationTest$EmptyTaskConfigsConnector \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector new file mode 100644 index 0000000000..8ff259f887 --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector @@ -0,0 +1,26 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +org.apache.kafka.connect.integration.BlockingConnectorTest$BlockingConnector +org.apache.kafka.connect.integration.BlockingConnectorTest$InitializeBlockingConnector +org.apache.kafka.connect.integration.BlockingConnectorTest$ConfigBlockingConnector +org.apache.kafka.connect.integration.BlockingConnectorTest$ValidateBlockingConnector +org.apache.kafka.connect.integration.BlockingConnectorTest$BlockingSourceConnector +org.apache.kafka.connect.integration.BlockingConnectorTest$TaskInitializeBlockingSourceConnector +org.apache.kafka.connect.integration.ExactlyOnceSourceIntegrationTest$NaughtyConnector +org.apache.kafka.connect.integration.MonitorableSourceConnector +org.apache.kafka.connect.integration.TestableSourceConnector +org.apache.kafka.connect.runtime.SampleSourceConnector +org.apache.kafka.connect.runtime.rest.resources.ConnectorPluginsResourceTest$ConnectorPluginsResourceTestConnector diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..c58e40f243 --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,22 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.runtime.SampleConverterWithHeaders +org.apache.kafka.connect.runtime.ErrorHandlingTaskTest$FaultyConverter +org.apache.kafka.connect.runtime.isolation.PluginsTest$TestConverter +org.apache.kafka.connect.runtime.isolation.PluginsTest$TestInternalConverter +org.apache.kafka.connect.runtime.isolation.PluginUtilsTest$CollidingConverter +org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithSinglePropertyConfigDef +org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithNoConfigDef \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter new file mode 100644 index 0000000000..b14690acaf --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter @@ -0,0 +1,22 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. 
You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.runtime.SampleHeaderConverter +org.apache.kafka.connect.runtime.ErrorHandlingTaskTest$FaultyConverter +org.apache.kafka.connect.runtime.isolation.PluginsTest$TestHeaderConverter +org.apache.kafka.connect.runtime.isolation.PluginsTest$TestInternalConverter +org.apache.kafka.connect.runtime.isolation.PluginUtilsTest$CollidingHeaderConverter +org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithSinglePropertyConfigDef +org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithNoConfigDef \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation new file mode 100644 index 0000000000..6d36ee9088 --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation @@ -0,0 +1,23 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.integration.ErrorHandlingIntegrationTest$FaultyPassthrough +org.apache.kafka.connect.runtime.ErrorHandlingTaskTest$FaultyPassthrough +org.apache.kafka.connect.runtime.ConnectorConfigTest$SimpleTransformation +org.apache.kafka.connect.runtime.ConnectorConfigTest$HasDuplicateConfigTransformation +org.apache.kafka.connect.runtime.ConnectorConfigTest$AbstractKeyValueTransformation$Key +org.apache.kafka.connect.runtime.ConnectorConfigTest$AbstractKeyValueTransformation$Value +org.apache.kafka.connect.runtime.SampleTransformation +org.apache.kafka.connect.runtime.isolation.PluginUtilsTest$Colliding diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate new file mode 100644 index 0000000000..b235b1fec5 --- /dev/null +++ b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate @@ -0,0 +1,17 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. 
+ # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.runtime.ConnectorConfigTest$TestPredicate +org.apache.kafka.connect.runtime.SamplePredicate \ No newline at end of file diff --git a/connect/runtime/bin/test/log4j2.yaml b/connect/runtime/bin/test/log4j2.yaml new file mode 100644 index 0000000000..45faa63537 --- /dev/null +++ b/connect/runtime/bin/test/log4j2.yaml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %X{connector.context}%m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: kafka + level: WARN + + - name: state.change.logger + level: "OFF" + + - name: org.apache.kafka.connect + level: DEBUG + + # Troubleshooting KAFKA-17493. + - name: org.apache.kafka.consumer + level: DEBUG + + - name: org.apache.kafka.coordinator.group + level: DEBUG diff --git a/connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..1cdad430ed --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.AliasedStaticField diff --git a/connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..084c96f96c --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.AlwaysThrowException diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy new file mode 100644 index 0000000000..9299056576 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.NoDefaultConstructorOverridePolicy diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension new file mode 100644 index 0000000000..8b0f488148 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. 
+ # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.StaticInitializerThrowsRestExtension \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector new file mode 100644 index 0000000000..8e843327a6 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector @@ -0,0 +1,22 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.DefaultConstructorPrivateConnector +test.plugins.DefaultConstructorThrowsConnector +test.plugins.InnocuousSinkConnector +test.plugins.NoDefaultConstructorConnector +test.plugins.StaticInitializerThrowsConnector +test.plugins.OuterClass$InnerClass +test.plugins.VersionMethodThrowsConnector diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..b4696e6999 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,20 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +test.plugins.CoLocatedPlugin +test.plugins.DefaultConstructorThrowsConverter +test.plugins.MissingSuperclassConverter +test.plugins.NoDefaultConstructorConverter + diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter new file mode 100644 index 0000000000..dcf9c16d9a --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter @@ -0,0 +1,17 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.DefaultConstructorThrowsConverter +test.plugins.NoDefaultConstructorConverter diff --git a/connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..ae9c2a5820 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,17 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.converters.ByteArrayConverter + diff --git a/connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..ff148703cb --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,17 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. 
+ # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.ThingOne +test.plugins.ThingTwo diff --git a/connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy new file mode 100644 index 0000000000..82400f7255 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.NonMigratedMultiPlugin diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..89b0af611e --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +test.plugins.ReadVersionFromResource diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version new file mode 100644 index 0000000000..49584c02d0 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +1.0.0 \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..89b0af611e --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.ReadVersionFromResource diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version new file mode 100644 index 0000000000..56d688a444 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +2.0.0 \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider b/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider new file mode 100644 index 0000000000..62d8df254b --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.SamplingConfigProvider diff --git a/connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..232b881a39 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.SamplingConfigurable diff --git a/connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector new file mode 100644 index 0000000000..e83aba248a --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. 
+ # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.SamplingConnector diff --git a/connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..00ece8187b --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.SamplingConverter diff --git a/connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter new file mode 100644 index 0000000000..66291d24c6 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +test.plugins.SamplingHeaderConverter diff --git a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..3dfaea697b --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.ServiceLoaderPlugin diff --git a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass new file mode 100644 index 0000000000..b8db865648 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.ServiceLoadedSubclass \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy new file mode 100644 index 0000000000..e4f915c855 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. 
You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.SubclassOfClasspathOverridePolicy diff --git a/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..418027308e --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.SubclassOfClasspathConverter diff --git a/connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter new file mode 100644 index 0000000000..d37bb90859 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +test.plugins.VersionedConverter diff --git a/connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter new file mode 100644 index 0000000000..25e4b7665b --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.VersionedHeaderConverter diff --git a/connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate b/connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate new file mode 100644 index 0000000000..af841817ab --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.VersionedPredicate diff --git a/connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector new file mode 100644 index 0000000000..a5c560853f --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. 
+ # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.VersionedSinkConnector diff --git a/connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector new file mode 100644 index 0000000000..efee272749 --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +test.plugins.VersionedSourceConnector diff --git a/connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation b/connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation new file mode 100644 index 0000000000..7fed78370f --- /dev/null +++ b/connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +test.plugins.VersionedTransformation diff --git a/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector new file mode 100644 index 0000000000..170043754d --- /dev/null +++ b/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector @@ -0,0 +1,17 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.tools.MockSinkConnector +org.apache.kafka.connect.tools.VerifiableSinkConnector diff --git a/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector new file mode 100644 index 0000000000..acc2ddce71 --- /dev/null +++ b/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector @@ -0,0 +1,18 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.tools.MockSourceConnector +org.apache.kafka.connect.tools.SchemaSourceConnector +org.apache.kafka.connect.tools.VerifiableSourceConnector \ No newline at end of file diff --git a/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation b/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation new file mode 100644 index 0000000000..cf9646be37 --- /dev/null +++ b/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation @@ -0,0 +1,41 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. 
You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +org.apache.kafka.connect.transforms.Cast$Key +org.apache.kafka.connect.transforms.Cast$Value +org.apache.kafka.connect.transforms.DropHeaders +org.apache.kafka.connect.transforms.ExtractField$Key +org.apache.kafka.connect.transforms.ExtractField$Value +org.apache.kafka.connect.transforms.Filter +org.apache.kafka.connect.transforms.Flatten$Key +org.apache.kafka.connect.transforms.Flatten$Value +org.apache.kafka.connect.transforms.HeaderFrom$Key +org.apache.kafka.connect.transforms.HeaderFrom$Value +org.apache.kafka.connect.transforms.HoistField$Key +org.apache.kafka.connect.transforms.HoistField$Value +org.apache.kafka.connect.transforms.InsertField$Key +org.apache.kafka.connect.transforms.InsertField$Value +org.apache.kafka.connect.transforms.InsertHeader +org.apache.kafka.connect.transforms.MaskField$Key +org.apache.kafka.connect.transforms.MaskField$Value +org.apache.kafka.connect.transforms.RegexRouter +org.apache.kafka.connect.transforms.ReplaceField$Key +org.apache.kafka.connect.transforms.ReplaceField$Value +org.apache.kafka.connect.transforms.SetSchemaMetadata$Key +org.apache.kafka.connect.transforms.SetSchemaMetadata$Value +org.apache.kafka.connect.transforms.TimestampConverter$Key +org.apache.kafka.connect.transforms.TimestampConverter$Value +org.apache.kafka.connect.transforms.TimestampRouter +org.apache.kafka.connect.transforms.ValueToKey diff --git a/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate b/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate new file mode 100644 index 0000000000..b451672377 --- /dev/null +++ b/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate @@ -0,0 +1,18 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ +org.apache.kafka.connect.transforms.predicates.HasHeaderKey +org.apache.kafka.connect.transforms.predicates.RecordIsTombstone +org.apache.kafka.connect.transforms.predicates.TopicNameMatches \ No newline at end of file diff --git a/core/src/main/scala/kafka/server/ConfigHandler.scala b/core/src/main/scala/kafka/server/ConfigHandler.scala index 3e82db2909..d6b3404203 100644 --- a/core/src/main/scala/kafka/server/ConfigHandler.scala +++ b/core/src/main/scala/kafka/server/ConfigHandler.scala @@ -17,6 +17,8 @@ package kafka.server +import io.aiven.inkless.control_plane.InitLogDisklessStartOffsetRequest + import java.util.{Collections, Properties} import kafka.server.QuotaFactory.QuotaManagers import kafka.utils.Logging @@ -53,10 +55,52 @@ class TopicConfigHandler(private val replicaManager: ReplicaManager, val logs = logManager.logsByTopic(topic) val wasRemoteLogEnabled = logs.exists(_.remoteLogEnabled()) val wasCopyDisabled = logs.exists(_.config.remoteLogCopyDisable()) + val wasDisklessEnabled = logs.exists(_.config.disklessEnable()) logManager.updateTopicConfig(topic, topicConfig, kafkaConfig.remoteLogManagerConfig.isRemoteStorageSystemEnabled, wasRemoteLogEnabled) maybeUpdateRemoteLogComponents(topic, logs, wasRemoteLogEnabled, wasCopyDisabled) + maybeInitializeDisklessLog(topic, logs, wasDisklessEnabled) + } + + /** + * Initialize the diskless log in the control plane for topics that are being migrated + * from classic (local disk) storage to diskless storage. + * + * This is called when a topic's diskless.enable config changes to true. + * Only the leader partitions need to initialize the diskless log, using each partition's + * log end offset as its diskless start offset. + */ + private[server] def maybeInitializeDisklessLog(topic: String, + logs: Seq[UnifiedLog], + wasDisklessEnabled: Boolean): Unit = { + val isDisklessEnabled = logs.exists(_.config.disklessEnable()) + + // Only initialize if diskless is being enabled (was false, now true) and we have leader partitions + if (isDisklessEnabled && !wasDisklessEnabled) { + val leaderPartitions = logs.flatMap(log => replicaManager.onlinePartition(log.topicPartition)).filter(_.isLeader) + + if (leaderPartitions.nonEmpty) { + replicaManager.getInklessSharedState.foreach { sharedState => + val topicId = replicaManager.metadataCache.getTopicId(topic) + + // Create a request for each leader partition with its own offsets + val requests = leaderPartitions.flatMap { partition => + logs.find(_.topicPartition == partition.topicPartition).map { log => + val logStartOffset = log.logStartOffset + val disklessStartOffset = log.logEndOffset + info(s"Initializing diskless log for partition ${partition.topicPartition} with topicId $topicId, " + + s"logStartOffset $logStartOffset, disklessStartOffset $disklessStartOffset") + new InitLogDisklessStartOffsetRequest(topicId, topic, partition.topicPartition.partition(), logStartOffset, disklessStartOffset) + } + } + + if (requests.nonEmpty) { + sharedState.controlPlane().initLogDisklessStartOffset(requests.toSet.asJava) + } + } + } + } } private[server] def maybeUpdateRemoteLogComponents(topic: String, diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index 1c793038a2..ba25484fac 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -19,7 +19,7 @@ package kafka.server import com.yammer.metrics.core.Meter import io.aiven.inkless.common.SharedState import 
io.aiven.inkless.consume.{FetchHandler, FetchOffsetHandler}
-import io.aiven.inkless.control_plane.{BatchInfo, FindBatchRequest, FindBatchResponse, MetadataView}
+import io.aiven.inkless.control_plane.{BatchInfo, FindBatchRequest, FindBatchResponse, GetDisklessLogRequest, InitLogDisklessStartOffsetRequest, MetadataView}
 import io.aiven.inkless.delete.{DeleteRecordsInterceptor, FileCleaner, RetentionEnforcer}
 import io.aiven.inkless.merge.FileMerger
 import io.aiven.inkless.produce.AppendHandler
@@ -272,6 +272,8 @@ class ReplicaManager(val config: KafkaConfig,
     config.shareGroupConfig.shareFetchPurgatoryPurgeIntervalRequests))
   private val _inklessMetadataView: MetadataView = inklessMetadataView.getOrElse(new InklessMetadataView(metadataCache.asInstanceOf[KRaftMetadataCache], () => config.extractLogConfigMap))
+  def getInklessMetadataView: MetadataView = _inklessMetadataView
+  def getInklessSharedState: Option[SharedState] = inklessSharedState
   private val inklessAppendHandler: Option[AppendHandler] = inklessSharedState.map(new AppendHandler(_))
   private val inklessFetchHandler: Option[FetchHandler] = inklessSharedState.map(new FetchHandler(_))
   private val inklessFetchOffsetHandler: Option[FetchOffsetHandler] = inklessSharedState.map(new FetchOffsetHandler(_))
@@ -1776,16 +1778,17 @@ class ReplicaManager(val config: KafkaConfig,
                     fetchInfos: Seq[(TopicIdPartition, PartitionData)],
                     quota: ReplicaQuota,
                     responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit): Unit = {
+    trace(s"Handling fetch request for fetch infos: $fetchInfos")
     if (fetchInfos.isEmpty) {
       responseCallback(Seq.empty)
       return
     }
-    val (disklessFetchInfosWithoutTopicId, classicFetchInfos) = fetchInfos.partition { case (k, _) => _inklessMetadataView.isDisklessTopic(k.topic()) }
+    val (disklessTopicFetchInfos, initialClassicFetchInfos) = fetchInfos.partition { case (k, _) => _inklessMetadataView.isDisklessTopic(k.topic()) }
     inklessSharedState match {
       case None =>
-        if (disklessFetchInfosWithoutTopicId.nonEmpty) {
-          error(s"Received diskless fetch request for topics ${disklessFetchInfosWithoutTopicId.map(_._1.topic()).distinct.mkString(", ")} but diskless storage system is not enabled. " +
+        if (disklessTopicFetchInfos.nonEmpty) {
+          error(s"Received diskless fetch request for topics ${disklessTopicFetchInfos.map(_._1.topic()).distinct.mkString(", ")} but diskless storage system is not enabled. " +
            s"Replying an empty response.")
          responseCallback(Seq.empty)
          return
@@ -1794,7 +1797,7 @@ class ReplicaManager(val config: KafkaConfig,
     }
 
     // Older fetch versions (<13) don't have topicId in the request -- backfill it for backward compatibility
-    val disklessFetchInfos = disklessFetchInfosWithoutTopicId.map { disklessFetchInfo =>
+    val disklessTopicFetchInfosWithTopicId = disklessTopicFetchInfos.map { disklessFetchInfo =>
       val (topicIdPartition, partitionData) = disklessFetchInfo
       if (topicIdPartition.topicId().equals(Uuid.ZERO_UUID)) {
         _inklessMetadataView.getTopicId(topicIdPartition.topic()) match {
@@ -1810,14 +1813,46 @@ class ReplicaManager(val config: KafkaConfig,
       }
     }
+    // For diskless topics, check if the fetch offset is before disklessStartOffset.
+    // If so, redirect to classic fetch (for migrated topics with local data).
+    val (disklessFetchInfos, classicFetchInfos) = inklessSharedState match {
+      case Some(sharedState) if disklessTopicFetchInfosWithTopicId.nonEmpty =>
+        val disklessLogRequests = disklessTopicFetchInfosWithTopicId.map { case (tp, _) =>
+          new GetDisklessLogRequest(tp.topicId(), tp.partition())
+        }.toList.asJava
+
+        val disklessLogResponses = sharedState.controlPlane().getDisklessLog(disklessLogRequests).asScala
+        val responseMap = disklessLogResponses.map(r => (r.topicId(), r.partition()) -> r).toMap
+
+        val (needsClassicFetch, canDisklessFetch) = disklessTopicFetchInfosWithTopicId.partition { case (tp, partitionData) =>
+          responseMap.get((tp.topicId(), tp.partition())) match {
+            case Some(response) if response.error() == Errors.NONE && response.disklessStartOffset() != null =>
+              // If fetch offset is before disklessStartOffset, use classic fetch
+              partitionData.fetchOffset < response.disklessStartOffset()
+            case _ =>
+              // If partition not found or no disklessStartOffset, use diskless fetch
+              false
+          }
+        }
+        trace(s"Diskless fetch candidates served from diskless storage: $canDisklessFetch")
+        trace(s"Diskless fetch candidates redirected to classic fetch: $needsClassicFetch")
-    if (params.isFromFollower && disklessFetchInfos.nonEmpty) {
-      warn("Diskless topics are not supported for follower fetch requests. " +
-        s"Request from follower ${params.replicaId} contains diskless topics: ${disklessFetchInfos.map(_._1.topic()).mkString(", ")}")
-      responseCallback(Seq.empty)
-      return
+        (canDisklessFetch, initialClassicFetchInfos ++ needsClassicFetch)
+      case _ =>
+        (disklessTopicFetchInfosWithTopicId, initialClassicFetchInfos)
     }
+    trace(s"Diskless fetch infos: $disklessFetchInfos")
+    trace(s"Classic fetch infos: $classicFetchInfos")
+
+//    if (params.isFromFollower && disklessFetchInfos.nonEmpty) {
+//      warn("Diskless topics are not supported for follower fetch requests. " +
+//        s"Request from follower ${params.replicaId} contains diskless topics: ${disklessFetchInfos.map(_._1.topic()).mkString(", ")}")
+//      responseCallback(Seq.empty)
+//      return
+//    }
+    // Override maxWaitMs and minBytes with lower-bound if there are diskless fetches. Otherwise, leave the consumer-provided values.
     val maxWaitMs = if (disklessFetchInfos.nonEmpty) Math.max(config.disklessFetchMaxWaitMs.toLong, params.maxWaitMs) else params.maxWaitMs
     val minBytes = if (disklessFetchInfos.nonEmpty) Math.max(config.disklessFetchMinBytes, params.minBytes) else params.minBytes
@@ -1860,6 +1895,7 @@ class ReplicaManager(val config: KafkaConfig,
     // check if this fetch request can be satisfied right away
     val logReadResults = readFromLog(classicParams, classicFetchInfos, quota, readFromPurgatory = false)
+    trace(s"Classic log read results: $logReadResults")
     var bytesReadable: Long = 0
     var errorReadingData = false
@@ -3115,7 +3151,11 @@ class ReplicaManager(val config: KafkaConfig,
           "local leaders.")
         replicaFetcherManager.removeFetcherForPartitions(localLeaders.keySet)
         localLeaders.foreachEntry { (tp, info) =>
-          if (!_inklessMetadataView.isDisklessTopic(tp.topic()))
+          if (_inklessMetadataView.isDisklessTopic(tp.topic())) {
+            // For diskless topics, check if this is a migrated topic with local logs.
+            // If so, initialize the diskless log in the control plane.
+            maybeInitializeDisklessLogForMigratedTopic(tp, info.topicId)
+          } else {
             getOrCreatePartition(tp, delta, info.topicId).foreach { case (partition, isNew) =>
               try {
                 val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
@@ -3134,6 +3174,40 @@ class ReplicaManager(val config: KafkaConfig,
                 markPartitionOffline(tp)
               }
             }
+          }
+        }
+      }
+
+  /**
+   * For topics that were migrated from classic to diskless storage, we need to ensure
+   * the diskless log is initialized in the control plane when this broker becomes leader.
+   *
+   * This handles the edge case where:
+   * 1. A topic was created as classic (diskless.enable=false)
+   * 2. The config was changed to diskless.enable=true
+   * 3. Another broker processed the config change
+   * 4. This broker becomes leader later
+   *
+   * In this case, the TopicConfigHandler won't fire on this broker, so we need to
+   * initialize the diskless log here.
+   */
+  private def maybeInitializeDisklessLogForMigratedTopic(tp: TopicPartition, topicId: Uuid): Unit = {
+    // Check if we have local logs for this topic (indicating it was a classic topic that was migrated)
+    val logs = logManager.logsByTopic(tp.topic())
+    if (logs.nonEmpty) {
+      inklessSharedState.foreach { sharedState =>
+        logs.find(_.topicPartition == tp).foreach { log =>
+          val logStartOffset = log.logStartOffset
+          val disklessStartOffset = log.logEndOffset
+
+          stateChangeLogger.info(s"Initializing diskless log for migrated topic ${tp.topic()} " +
+            s"partition ${tp.partition()} with topicId $topicId, " +
+            s"logStartOffset $logStartOffset, disklessStartOffset $disklessStartOffset")
+
+          val request = new InitLogDisklessStartOffsetRequest(topicId, tp.topic(), tp.partition(), logStartOffset, disklessStartOffset)
+          sharedState.controlPlane().initLogDisklessStartOffset(java.util.Set.of(request))
+        }
+      }
     }
   }
diff --git a/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java b/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java
new file mode 100644
index 0000000000..c760ee4bcc
--- /dev/null
+++ b/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package kafka.server; + +import org.apache.kafka.clients.CommonClientConfigs; +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AlterConfigOp; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.CreateTopicsResult; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.AutoOffsetResetStrategy; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.record.TimestampType; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer; +import org.apache.kafka.common.test.KafkaClusterTestKit; +import org.apache.kafka.common.test.TestKitNodes; +import org.apache.kafka.coordinator.group.GroupCoordinatorConfig; +import org.apache.kafka.server.config.ReplicationConfigs; +import org.apache.kafka.server.config.ServerConfigs; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import io.aiven.inkless.config.InklessConfig; +import io.aiven.inkless.control_plane.postgres.PostgresControlPlane; +import io.aiven.inkless.control_plane.postgres.PostgresControlPlaneConfig; +import io.aiven.inkless.storage_backend.s3.S3Storage; +import io.aiven.inkless.storage_backend.s3.S3StorageConfig; +import io.aiven.inkless.test_utils.InklessPostgreSQLContainer; +import io.aiven.inkless.test_utils.MinioContainer; +import io.aiven.inkless.test_utils.PostgreSQLTestContainer; +import io.aiven.inkless.test_utils.S3TestContainer; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@Testcontainers +public class ClassicToDisklessMigrationTest { + @Container + protected static InklessPostgreSQLContainer pgContainer = PostgreSQLTestContainer.container(); + @Container + protected static MinioContainer s3Container = S3TestContainer.minio(); + + private static final Logger log = LoggerFactory.getLogger(ClassicToDisklessMigrationTest.class); + + 
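+    // Rack ids mirror the broker ids below: the consumer later sets client.rack to the follower's
+    // rack so that reads go through the follower via the rack-aware replica selector.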
    private static final int LEADER_BROKER_ID = 0;
+    private static final int FOLLOWER_BROKER_ID = 1;
+
+    private KafkaClusterTestKit cluster;
+
+    @BeforeEach
+    public void setup(final TestInfo testInfo) throws Exception {
+        s3Container.createBucket(testInfo);
+        pgContainer.createDatabase(testInfo);
+
+        // Configure broker.rack for each broker to enable fetch from follower
+        Map<Integer, Map<String, String>> perServerProperties = Map.of(
+            LEADER_BROKER_ID, Map.of(ServerConfigs.BROKER_RACK_CONFIG, String.valueOf(LEADER_BROKER_ID)),
+            FOLLOWER_BROKER_ID, Map.of(ServerConfigs.BROKER_RACK_CONFIG, String.valueOf(FOLLOWER_BROKER_ID))
+        );
+
+        final TestKitNodes nodes = new TestKitNodes.Builder()
+            .setCombined(true)
+            .setNumBrokerNodes(2)
+            .setNumControllerNodes(1)
+            .setPerServerProperties(perServerProperties)
+            .build();
+        cluster = new KafkaClusterTestKit.Builder(nodes)
+            .setConfigProp(GroupCoordinatorConfig.OFFSETS_TOPIC_REPLICATION_FACTOR_CONFIG, "1")
+            .setConfigProp(ServerConfigs.DISKLESS_STORAGE_SYSTEM_ENABLE_CONFIG, "true")
+            // Enable fetch from follower
+            .setConfigProp(ReplicationConfigs.REPLICA_SELECTOR_CLASS_CONFIG, "org.apache.kafka.common.replica.RackAwareReplicaSelector")
+            // PG control plane config
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.CONTROL_PLANE_CLASS_CONFIG, PostgresControlPlane.class.getName())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.CONTROL_PLANE_PREFIX + PostgresControlPlaneConfig.CONNECTION_STRING_CONFIG, pgContainer.getJdbcUrl())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.CONTROL_PLANE_PREFIX + PostgresControlPlaneConfig.USERNAME_CONFIG, PostgreSQLTestContainer.USERNAME)
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.CONTROL_PLANE_PREFIX + PostgresControlPlaneConfig.PASSWORD_CONFIG, PostgreSQLTestContainer.PASSWORD)
+            // S3 storage config
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_BACKEND_CLASS_CONFIG, S3Storage.class.getName())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_PREFIX + S3StorageConfig.S3_BUCKET_NAME_CONFIG, s3Container.getBucketName())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_PREFIX + S3StorageConfig.S3_REGION_CONFIG, s3Container.getRegion())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_PREFIX + S3StorageConfig.S3_ENDPOINT_URL_CONFIG, s3Container.getEndpoint())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_PREFIX + S3StorageConfig.S3_PATH_STYLE_ENABLED_CONFIG, "true")
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_PREFIX + S3StorageConfig.AWS_ACCESS_KEY_ID_CONFIG, s3Container.getAccessKey())
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.STORAGE_PREFIX + S3StorageConfig.AWS_SECRET_ACCESS_KEY_CONFIG, s3Container.getSecretKey())
+            // Decrease cache block bytes to test cache split due to alignment
+            .setConfigProp(InklessConfig.PREFIX + InklessConfig.CONSUME_CACHE_BLOCK_BYTES_CONFIG, 16 * 1024)
+            .build();
+        cluster.format();
+        cluster.startup();
+        cluster.waitForReadyBrokers();
+    }
+
+    @AfterEach
+    public void teardown() throws Exception {
+        cluster.close();
+    }
+
+    @Test
+    public void migrateClassicToDisklessTopic() throws Exception {
+        Map<String, Object> clientConfigs = new HashMap<>();
+        clientConfigs.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers());
+        clientConfigs.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, String.valueOf(true));
+        clientConfigs.put(ProducerConfig.LINGER_MS_CONFIG, "1000");
+        clientConfigs.put(ProducerConfig.BATCH_SIZE_CONFIG, "100000");
+        clientConfigs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
+        clientConfigs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
+        clientConfigs.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+        clientConfigs.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+        // The default is latest, in which case nothing would get consumed.
+        clientConfigs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, AutoOffsetResetStrategy.EARLIEST.name());
+        clientConfigs.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, "5000000");
+        String topicName = "classic-to-diskless-topic";
+        int numRecordsBeforeMigration = 250;
+        int numRecordsAfterMigration = 250;
+        int totalRecords = numRecordsBeforeMigration + numRecordsAfterMigration;
+
+        // Step 1: Create topic with diskless.enable=false (classic mode) and replication factor 2
+        try (Admin admin = AdminClient.create(clientConfigs)) {
+            final NewTopic topic = new NewTopic(topicName, 1, (short) 2)
+                .configs(Map.of(
+                    TopicConfig.DISKLESS_ENABLE_CONFIG, "false",
+                    TopicConfig.MESSAGE_TIMESTAMP_TYPE_CONFIG, TimestampType.CREATE_TIME.name
+                ));
+            CreateTopicsResult topics = admin.createTopics(Collections.singletonList(topic));
+            topics.all().get(10, TimeUnit.SECONDS);
+        }
+
+        // Step 2: Produce messages to classic topic
+        AtomicInteger recordsProducedBeforeMigration = new AtomicInteger();
+        final long now = System.currentTimeMillis();
+        try (Producer<byte[], byte[]> producer = new KafkaProducer<>(clientConfigs)) {
+            for (int i = 0; i < numRecordsBeforeMigration; i++) {
+                byte[] value = new byte[10000];
+                final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(topicName, 0, now, null, value);
+                producer.send(record, (metadata, exception) -> {
+                    if (exception != null) {
+                        log.error("Failed to send record", exception);
+                    } else {
+                        log.info("Committed value at offset {} at {}", metadata.offset(), now);
+                        recordsProducedBeforeMigration.incrementAndGet();
+                    }
+                });
+            }
+            producer.flush();
+        }
+        assertEquals(numRecordsBeforeMigration, recordsProducedBeforeMigration.get());
+
+        // Step 3: Consume messages from classic topic to verify it works
+        consumeWithSubscription(TimestampType.CREATE_TIME, clientConfigs, topicName, now, numRecordsBeforeMigration);
+
+        // Step 4: Migrate topic to diskless mode by changing config
+        try (Admin admin = AdminClient.create(clientConfigs)) {
+            ConfigResource topicResource = new ConfigResource(ConfigResource.Type.TOPIC, topicName);
+            AlterConfigOp alterConfigOp = new AlterConfigOp(
+                new ConfigEntry(TopicConfig.DISKLESS_ENABLE_CONFIG, "true"),
+                AlterConfigOp.OpType.SET
+            );
+            admin.incrementalAlterConfigs(Map.of(topicResource, Collections.singletonList(alterConfigOp)))
+                .all().get(10, TimeUnit.SECONDS);
+        }
+
+        // Step 5: Produce more messages after migration
+        AtomicInteger recordsProducedAfterMigration = new AtomicInteger();
+        try (Producer<byte[], byte[]> producer = new KafkaProducer<>(clientConfigs)) {
+            for (int i = 0; i < numRecordsAfterMigration; i++) {
+                byte[] value = new byte[10000];
+                final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(topicName, 0, now, null, value);
+                producer.send(record, (metadata, exception) -> {
+                    if (exception != null) {
+                        log.error("Failed to send record", exception);
+                    } else {
+                        log.info("Committed value at offset {} at {}", metadata.offset(), now);
+                        recordsProducedAfterMigration.incrementAndGet();
+                    }
+                });
+            }
+            producer.flush();
+        }
+        assertEquals(numRecordsAfterMigration, recordsProducedAfterMigration.get());
+
+        // Step 6: Consume from the beginning to verify all messages are available
+        consumeWithSubscription(TimestampType.CREATE_TIME, clientConfigs, topicName, now, totalRecords);
+    }
+
+    private static void consumeWithSubscription(TimestampType timestampType, Map<String, Object> clientConfigs, String topicName, long now, int numRecords) {
+        final Map<String, Object> consumerConfigs = new HashMap<>(clientConfigs);
+        consumerConfigs.put(ConsumerConfig.GROUP_ID_CONFIG, java.util.UUID.randomUUID().toString());
+        // Set client.rack to match the follower broker's rack to enable fetch from follower
+        consumerConfigs.put(ConsumerConfig.CLIENT_RACK_CONFIG, String.valueOf(FOLLOWER_BROKER_ID));
+        int recordsConsumed;
+        try (Consumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerConfigs)) {
+            consumer.subscribe(Collections.singletonList(topicName));
+            recordsConsumed = poll(consumer, timestampType, now, numRecords);
+        }
+        assertEquals(numRecords, recordsConsumed);
+    }
+
+    private static int poll(Consumer<byte[], byte[]> consumer, TimestampType timestampType, long now, int expectedRecords) {
+        int recordsConsumed = 0;
+        long deadline = System.currentTimeMillis() + 30_000;
+        while (recordsConsumed < expectedRecords && System.currentTimeMillis() < deadline) {
+            ConsumerRecords<byte[], byte[]> poll = consumer.poll(Duration.ofSeconds(5));
+            for (ConsumerRecord<byte[], byte[]> record : poll) {
+                log.info("Received record {} at {}", recordsConsumed, record.timestamp());
+                switch (timestampType) {
+                    case CREATE_TIME -> assertEquals(now, record.timestamp());
+                    case LOG_APPEND_TIME -> assertTrue(record.timestamp() > now);
+                }
+                recordsConsumed++;
+            }
+        }
+        return recordsConsumed;
+    }
+}
diff --git a/docker/examples/docker-compose-files/inkless/docker-compose.yml b/docker/examples/docker-compose-files/inkless/docker-compose.yml
index 2a1dd0739c..c0473de914 100644
--- a/docker/examples/docker-compose-files/inkless/docker-compose.yml
+++ b/docker/examples/docker-compose-files/inkless/docker-compose.yml
@@ -11,7 +11,9 @@ services:
       KAFKA_PROCESS_ROLES: "broker,controller"
       KAFKA_NODE_ID: 1
       # Add rack to the broker to simulate AZ placement
-      # KAFKA_BROKER_RACK: "az1"
+      KAFKA_BROKER_RACK: "az1"
+      # Enable fetch from follower with rack-aware replica selector
+      KAFKA_REPLICA_SELECTOR_CLASS: "org.apache.kafka.common.replica.RackAwareReplicaSelector"
       KAFKA_CONTROLLER_QUORUM_VOTERS: "1@broker:29090,2@broker2:29090"
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT"
       KAFKA_LISTENERS: "CONTROLLER://:29090,PLAINTEXT_HOST://:9092,PLAINTEXT://:19092"
@@ -67,7 +69,7 @@ services:
       <<: *base-broker-env
       KAFKA_NODE_ID: 2
       # Add rack to the broker to simulate AZ placement
-      # KAFKA_BROKER_RACK: "az2"
+      KAFKA_BROKER_RACK: "az2"
       KAFKA_LISTENERS: "CONTROLLER://:29090,PLAINTEXT_HOST://:9093,PLAINTEXT://:19093"
       KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT_HOST://localhost:9093,PLAINTEXT://broker2:19093"
diff --git a/server-common/bin/test/log4j2.yaml b/server-common/bin/test/log4j2.yaml
new file mode 100644
index 0000000000..be546a18b5
--- /dev/null
+++ b/server-common/bin/test/log4j2.yaml
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: org.apache.kafka + level: INFO diff --git a/share-coordinator/bin/main/common/message/ShareSnapshotKey.json b/share-coordinator/bin/main/common/message/ShareSnapshotKey.json new file mode 100644 index 0000000000..feeb6d4ee1 --- /dev/null +++ b/share-coordinator/bin/main/common/message/ShareSnapshotKey.json @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 0, + "type": "coordinator-key", + "name": "ShareSnapshotKey", + "validVersions": "0", + "flexibleVersions": "none", + "fields": [ + { "name": "GroupId", "type": "string", "versions": "0", + "about": "The group id." }, + { "name": "TopicId", "type": "uuid", "versions": "0", + "about": "The topic id." }, + { "name": "Partition", "type": "int32", "versions": "0", + "about": "The partition index." } + ] +} + diff --git a/share-coordinator/bin/main/common/message/ShareSnapshotValue.json b/share-coordinator/bin/main/common/message/ShareSnapshotValue.json new file mode 100644 index 0000000000..6126cfd0a2 --- /dev/null +++ b/share-coordinator/bin/main/common/message/ShareSnapshotValue.json @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +{ + "apiKey": 0, + "type": "coordinator-value", + "name": "ShareSnapshotValue", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { "name": "SnapshotEpoch", "type": "uint16", "versions": "0+", + "about": "The snapshot epoch." }, + { "name": "StateEpoch", "type": "int32", "versions": "0+", + "about": "The state epoch for this share-partition." }, + { "name": "LeaderEpoch", "type": "int32", "versions": "0+", + "about": "The leader epoch of the share-partition." }, + { "name": "StartOffset", "type": "int64", "versions": "0+", + "about": "The share-partition start offset." }, + { "name": "CreateTimestamp", "type": "int64", "versions": "0+", + "about": "The time at which the state was created." }, + { "name": "WriteTimestamp", "type": "int64", "versions": "0+", + "about": "The time at which the state was written or rewritten." }, + { "name": "StateBatches", "type": "[]StateBatch", "versions": "0+", + "about": "The state batches.", "fields": [ + { "name": "FirstOffset", "type": "int64", "versions": "0+", + "about": "The first offset of this state batch." }, + { "name": "LastOffset", "type": "int64", "versions": "0+", + "about": "The last offset of this state batch." }, + { "name": "DeliveryState", "type": "int8", "versions": "0+", + "about": "The delivery state - 0:Available,2:Acked,4:Archived." }, + { "name": "DeliveryCount", "type": "int16", "versions": "0+", + "about": "The delivery count." } + ]} + ] +} + diff --git a/share-coordinator/bin/main/common/message/ShareUpdateKey.json b/share-coordinator/bin/main/common/message/ShareUpdateKey.json new file mode 100644 index 0000000000..f026b840bc --- /dev/null +++ b/share-coordinator/bin/main/common/message/ShareUpdateKey.json @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 1, + "type": "coordinator-key", + "name": "ShareUpdateKey", + "validVersions": "0", + "flexibleVersions": "none", + "fields": [ + { "name": "GroupId", "type": "string", "versions": "0", + "about": "The group id." }, + { "name": "TopicId", "type": "uuid", "versions": "0", + "about": "The topic id." }, + { "name": "Partition", "type": "int32", "versions": "0", + "about": "The partition index." } + ] +} + diff --git a/share-coordinator/bin/main/common/message/ShareUpdateValue.json b/share-coordinator/bin/main/common/message/ShareUpdateValue.json new file mode 100644 index 0000000000..35e31462a9 --- /dev/null +++ b/share-coordinator/bin/main/common/message/ShareUpdateValue.json @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 1, + "type": "coordinator-value", + "name": "ShareUpdateValue", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { "name": "SnapshotEpoch", "type": "uint16", "versions": "0+", + "about": "The snapshot epoch." }, + { "name": "LeaderEpoch", "type": "int32", "versions": "0+", + "about": "The leader epoch of the share-partition." }, + { "name": "StartOffset", "type": "int64", "versions": "0+", + "about": "The share-partition start offset, or -1 if the start offset is not being updated." }, + { "name": "StateBatches", "type": "[]StateBatch", "versions": "0+", + "about": "The state batches that have been updated.", "fields": [ + { "name": "FirstOffset", "type": "int64", "versions": "0+", + "about": "The first offset of this state batch." }, + { "name": "LastOffset", "type": "int64", "versions": "0+", + "about": "The last offset of this state batch." }, + { "name": "DeliveryState", "type": "int8", "versions": "0+", + "about": "The delivery state - 0:Available,2:Acked,4:Archived." }, + { "name": "DeliveryCount", "type": "int16", "versions": "0+", + "about": "The delivery count." } + ]} + ] +} + diff --git a/shell/bin/test/log4j2.yaml b/shell/bin/test/log4j2.yaml new file mode 100644 index 0000000000..c229cbce31 --- /dev/null +++ b/shell/bin/test/log4j2.yaml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: DEBUG + AppenderRef: + - ref: STDOUT diff --git a/storage/bin/main/message/ProducerSnapshot.json b/storage/bin/main/message/ProducerSnapshot.json new file mode 100644 index 0000000000..cdb9da8330 --- /dev/null +++ b/storage/bin/main/message/ProducerSnapshot.json @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "type": "data", + "name": "ProducerSnapshot", + "validVersions": "1", + "flexibleVersions": "none", + "fields": [ + { + "name": "Crc", + "type": "uint32", + "versions": "1", + "about": "CRC of the snapshot data" + }, + { + "name": "ProducerEntries", + "type": "[]ProducerEntry", + "versions": "1", + "about": "The entries in the producer table", + "fields": [ + { + "name": "ProducerId", + "type": "int64", + "versions": "1", + "about": "The producer ID" + }, + { + "name": "Epoch", + "type": "int16", + "versions": "1", + "about": "Current epoch of the producer" + }, + { + "name": "LastSequence", + "type": "int32", + "versions": "1", + "about": "Last written sequence of the producer" + }, + { + "name": "LastOffset", + "type": "int64", + "versions": "1", + "about": "Last written offset of the producer" + }, + { + "name": "OffsetDelta", + "type": "int32", + "versions": "1", + "about": "The difference of the last sequence and first sequence in the last written batch" + }, + { + "name": "Timestamp", + "type": "int64", + "versions": "1", + "about": "Max timestamp from the last written entry" + }, + { + "name": "CoordinatorEpoch", + "type": "int32", + "versions": "1", + "about": "The epoch of the last transaction coordinator to send an end transaction marker" + }, + { + "name": "CurrentTxnFirstOffset", + "type": "int64", + "versions": "1", + "about": "The first offset of the on-going transaction (-1 if there is none)" + } + ] + } + ] +} diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json new file mode 100644 index 0000000000..9c035f5263 --- /dev/null +++ b/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +{ + "apiKey": 0, + "type": "metadata", + "name": "RemoteLogSegmentMetadataRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "RemoteLogSegmentId", + "type": "RemoteLogSegmentIdEntry", + "versions": "0+", + "about": "Unique representation of the remote log segment.", + "fields": [ + { + "name": "TopicIdPartition", + "type": "TopicIdPartitionEntry", + "versions": "0+", + "about": "Represents unique topic partition.", + "fields": [ + { + "name": "Name", + "type": "string", + "versions": "0+", + "about": "Topic name." + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the topic." + }, + { + "name": "Partition", + "type": "int32", + "versions": "0+", + "about": "Partition number." + } + ] + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the remote log segment." + } + ] + }, + { + "name": "StartOffset", + "type": "int64", + "versions": "0+", + "about": "Start offset of the segment." + }, + { + "name": "EndOffset", + "type": "int64", + "versions": "0+", + "about": "End offset of the segment." + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker id from which this event is generated." + }, + { + "name": "MaxTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Maximum timestamp in milli seconds with in this segment." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Epoch time in milli seconds at which this event is generated." + }, + { + "name": "SegmentLeaderEpochs", + "type": "[]SegmentLeaderEpochEntry", + "versions": "0+", + "about": "Leader epoch to start-offset mappings for the records with in this segment.", + "fields": [ + { + "name": "LeaderEpoch", + "type": "int32", + "versions": "0+", + "about": "Leader epoch" + }, + { + "name": "Offset", + "type": "int64", + "versions": "0+", + "about": "Start offset for the leader epoch." + } + ] + }, + { + "name": "SegmentSizeInBytes", + "type": "int32", + "versions": "0+", + "about": "Segment size in bytes." + }, + { + "name": "CustomMetadata", + "type": "bytes", + "default": "null", + "versions": "0+", + "nullableVersions": "0+", + "about": "Custom metadata." + }, + { + "name": "RemoteLogSegmentState", + "type": "int8", + "versions": "0+", + "about": "State identifier of the remote log segment, which is RemoteLogSegmentState.id()." + }, + { + "name": "TxnIndexEmpty", + "type": "bool", + "versions": "0+", + "about": "Flag to indicate if the transaction index is empty.", + "taggedVersions": "0+", + "tag": 0 + } + ] +} \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json new file mode 100644 index 0000000000..f4a1f19dca --- /dev/null +++ b/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 3, + "type": "metadata", + "name": "RemoteLogSegmentMetadataSnapshotRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "SegmentId", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the log segment" + }, + { + "name": "StartOffset", + "type": "int64", + "versions": "0+", + "about": "Start offset of the segment." + }, + { + "name": "EndOffset", + "type": "int64", + "versions": "0+", + "about": "End offset of the segment." + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker (controller or leader) id from which this event is created or updated." + }, + { + "name": "MaxTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Maximum timestamp with in this segment." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Event timestamp of this segment." + }, + { + "name": "SegmentLeaderEpochs", + "type": "[]SegmentLeaderEpochEntry", + "versions": "0+", + "about": "Leader epochs of this segment.", + "fields": [ + { + "name": "LeaderEpoch", + "type": "int32", + "versions": "0+", + "about": "Leader epoch" + }, + { + "name": "Offset", + "type": "int64", + "versions": "0+", + "about": "Start offset for the leader epoch" + } + ] + }, + { + "name": "SegmentSizeInBytes", + "type": "int32", + "versions": "0+", + "about": "Segment size in bytes" + }, + { + "name": "CustomMetadata", + "type": "bytes", + "default": "null", + "versions": "0+", + "nullableVersions": "0+", + "about": "Custom metadata." + }, + { + "name": "RemoteLogSegmentState", + "type": "int8", + "versions": "0+", + "about": "State of the remote log segment" + }, + { + "name": "TxnIndexEmpty", + "type": "bool", + "versions": "0+", + "about": "Flag to indicate if the transaction index is empty.", + "taggedVersions": "0+", + "tag": 0 + } + ] +} \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json new file mode 100644 index 0000000000..48aa34d4e9 --- /dev/null +++ b/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +{ + "apiKey": 1, + "type": "metadata", + "name": "RemoteLogSegmentMetadataUpdateRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "RemoteLogSegmentId", + "type": "RemoteLogSegmentIdEntry", + "versions": "0+", + "about": "Unique representation of the remote log segment.", + "fields": [ + { + "name": "TopicIdPartition", + "type": "TopicIdPartitionEntry", + "versions": "0+", + "about": "Represents unique topic partition.", + "fields": [ + { + "name": "Name", + "type": "string", + "versions": "0+", + "about": "Topic name." + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the topic." + }, + { + "name": "Partition", + "type": "int32", + "versions": "0+", + "about": "Partition number." + } + ] + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the remote log segment." + } + ] + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker id from which this event is generated." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Epoch time in milli seconds at which this event is generated." + }, + { + "name": "CustomMetadata", + "type": "bytes", + "default": "null", + "versions": "0+", + "nullableVersions": "0+", + "about": "Custom metadata." + }, + { + "name": "RemoteLogSegmentState", + "type": "int8", + "versions": "0+", + "about": "State identifier of the remote log segment, which is RemoteLogSegmentState.id()." + } + ] +} \ No newline at end of file diff --git a/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json b/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json new file mode 100644 index 0000000000..f5e955bcea --- /dev/null +++ b/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 2, + "type": "metadata", + "name": "RemotePartitionDeleteMetadataRecord", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "TopicIdPartition", + "type": "TopicIdPartitionEntry", + "versions": "0+", + "about": "Represents unique topic partition.", + "fields": [ + { + "name": "Name", + "type": "string", + "versions": "0+", + "about": "Topic name." + }, + { + "name": "Id", + "type": "uuid", + "versions": "0+", + "about": "Unique identifier of the topic." + }, + { + "name": "Partition", + "type": "int32", + "versions": "0+", + "about": "Partition number." + } + ] + }, + { + "name": "BrokerId", + "type": "int32", + "versions": "0+", + "about": "Broker (controller or leader) id from which this event is created. DELETE_PARTITION_MARKED is sent by the controller. 
DELETE_PARTITION_STARTED and DELETE_PARTITION_FINISHED are sent by remote log metadata topic partition leader." + }, + { + "name": "EventTimestampMs", + "type": "int64", + "versions": "0+", + "about": "Epoch time in milli seconds at which this event is generated." + }, + { + "name": "RemotePartitionDeleteState", + "type": "int8", + "versions": "0+", + "about": "Deletion state identifier of the remote partition, which is RemotePartitionDeleteState.id()." + } + ] +} \ No newline at end of file diff --git a/storage/bin/test/log4j2.yaml b/storage/bin/test/log4j2.yaml new file mode 100644 index 0000000000..2d017d3b58 --- /dev/null +++ b/storage/bin/test/log4j2.yaml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + - name: "fileLogPattern" + value: "%d [%t] %-5p %c %x - %m%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + RollingFile: + - name: FileAppender + fileName: build/kafka-storage-test/storage.log + filePattern: "build/kafka-storage-test/storage-%d{yyyy-MM-dd}.log" + PatternLayout: + pattern: "${fileLogPattern}" + TimeBasedTriggeringPolicy: + interval: 1 + + Loggers: + Root: + level: OFF + AppenderRef: + - ref: STDOUT + Logger: + - name: org.apache.kafka.server.log.remote.storage + level: INFO + AppenderRef: + - ref: FileAppender + + - name: org.apache.kafka.server.log.remote.metadata.storage + level: INFO + AppenderRef: + - ref: FileAppender + + - name: kafka.log.remote + level: INFO + AppenderRef: + - ref: FileAppender diff --git a/storage/bin/test/org/apache/kafka/tiered/storage/README.md b/storage/bin/test/org/apache/kafka/tiered/storage/README.md new file mode 100644 index 0000000000..ddfc102167 --- /dev/null +++ b/storage/bin/test/org/apache/kafka/tiered/storage/README.md @@ -0,0 +1,11 @@ +# The Test Flow + +Step 1: For every test, setup is done via TieredStorageTestHarness which extends IntegrationTestHarness and sets up a cluster with TS enabled on it. + +Step 2: The test is written as a specification consisting of sequential actions and assertions. The spec for the complete test is written down first which creates "actions" to be executed. + +Step 3: Once we have the test spec in-place (which includes assertion actions), we execute the test which will execute each action sequentially. + +Step 4: The test execution stops when any of the action throws an exception (or an assertion error). 
+ +Step 5: Clean-up for the test is performed on test exit \ No newline at end of file diff --git a/storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql b/storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql new file mode 100644 index 0000000000..7ddd08aa3c --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql @@ -0,0 +1,9 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ + +-- Create index on the same columns as batches_by_last_offset_idx, but including also other +-- columns that are useful to speed up the scans done by RetentionEnforcer. +CREATE INDEX batches_by_last_offset_covering_idx ON batches (topic_id, partition, last_offset) + INCLUDE (base_offset, byte_size, timestamp_type, batch_max_timestamp, log_append_timestamp); + +-- This index now is a duplicate and it's not needed anymore. +DROP INDEX batches_by_last_offset_idx; diff --git a/storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql b/storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql new file mode 100644 index 0000000000..1d6cc731c9 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql @@ -0,0 +1,3 @@ +-- Copyright (c) 2024-2025 Aiven, Helsinki, Finland. https://aiven.io/ +ALTER TABLE logs ADD COLUMN diskless_start_offset offset_nullable_t DEFAULT NULL; +ALTER TABLE logs ADD COLUMN diskless_end_offset offset_nullable_t DEFAULT NULL; \ No newline at end of file diff --git a/storage/inkless/bin/main/db/migration/V1__Create_tables.sql b/storage/inkless/bin/main/db/migration/V1__Create_tables.sql new file mode 100644 index 0000000000..3eec1ae8db --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V1__Create_tables.sql @@ -0,0 +1,1035 @@ +-- Copyright (c) 2024-2025 Aiven, Helsinki, Finland. https://aiven.io/ +CREATE DOMAIN broker_id_t AS INT NOT NULL; + +CREATE DOMAIN topic_id_t AS UUID NOT NULL; + +CREATE DOMAIN partition_t AS INT NOT NULL +CHECK (VALUE >= 0); + +CREATE DOMAIN topic_name_t VARCHAR(255) NOT NULL; + +CREATE DOMAIN magic_t AS SMALLINT NOT NULL +CHECK (VALUE >= 0 AND VALUE <= 2); + +CREATE DOMAIN format_t AS SMALLINT NOT NULL +CHECK (value >= 1 AND VALUE <= 3); + +CREATE DOMAIN offset_nullable_t BIGINT +CHECK (VALUE IS NULL OR VALUE >= 0); +CREATE DOMAIN offset_t AS offset_nullable_t +CHECK (VALUE IS NOT NULL); +CREATE DOMAIN offset_with_minus_one_t BIGINT +CHECK (VALUE IS NOT NULL AND VALUE >= -1); + +CREATE DOMAIN byte_offset_t BIGINT NOT NULL +CHECK (VALUE >= 0); + +CREATE DOMAIN byte_size_t BIGINT NOT NULL +CHECK (VALUE >= 0); + +CREATE DOMAIN object_key_t AS VARCHAR(1024) NOT NULL; + +CREATE DOMAIN timestamp_type_t AS SMALLINT NOT NULL +CHECK (VALUE >= -1 AND VALUE <= 1); + +CREATE DOMAIN timestamp_t AS BIGINT NOT NULL +CHECK (VALUE >= -5); + +CREATE DOMAIN producer_id_t AS BIGINT NOT NULL +CHECK (VALUE >= -1); + +CREATE DOMAIN producer_epoch_t AS SMALLINT NOT NULL +CHECK (VALUE >= -1); + +CREATE DOMAIN sequence_t AS INT NOT NULL +CHECK (VALUE >= -1); + +CREATE TABLE logs ( + topic_id topic_id_t, + partition partition_t, + topic_name topic_name_t, + log_start_offset offset_t, + high_watermark offset_t, + PRIMARY KEY(topic_id, partition) +); + +-- The reasons why a file on the remote storage exists. +CREATE TYPE file_reason_t AS ENUM ( + -- Uploaded by a broker as the result of producing. + 'produce', + -- Uploaded by a broker as the result of merging. 
+ 'merge' +); + +CREATE TYPE file_state_t AS ENUM ( + -- Uploaded by a broker, in use, etc. + 'uploaded', + -- Marked for deletion. + 'deleting' +); + +CREATE TABLE files ( + file_id BIGSERIAL PRIMARY KEY, + object_key object_key_t UNIQUE NOT NULL, + format format_t, + reason file_reason_t NOT NULL, + state file_state_t NOT NULL, + uploader_broker_id broker_id_t, + committed_at TIMESTAMP WITH TIME ZONE, + marked_for_deletion_at TIMESTAMP WITH TIME ZONE, + size byte_size_t +); + +CREATE INDEX files_by_state_only_deleting_idx ON files (state) WHERE state = 'deleting'; + +CREATE TABLE batches ( + batch_id BIGSERIAL PRIMARY KEY, + magic magic_t, + topic_id topic_id_t, + partition partition_t, + base_offset offset_t, + last_offset offset_t, + file_id BIGINT NOT NULL, + byte_offset byte_offset_t, + byte_size byte_size_t, + timestamp_type timestamp_type_t, + log_append_timestamp timestamp_t, + batch_max_timestamp timestamp_t, + CONSTRAINT fk_batches_logs FOREIGN KEY (topic_id, partition) REFERENCES logs(topic_id, partition) + ON DELETE NO ACTION ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED, -- allow deleting logs before batches + CONSTRAINT fk_batches_files FOREIGN KEY (file_id) REFERENCES files(file_id) ON DELETE RESTRICT ON UPDATE CASCADE +); +-- This index should also cover fk_batches_logs. +CREATE INDEX batches_by_last_offset_idx ON batches (topic_id, partition, last_offset); +-- This index covers fk_batches_files. +CREATE INDEX batches_by_file ON batches (file_id); + +CREATE TABLE producer_state ( + topic_id topic_id_t, + partition partition_t, + producer_id producer_id_t, + row_id BIGSERIAL, + producer_epoch producer_epoch_t, + base_sequence sequence_t, + last_sequence sequence_t, + assigned_offset offset_t, + batch_max_timestamp timestamp_t, + PRIMARY KEY (topic_id, partition, producer_id, row_id) +); + +CREATE TYPE commit_batch_request_v1 AS ( + magic magic_t, + topic_id topic_id_t, + partition partition_t, + byte_offset byte_offset_t, + byte_size byte_size_t, + base_offset offset_t, + last_offset offset_t, + timestamp_type timestamp_type_t, + batch_max_timestamp timestamp_t, + producer_id producer_id_t, + producer_epoch producer_epoch_t, + base_sequence sequence_t, + last_sequence sequence_t +); + +CREATE TYPE commit_batch_response_error_v1 AS ENUM ( + 'none', + -- errors + 'nonexistent_log', + 'invalid_producer_epoch', + 'sequence_out_of_order', + 'duplicate_batch' +); + +CREATE TYPE commit_batch_response_v1 AS ( + topic_id topic_id_t, + partition partition_t, + log_start_offset offset_nullable_t, + assigned_base_offset offset_nullable_t, + batch_timestamp timestamp_t, + error commit_batch_response_error_v1 +); + +CREATE FUNCTION commit_file_v1( + arg_object_key object_key_t, + arg_format format_t, + arg_uploader_broker_id broker_id_t, + arg_file_size byte_size_t, + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests commit_batch_request_v1[] +) +RETURNS SETOF commit_batch_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_new_file_id BIGINT; + l_request RECORD; + l_log logs%ROWTYPE; + l_duplicate RECORD; + l_assigned_offset offset_nullable_t; + l_new_high_watermark offset_nullable_t; + l_last_sequence_in_producer_epoch BIGINT; +BEGIN + INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) + VALUES (arg_object_key, arg_format, 'produce', 'uploaded', arg_uploader_broker_id, arg_now, arg_file_size) + RETURNING file_id + INTO l_new_file_id; + + -- We use this temporary table to perform the write operations in loop on it first + -- and only then 
dump the result on the real table. This reduces the WAL pressure and latency of the function. + CREATE TEMPORARY TABLE logs_tmp + ON COMMIT DROP + AS + -- Extract the relevant logs into the temporary table and simultaneously lock them. + -- topic_name and log_start_offset aren't technically needed, but having them allows declaring `l_log logs%ROWTYPE`. + SELECT * + FROM logs + WHERE (topic_id, partition) IN (SELECT DISTINCT topic_id, partition FROM unnest(arg_requests)) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE; + + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + LOOP + -- A small optimization: select the log into a variable only if it's a different topic-partition. + -- Batches are sorted by topic-partitions, so this makes sense. + IF l_log.topic_id IS DISTINCT FROM l_request.topic_id + OR l_log.partition IS DISTINCT FROM l_request.partition THEN + + SELECT * + FROM logs_tmp + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'nonexistent_log')::commit_batch_response_v1; + CONTINUE; + END IF; + END IF; + + l_assigned_offset = l_log.high_watermark; + + -- Validate that the new request base sequence is not larger than the previous batch last sequence + IF l_request.producer_id > -1 AND l_request.producer_epoch > -1 + THEN + -- If there are previous batches for the producer, check that the producer epoch is not smaller than the last batch + IF EXISTS ( + SELECT 1 + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND producer_epoch > l_request.producer_epoch + ) THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'invalid_producer_epoch')::commit_batch_response_v1; + CONTINUE; + END IF; + + SELECT MAX(last_sequence) + INTO l_last_sequence_in_producer_epoch + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND producer_epoch = l_request.producer_epoch; + + -- If there are previous batches for the producer + IF l_last_sequence_in_producer_epoch IS NULL THEN + -- If there are no previous batches for the producer, the base sequence must be 0 + IF l_request.base_sequence <> 0 + THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; + CONTINUE; + END IF; + ELSE + -- Check for duplicates + SELECT * + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND producer_epoch = l_request.producer_epoch + AND base_sequence = l_request.base_sequence + AND last_sequence = l_request.last_sequence + INTO l_duplicate; + IF FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_duplicate.assigned_offset, l_duplicate.batch_max_timestamp, 'duplicate_batch')::commit_batch_response_v1; + CONTINUE; + END IF; + + -- Check that the sequence is not out of order. + -- A sequence is out of order if the base sequence is not a continuation of the last sequence + -- or, in case of wraparound, the base sequence must be 0 and the last sequence must be 2147483647 (Integer.MAX_VALUE). 
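+ -- For example (illustrative values): with l_last_sequence_in_producer_epoch = 41, a batch whose base_sequence is 42 continues the sequence and is accepted, whereas base_sequence 44 skips sequence numbers and is rejected below as 'sequence_out_of_order'.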
+ IF (l_request.base_sequence - 1) <> l_last_sequence_in_producer_epoch OR (l_last_sequence_in_producer_epoch = 2147483647 AND l_request.base_sequence <> 0) THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; + CONTINUE; + END IF; + END IF; + + INSERT INTO producer_state ( + topic_id, partition, producer_id, + producer_epoch, base_sequence, last_sequence, assigned_offset, batch_max_timestamp + ) + VALUES ( + l_request.topic_id, l_request.partition, l_request.producer_id, + l_request.producer_epoch, l_request.base_sequence, l_request.last_sequence, l_assigned_offset, l_request.batch_max_timestamp + ); + -- Keep only the last 5 records. + -- 5 == org.apache.kafka.storage.internals.log.ProducerStateEntry.NUM_BATCHES_TO_RETAIN + DELETE FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND row_id <= ( + SELECT row_id + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + ORDER BY row_id DESC + LIMIT 1 + OFFSET 5 + ); + END IF; + + UPDATE logs_tmp + SET high_watermark = high_watermark + (l_request.last_offset - l_request.base_offset + 1) + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + RETURNING high_watermark + INTO l_new_high_watermark; + + l_log.high_watermark = l_new_high_watermark; + + INSERT INTO batches ( + magic, + topic_id, partition, + base_offset, + last_offset, + file_id, + byte_offset, byte_size, + timestamp_type, log_append_timestamp, batch_max_timestamp + ) + VALUES ( + l_request.magic, + l_request.topic_id, l_request.partition, + l_assigned_offset, + l_new_high_watermark - 1, + l_new_file_id, + l_request.byte_offset, l_request.byte_size, + l_request.timestamp_type, + (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT, + l_request.batch_max_timestamp + ); + + RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_assigned_offset, l_request.batch_max_timestamp, 'none')::commit_batch_response_v1; + END LOOP; + + -- Transfer from the temporary to real table. + UPDATE logs + SET high_watermark = logs_tmp.high_watermark + FROM logs_tmp + WHERE logs.topic_id = logs_tmp.topic_id + AND logs.partition = logs_tmp.partition; + + IF NOT EXISTS (SELECT 1 FROM batches WHERE file_id = l_new_file_id LIMIT 1) THEN + PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); + END IF; +END; +$$ +; + +CREATE FUNCTION delete_topic_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_topic_ids UUID[] +) +RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_log RECORD; +BEGIN + FOR l_log IN + DELETE FROM logs + WHERE topic_id = ANY(arg_topic_ids) + RETURNING logs.* + LOOP + PERFORM delete_batch_v1(arg_now, batch_id) + FROM batches + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition; + END LOOP; +END; +$$ +; + +CREATE DOMAIN bigint_not_nullable_t BIGINT +CHECK (VALUE IS NOT NULL); +CREATE TYPE delete_records_request_v1 AS ( + topic_id topic_id_t, + partition partition_t, + -- We need to accept values lower than -1 so we can return the correct offset_out_of_range error for them. 
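+ -- For example, a request carrying offset -2 is accepted by this type and answered by delete_records_v1 below with 'offset_out_of_range' rather than failing a domain constraint.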
+ "offset" bigint_not_nullable_t +); + +CREATE TYPE delete_records_response_error_v1 AS ENUM ( + 'unknown_topic_or_partition', 'offset_out_of_range' +); + +CREATE TYPE delete_records_response_v1 AS ( + topic_id topic_id_t, + partition partition_t, + error delete_records_response_error_v1, + log_start_offset offset_nullable_t +); + +CREATE FUNCTION delete_records_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests delete_records_request_v1[] +) +RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log RECORD; + l_converted_offset BIGINT = -1; +BEGIN + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = CASE + -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK + WHEN l_request.offset = -1 THEN l_log.high_watermark + ELSE l_request.offset + END; + + IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + IF l_converted_offset > l_log.log_start_offset THEN + UPDATE logs + SET log_start_offset = l_converted_offset + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition; + l_log.log_start_offset = l_converted_offset; + END IF; + + PERFORM delete_batch_v1(arg_now, batches.batch_id) + FROM batches + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition + AND last_offset < l_log.log_start_offset; + + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_log.log_start_offset)::delete_records_response_v1; + END LOOP; +END; +$$ +; + +CREATE FUNCTION delete_batch_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_batch_id BIGINT +) +RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_file_id BIGINT; +BEGIN + DELETE FROM batches + WHERE batch_id = arg_batch_id + RETURNING file_id + INTO l_file_id; + + IF NOT EXISTS (SELECT 1 FROM batches WHERE file_id = l_file_id LIMIT 1) THEN + PERFORM mark_file_to_delete_v1(arg_now, l_file_id); + END IF; +END; +$$ +; + +CREATE FUNCTION mark_file_to_delete_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_file_id BIGINT +) +RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ +BEGIN + UPDATE files + SET state = 'deleting', + marked_for_deletion_at = arg_now + WHERE file_id = arg_file_id; +END; +$$ +; + +CREATE FUNCTION delete_files_v1( + arg_paths object_key_t[] +) +RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ +BEGIN + WITH file_ids_to_delete AS ( + SELECT file_id + FROM files + WHERE object_key = ANY(arg_paths) + AND state = 'deleting' + ), + deleted_work_items AS ( + DELETE FROM file_merge_work_item_files + WHERE file_id IN (SELECT file_id FROM file_ids_to_delete) + ) + DELETE FROM files + WHERE file_id IN (SELECT file_id FROM file_ids_to_delete); +END; +$$ +; + +CREATE TYPE list_offsets_request_v1 AS ( + topic_id topic_id_t, + partition partition_t, + timestamp timestamp_t +); + +CREATE TYPE list_offsets_response_error_v1 AS ENUM ( + 'none', + -- errors + 'unknown_topic_or_partition', + 'unsupported_special_timestamp' +); + +CREATE TYPE list_offsets_response_v1 AS ( + topic_id topic_id_t, + partition 
partition_t, + timestamp timestamp_t, + "offset" offset_with_minus_one_t, + error list_offsets_response_error_v1 +); + +CREATE FUNCTION list_offsets_v1( + arg_requests list_offsets_request_v1[] +) +RETURNS SETOF list_offsets_response_v1 LANGUAGE plpgsql STABLE AS $$ +DECLARE + l_request RECORD; + l_log RECORD; + l_max_timestamp BIGINT = NULL; + l_found_timestamp BIGINT = NULL; + l_found_timestamp_offset BIGINT = NULL; +BEGIN + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + LOOP + -- Note that we're not doing locking ("FOR UPDATE") here, as it's not really needed for this read-only function. + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + INTO l_log; + + IF NOT FOUND THEN + -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'unknown_topic_or_partition')::list_offsets_response_v1; + CONTINUE; + END IF; + + -- -2 = org.apache.kafka.common.requests.ListOffsetsRequest.EARLIEST_TIMESTAMP + -- -4 = org.apache.kafka.common.requests.ListOffsetsRequest.EARLIEST_LOCAL_TIMESTAMP + IF l_request.timestamp = -2 OR l_request.timestamp = -4 THEN + -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, l_log.log_start_offset, 'none')::list_offsets_response_v1; + CONTINUE; + END IF; + + -- -1 = org.apache.kafka.common.requests.ListOffsetsRequest.LATEST_TIMESTAMP + IF l_request.timestamp = -1 THEN + -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, l_log.high_watermark, 'none')::list_offsets_response_v1; + CONTINUE; + END IF; + + -- -3 = org.apache.kafka.common.requests.ListOffsetsRequest.MAX_TIMESTAMP + IF l_request.timestamp = -3 THEN + SELECT MAX(batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp)) + INTO l_max_timestamp + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition; + + SELECT last_offset + INTO l_found_timestamp_offset + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) = l_max_timestamp + ORDER BY batch_id + LIMIT 1; + + IF l_found_timestamp_offset IS NULL THEN + -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'none')::list_offsets_response_v1; + ELSE + RETURN NEXT (l_request.topic_id, l_request.partition, l_max_timestamp, l_found_timestamp_offset, 'none')::list_offsets_response_v1; + END IF; + CONTINUE; + END IF; + + -- -5 = org.apache.kafka.common.requests.ListOffsetsRequest.LATEST_TIERED_TIMESTAMP + IF l_request.timestamp = -5 THEN + -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'none')::list_offsets_response_v1; + CONTINUE; + END IF; + + IF l_request.timestamp < 0 THEN + -- Unsupported special timestamp. 
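+ -- All special timestamps currently defined (-1 through -5) are handled above; this branch is a defensive catch-all for any other negative value.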
+ -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'unsupported_special_timestamp')::list_offsets_response_v1; + CONTINUE; + END IF; + + SELECT batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp), base_offset + INTO l_found_timestamp, l_found_timestamp_offset + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) >= l_request.timestamp + ORDER BY batch_id + LIMIT 1; + + IF l_found_timestamp_offset IS NULL THEN + -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP + RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'none')::list_offsets_response_v1; + ELSE + RETURN NEXT ( + l_request.topic_id, l_request.partition, l_found_timestamp, + GREATEST(l_found_timestamp_offset, l_log.log_start_offset), + 'none' + )::list_offsets_response_v1; + END IF; + CONTINUE; + END LOOP; +END; +$$ +; + +CREATE TABLE file_merge_work_items ( + work_item_id BIGSERIAL PRIMARY KEY, + created_at TIMESTAMP WITH TIME ZONE +); + +CREATE TABLE file_merge_work_item_files ( + work_item_id BIGINT REFERENCES file_merge_work_items(work_item_id), + file_id BIGINT REFERENCES files(file_id), + PRIMARY KEY (work_item_id, file_id) +); +CREATE INDEX file_merge_work_item_files_by_file ON file_merge_work_item_files (file_id); + +CREATE TYPE batch_metadata_v1 AS ( + magic magic_t, + topic_id topic_id_t, + topic_name topic_name_t, + partition partition_t, + byte_offset byte_offset_t, + byte_size byte_size_t, + base_offset offset_t, + last_offset offset_t, + log_append_timestamp timestamp_t, + batch_max_timestamp timestamp_t, + timestamp_type timestamp_type_t +); + +CREATE TYPE file_merge_work_item_response_batch_v1 AS ( + batch_id BIGINT, + object_key object_key_t, + metadata batch_metadata_v1 +); + +CREATE TYPE file_merge_work_item_response_file_v1 AS ( + file_id BIGINT, + object_key object_key_t, + format format_t, + size byte_size_t, + batches file_merge_work_item_response_batch_v1[] +); + +CREATE TYPE file_merge_work_item_response_v1 AS ( + work_item_id BIGINT, + created_at TIMESTAMP WITH TIME ZONE, + file_ids file_merge_work_item_response_file_v1[] +); + +CREATE FUNCTION get_file_merge_work_item_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_expiration_interval INTERVAL, + arg_merge_file_size_threshold byte_size_t +) +RETURNS SETOF file_merge_work_item_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_expired_work_item RECORD; + l_file_ids BIGINT[]; + l_new_work_item_id BIGINT; + l_existing_file_id BIGINT; +BEGIN + -- Delete any expired work items + FOR l_expired_work_item IN + SELECT * + FROM file_merge_work_items + WHERE created_at <= arg_now - arg_expiration_interval + LOOP + DELETE FROM file_merge_work_item_files + WHERE work_item_id = l_expired_work_item.work_item_id; + + DELETE FROM file_merge_work_items + WHERE work_item_id = l_expired_work_item.work_item_id; + END LOOP; + + -- Identify files to merge based on threshold size + WITH file_candidates AS ( + SELECT + file_id, + committed_at, + size + FROM files + WHERE state = 'uploaded' + AND reason != 'merge' + AND NOT EXISTS ( + SELECT 1 + FROM file_merge_work_item_files + WHERE file_id = files.file_id + ) + ), + running_sums AS ( + SELECT + file_id, + size, + SUM(size) OVER ( + ORDER BY committed_at, file_id + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) as cumulative_size, + SUM(size) OVER ( + ORDER BY committed_at, 
file_id + ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING + ) as previous_sum + FROM file_candidates + ), + threshold_point AS ( + SELECT MIN(file_id) as last_file_id + FROM running_sums + WHERE cumulative_size >= arg_merge_file_size_threshold + ) + SELECT array_agg(rs.file_id ORDER BY rs.file_id) + INTO l_file_ids + FROM running_sums rs + WHERE rs.file_id <= (SELECT last_file_id FROM threshold_point); + + -- Return if no files to merge + IF l_file_ids IS NULL OR array_length(l_file_ids, 1) = 0 THEN + RETURN; + END IF; + + -- Create new work item + INSERT INTO file_merge_work_items(created_at) + VALUES (arg_now) + RETURNING work_item_id + INTO l_new_work_item_id; + + -- Add files to work item + FOREACH l_existing_file_id IN ARRAY l_file_ids + LOOP + INSERT INTO file_merge_work_item_files(work_item_id, file_id) + VALUES (l_new_work_item_id, l_existing_file_id); + END LOOP; + + -- Return work item + RETURN NEXT ( + l_new_work_item_id, + arg_now, + ARRAY( + SELECT ( + f.file_id, + files.object_key, + files.format, + files.size, + ARRAY( + SELECT ( + batches.batch_id, + files.object_key, + ( + batches.magic, + logs.topic_id, + logs.topic_name, + batches.partition, + batches.byte_offset, + batches.byte_size, + batches.base_offset, + batches.last_offset, + batches.log_append_timestamp, + batches.batch_max_timestamp, + batches.timestamp_type + )::batch_metadata_v1 + )::file_merge_work_item_response_batch_v1 + FROM batches + JOIN files ON batches.file_id = files.file_id + JOIN logs ON batches.topic_id = logs.topic_id AND batches.partition = logs.partition + WHERE batches.file_id = f.file_id + ) + )::file_merge_work_item_response_file_v1 + FROM unnest(l_file_ids) AS f(file_id) + JOIN files ON f.file_id = files.file_id + ) + )::file_merge_work_item_response_v1; +END; +$$ +; + +CREATE TYPE commit_file_merge_work_item_batch_v1 AS ( + metadata batch_metadata_v1, + parent_batch_ids BIGINT[] +); + +CREATE TYPE commit_file_merge_work_item_error_v1 AS ENUM ( + 'none', + 'file_merge_work_item_not_found', + 'invalid_parent_batch_count', + 'batch_not_part_of_work_item' +); + +CREATE TYPE commit_file_merge_work_item_response_v1 AS ( + error commit_file_merge_work_item_error_v1, + error_batch commit_file_merge_work_item_batch_v1 +); + +CREATE FUNCTION commit_file_merge_work_item_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_existing_work_item_id BIGINT, + arg_object_key object_key_t, + arg_format format_t, + arg_uploader_broker_id broker_id_t, + arg_file_size byte_size_t, + arg_merge_file_batches commit_file_merge_work_item_batch_v1[] +) +RETURNS commit_file_merge_work_item_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_work_item RECORD; + l_new_file_id BIGINT; + l_found_batches_size BIGINT; + l_work_item_file RECORD; + l_merge_file_batch commit_file_merge_work_item_batch_v1; +BEGIN + -- check that the work item exists + SELECT * FROM file_merge_work_items + WHERE work_item_id = arg_existing_work_item_id + FOR UPDATE + INTO l_work_item; + + IF NOT FOUND THEN + -- do not remove the file if this condition is hit because it may be a retry from a valid work item + -- only delete the object key when a failure condition is found + + RETURN ROW('file_merge_work_item_not_found'::commit_file_merge_work_item_error_v1, NULL)::commit_file_merge_work_item_response_v1; + END IF; + + -- check that the number of parent batches is 1 (limitation of the current implementation) + FOR l_merge_file_batch IN + SELECT * + FROM unnest(arg_merge_file_batches) b + LOOP + IF array_length(l_merge_file_batch.parent_batch_ids, 1) 
IS NULL OR array_length(l_merge_file_batch.parent_batch_ids, 1) != 1 THEN + -- insert new empty file to be deleted + INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) + VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, 0) + RETURNING file_id + INTO l_new_file_id; + PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); + + -- Do not remove the work item, because another non-buggy worker may eventually succeed. + + RETURN ROW('invalid_parent_batch_count'::commit_file_merge_work_item_error_v1, l_merge_file_batch)::commit_file_merge_work_item_response_v1; + END IF; + END LOOP; + + -- Lock logs to prevent concurrent modifications. + PERFORM + FROM logs + WHERE (topic_id, partition) IN ( + SELECT logs.topic_id, logs.partition + FROM unnest(arg_merge_file_batches) AS mfb + INNER JOIN batches ON mfb.parent_batch_ids[1] = batches.batch_id + INNER JOIN logs ON batches.topic_id = logs.topic_id AND batches.partition = logs.partition + ) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE; + + -- filter arg_merge_file_batches to only include the ones where logs exist + arg_merge_file_batches := ARRAY( + SELECT b + FROM unnest(arg_merge_file_batches) b + JOIN batches ON b.parent_batch_ids[1] = batches.batch_id + JOIN logs ON batches.topic_id = logs.topic_id AND batches.partition = logs.partition + ); + + -- check if the found batch file id is part of the work item + SELECT SUM(batches.byte_size) + FROM batches + WHERE EXISTS ( + SELECT 1 + FROM unnest(arg_merge_file_batches) b + WHERE batch_id = ANY(b.parent_batch_ids) + ) + INTO l_found_batches_size; + + IF l_found_batches_size IS NULL THEN + -- insert new empty file + INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) + VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, 0) + RETURNING file_id + INTO l_new_file_id; + PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); + + -- delete work item + PERFORM release_file_merge_work_item_v1(arg_existing_work_item_id); + + RETURN ROW('none'::commit_file_merge_work_item_error_v1, NULL)::commit_file_merge_work_item_response_v1; + END IF; + + -- check that all parent batch files are part of work item files + FOR l_merge_file_batch IN + SELECT * + FROM unnest(arg_merge_file_batches) b + WHERE NOT EXISTS ( + SELECT 1 + FROM file_merge_work_item_files + JOIN batches ON file_merge_work_item_files.file_id = batches.file_id + WHERE work_item_id = arg_existing_work_item_id + AND batch_id = ANY(b.parent_batch_ids) + ) + LOOP + -- insert new empty file to be deleted + INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) + VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, 0) + RETURNING file_id + INTO l_new_file_id; + PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); + + -- Do not remove the work item, because another non-buggy worker may eventually succeed. 
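+ -- The files row inserted above is already in the 'deleting' state, so the uploaded merge object can still be cleaned up through the regular file-deletion path even though the work item is kept.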
+ + RETURN ROW('batch_not_part_of_work_item'::commit_file_merge_work_item_error_v1, l_merge_file_batch)::commit_file_merge_work_item_response_v1; + END LOOP; + + -- delete old files + PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM file_merge_work_item_files + WHERE work_item_id = arg_existing_work_item_id; + + -- insert new file + INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) + VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, arg_file_size) + RETURNING file_id + INTO l_new_file_id; + + -- delete old batches + DELETE FROM batches + WHERE EXISTS ( + SELECT 1 + FROM unnest(arg_merge_file_batches) b + WHERE batch_id = ANY(b.parent_batch_ids) + ); + + -- insert new batches + INSERT INTO batches ( + magic, + topic_id, partition, + base_offset, + last_offset, + file_id, + byte_offset, byte_size, + log_append_timestamp, + batch_max_timestamp, + timestamp_type + ) + SELECT DISTINCT + (unnest(arg_merge_file_batches)).metadata.magic, + (unnest(arg_merge_file_batches)).metadata.topic_id, + (unnest(arg_merge_file_batches)).metadata.partition, + (unnest(arg_merge_file_batches)).metadata.base_offset, + (unnest(arg_merge_file_batches)).metadata.last_offset, + l_new_file_id, + (unnest(arg_merge_file_batches)).metadata.byte_offset, + (unnest(arg_merge_file_batches)).metadata.byte_size, + (unnest(arg_merge_file_batches)).metadata.log_append_timestamp, + (unnest(arg_merge_file_batches)).metadata.batch_max_timestamp, + (unnest(arg_merge_file_batches)).metadata.timestamp_type + FROM unnest(arg_merge_file_batches) + ORDER BY (unnest(arg_merge_file_batches)).metadata.topic_id, + (unnest(arg_merge_file_batches)).metadata.partition, + (unnest(arg_merge_file_batches)).metadata.base_offset; + + -- delete work item + PERFORM release_file_merge_work_item_v1(arg_existing_work_item_id); + + RETURN ROW('none'::commit_file_merge_work_item_error_v1, NULL)::commit_file_merge_work_item_response_v1; +END; +$$ +; + +CREATE TYPE release_file_merge_work_item_error_v1 AS ENUM ( + 'none', + 'file_merge_work_item_not_found' +); + +CREATE TYPE release_file_merge_work_item_response_v1 AS ( + error release_file_merge_work_item_error_v1 +); + +CREATE FUNCTION release_file_merge_work_item_v1( + arg_existing_work_item_id BIGINT +) +RETURNS release_file_merge_work_item_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +BEGIN + PERFORM * FROM file_merge_work_items + WHERE work_item_id = arg_existing_work_item_id + FOR UPDATE; + + IF NOT FOUND THEN + RETURN ROW('file_merge_work_item_not_found'::release_file_merge_work_item_error_v1)::release_file_merge_work_item_response_v1; + END IF; + + DELETE FROM file_merge_work_item_files + WHERE work_item_id = arg_existing_work_item_id; + + DELETE FROM file_merge_work_items + WHERE work_item_id = arg_existing_work_item_id; + + RETURN ROW('none'::release_file_merge_work_item_error_v1)::release_file_merge_work_item_response_v1; +END; +$$ +; + +CREATE FUNCTION batch_timestamp( + arg_timestamp_type timestamp_type_t, + arg_batch_max_timestamp timestamp_t, + arg_log_append_timestamp timestamp_t +) +RETURNS timestamp_t LANGUAGE plpgsql IMMUTABLE AS $$ +BEGIN + -- See how timestamps are assigned in + -- https://github.com/aiven/inkless/blob/e124d3975bdb3a9ec85eee2fba7a1b0a6967d3a6/storage/src/main/java/org/apache/kafka/storage/internals/log/LogValidator.java#L271-L276 + RETURN CASE arg_timestamp_type + WHEN 1 THEN arg_log_append_timestamp -- org.apache.kafka.common.record.TimestampType.LOG_APPEND_TIME + ELSE 
arg_batch_max_timestamp + END; +END +$$ +; diff --git a/storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql b/storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql new file mode 100644 index 0000000000..f4db7f1171 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql @@ -0,0 +1,121 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ + +CREATE OR REPLACE FUNCTION delete_topic_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_topic_ids UUID[] +) +RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ +BEGIN + -- First delete the logs of the deleted topics. + DELETE FROM logs + WHERE topic_id = ANY(arg_topic_ids); + + DROP TABLE IF EXISTS affected_files; + -- Delete the affected batches and remember what files are affected. + -- We need to separate deleting batches and finding empty files because if they are in the same requests as CTE, + -- the query below will see the MVCC snapshot from before deleting batches. + CREATE TEMPORARY TABLE affected_files + ON COMMIT DROP + AS + WITH deleted_batches AS ( + DELETE FROM batches + WHERE topic_id = ANY(arg_topic_ids) + RETURNING file_id + ) + SELECT file_id + FROM deleted_batches; + + -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) + -- and mark them for deletion. + PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM ( + SELECT DISTINCT af.file_id + FROM affected_files AS af + LEFT JOIN batches AS b ON af.file_id = b.file_id + WHERE b.batch_id IS NULL + ); +END; +$$ +; + +CREATE OR REPLACE FUNCTION delete_records_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests delete_records_request_v1[] +) +RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log RECORD; + l_converted_offset BIGINT = -1; +BEGIN + + DROP TABLE IF EXISTS affected_files; + CREATE TEMPORARY TABLE affected_files ( + file_id BIGINT PRIMARY KEY + ) + ON COMMIT DROP; + + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = CASE + -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK + WHEN l_request.offset = -1 THEN l_log.high_watermark + ELSE l_request.offset + END; + + IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + IF l_converted_offset > l_log.log_start_offset THEN + UPDATE logs + SET log_start_offset = l_converted_offset + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition; + l_log.log_start_offset = l_converted_offset; + END IF; + + -- Delete the affected batches and remember what files are affected. 
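+        -- affected_files declares file_id as PRIMARY KEY, so the ON CONFLICT DO NOTHING below de-duplicates files touched by more than one request.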
+ WITH affected_files_local AS ( + DELETE FROM batches + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition + AND last_offset < l_log.log_start_offset + RETURNING file_id + ) + INSERT INTO affected_files (file_id) + SELECT DISTINCT file_id + FROM affected_files_local + ON CONFLICT DO NOTHING; -- ignore duplicates + + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_log.log_start_offset)::delete_records_response_v1; + END LOOP; + + -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) + -- and mark them for deletion. + PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM ( + SELECT DISTINCT af.file_id + FROM affected_files AS af + LEFT JOIN batches AS b ON af.file_id = b.file_id + WHERE b.batch_id IS NULL + ); +END; +$$ +; diff --git a/storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql b/storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql new file mode 100644 index 0000000000..f32e9539e7 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql @@ -0,0 +1,300 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ + +-- 1. Introduce the `byte_size` column. + +ALTER TABLE logs +ADD COLUMN byte_size byte_size_t DEFAULT 0; + +UPDATE logs +SET byte_size = aggregated.total_byte_size +FROM ( + SELECT topic_id, partition, SUM(byte_size) AS total_byte_size + FROM batches + GROUP BY topic_id, partition +) AS aggregated +WHERE logs.topic_id = aggregated.topic_id + AND logs.partition = aggregated.partition; + +-- 2. Update functions to support `byte_size`. + +CREATE OR REPLACE FUNCTION commit_file_v1( + arg_object_key object_key_t, + arg_format format_t, + arg_uploader_broker_id broker_id_t, + arg_file_size byte_size_t, + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests commit_batch_request_v1[] +) +RETURNS SETOF commit_batch_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_new_file_id BIGINT; + l_request RECORD; + l_log logs%ROWTYPE; + l_duplicate RECORD; + l_assigned_offset offset_nullable_t; + l_new_high_watermark offset_nullable_t; + l_last_sequence_in_producer_epoch BIGINT; +BEGIN + INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) + VALUES (arg_object_key, arg_format, 'produce', 'uploaded', arg_uploader_broker_id, arg_now, arg_file_size) + RETURNING file_id + INTO l_new_file_id; + + -- We use this temporary table to perform the write operations in loop on it first + -- and only then dump the result on the real table. This reduces the WAL pressure and latency of the function. + DROP TABLE IF EXISTS logs_tmp; + CREATE TEMPORARY TABLE logs_tmp + ON COMMIT DROP + AS + -- Extract the relevant logs into the temporary table and simultaneously lock them. + -- topic_name and log_start_offset aren't technically needed, but having them allows declaring `l_log logs%ROWTYPE`. + SELECT * + FROM logs + WHERE (topic_id, partition) IN (SELECT DISTINCT topic_id, partition FROM unnest(arg_requests)) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE; + + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + LOOP + -- A small optimization: select the log into a variable only if it's a different topic-partition. + -- Batches are sorted by topic-partitions, so this makes sense. 
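+        -- IS DISTINCT FROM also treats the initial NULL l_log as a mismatch, so the log is loaded on the first iteration.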
+ IF l_log.topic_id IS DISTINCT FROM l_request.topic_id + OR l_log.partition IS DISTINCT FROM l_request.partition THEN + + SELECT * + FROM logs_tmp + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'nonexistent_log')::commit_batch_response_v1; + CONTINUE; + END IF; + END IF; + + l_assigned_offset = l_log.high_watermark; + + -- Validate that the new request base sequence is not larger than the previous batch last sequence + IF l_request.producer_id > -1 AND l_request.producer_epoch > -1 + THEN + -- If there are previous batches for the producer, check that the producer epoch is not smaller than the last batch + IF EXISTS ( + SELECT 1 + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND producer_epoch > l_request.producer_epoch + ) THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'invalid_producer_epoch')::commit_batch_response_v1; + CONTINUE; + END IF; + + SELECT MAX(last_sequence) + INTO l_last_sequence_in_producer_epoch + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND producer_epoch = l_request.producer_epoch; + + -- If there are previous batches for the producer + IF l_last_sequence_in_producer_epoch IS NULL THEN + -- If there are no previous batches for the producer, the base sequence must be 0 + IF l_request.base_sequence <> 0 + THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; + CONTINUE; + END IF; + ELSE + -- Check for duplicates + SELECT * + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND producer_epoch = l_request.producer_epoch + AND base_sequence = l_request.base_sequence + AND last_sequence = l_request.last_sequence + INTO l_duplicate; + IF FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_duplicate.assigned_offset, l_duplicate.batch_max_timestamp, 'duplicate_batch')::commit_batch_response_v1; + CONTINUE; + END IF; + + -- Check that the sequence is not out of order. + -- A sequence is out of order if the base sequence is not a continuation of the last sequence + -- or, in case of wraparound, the base sequence must be 0 and the last sequence must be 2147483647 (Integer.MAX_VALUE). + IF (l_request.base_sequence - 1) <> l_last_sequence_in_producer_epoch OR (l_last_sequence_in_producer_epoch = 2147483647 AND l_request.base_sequence <> 0) THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; + CONTINUE; + END IF; + END IF; + + INSERT INTO producer_state ( + topic_id, partition, producer_id, + producer_epoch, base_sequence, last_sequence, assigned_offset, batch_max_timestamp + ) + VALUES ( + l_request.topic_id, l_request.partition, l_request.producer_id, + l_request.producer_epoch, l_request.base_sequence, l_request.last_sequence, l_assigned_offset, l_request.batch_max_timestamp + ); + -- Keep only the last 5 records. 
+ -- 5 == org.apache.kafka.storage.internals.log.ProducerStateEntry.NUM_BATCHES_TO_RETAIN + DELETE FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + AND row_id <= ( + SELECT row_id + FROM producer_state + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND producer_id = l_request.producer_id + ORDER BY row_id DESC + LIMIT 1 + OFFSET 5 + ); + END IF; + + UPDATE logs_tmp + SET high_watermark = high_watermark + (l_request.last_offset - l_request.base_offset + 1), + byte_size = byte_size + l_request.byte_size + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + RETURNING high_watermark + INTO l_new_high_watermark; + + l_log.high_watermark = l_new_high_watermark; + + INSERT INTO batches ( + magic, + topic_id, partition, + base_offset, + last_offset, + file_id, + byte_offset, byte_size, + timestamp_type, log_append_timestamp, batch_max_timestamp + ) + VALUES ( + l_request.magic, + l_request.topic_id, l_request.partition, + l_assigned_offset, + l_new_high_watermark - 1, + l_new_file_id, + l_request.byte_offset, l_request.byte_size, + l_request.timestamp_type, + (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT, + l_request.batch_max_timestamp + ); + + RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_assigned_offset, l_request.batch_max_timestamp, 'none')::commit_batch_response_v1; + END LOOP; + + -- Transfer from the temporary to real table. + UPDATE logs + SET high_watermark = logs_tmp.high_watermark, + byte_size = logs_tmp.byte_size + FROM logs_tmp + WHERE logs.topic_id = logs_tmp.topic_id + AND logs.partition = logs_tmp.partition; + + IF NOT EXISTS (SELECT 1 FROM batches WHERE file_id = l_new_file_id LIMIT 1) THEN + PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); + END IF; +END; +$$ +; + + +CREATE OR REPLACE FUNCTION delete_records_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests delete_records_request_v1[] +) +RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log RECORD; + l_converted_offset BIGINT = -1; + l_deleted_bytes BIGINT; +BEGIN + + DROP TABLE IF EXISTS affected_files; + CREATE TEMPORARY TABLE affected_files ( + file_id BIGINT PRIMARY KEY + ) + ON COMMIT DROP; + + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = CASE + -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK + WHEN l_request.offset = -1 THEN l_log.high_watermark + ELSE l_request.offset + END; + + IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = GREATEST(l_converted_offset, l_log.log_start_offset); + + -- Delete the affected batches. + WITH deleted_batches AS ( + DELETE FROM batches + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition + AND last_offset < l_converted_offset + RETURNING file_id, byte_size + ), + -- Remember what files were affected. 
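+        -- _1 is a data-modifying CTE: the rows returned by deleted_batches feed both this INSERT and the byte-size SUM below.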
+ _1 AS ( + INSERT INTO affected_files (file_id) + SELECT DISTINCT file_id + FROM deleted_batches + ON CONFLICT DO NOTHING -- ignore duplicates + ) + SELECT COALESCE(SUM(byte_size), 0) + FROM deleted_batches + INTO l_deleted_bytes; + + UPDATE logs + SET log_start_offset = l_converted_offset, + byte_size = byte_size - l_deleted_bytes + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition; + + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_converted_offset)::delete_records_response_v1; + END LOOP; + + -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) + -- and mark them for deletion. + PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM ( + SELECT DISTINCT af.file_id + FROM affected_files AS af + LEFT JOIN batches AS b ON af.file_id = b.file_id + WHERE b.batch_id IS NULL + ); +END; +$$ +; diff --git a/storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql b/storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql new file mode 100644 index 0000000000..a5db67cce8 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql @@ -0,0 +1,133 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ + +CREATE DOMAIN retention_t AS BIGINT NOT NULL +CHECK (VALUE >= -1); + +CREATE TYPE enforce_retention_request_v1 AS ( + topic_id topic_id_t, + partition partition_t, + retention_bytes retention_t, + retention_ms retention_t +); + +CREATE TYPE enforce_retention_response_error_v1 AS ENUM ( + 'unknown_topic_or_partition' +); + +CREATE TYPE enforce_retention_response_v1 AS ( + topic_id topic_id_t, + partition partition_t, + error enforce_retention_response_error_v1, + batches_deleted INT, + bytes_deleted BIGINT, + log_start_offset offset_nullable_t +); + +CREATE FUNCTION enforce_retention_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests enforce_retention_request_v1[] +) +RETURNS SETOF enforce_retention_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log logs%ROWTYPE; + l_base_offset_of_first_batch_to_keep offset_nullable_t; + l_batches_deleted INT; + l_bytes_deleted BIGINT; + l_delete_records_response delete_records_response_v1; +BEGIN + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + INTO l_log + FOR UPDATE; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL, NULL, NULL)::enforce_retention_response_v1; + CONTINUE; + END IF; + + l_base_offset_of_first_batch_to_keep = NULL; + + IF l_request.retention_bytes >= 0 OR l_request.retention_ms >= 0 THEN + WITH augmented_batches AS ( + -- For retention by size: + -- Associate with each batch the number of bytes that the log would have if this batch and later batches are retained. + -- In other words, this is the reverse aggregated size (counted from the end to the beginning). + -- An example: + -- Batch size | Aggregated | Reverse aggregated | + -- (in order) | size | size | + -- 1 | 1 | 10 - 1 + 1 = 10 | + -- 2 | 1 + 2 = 3 | 10 - 3 + 2 = 9 | + -- 3 | 3 + 3 = 6 | 10 - 6 + 3 = 7 | + -- 4 | 6 + 4 = 10 | 10 - 10 + 4 = 4 | + -- The reverse aggregated size is equal to what the aggregated size would be if the sorting order is reverse, + -- but doing so explicitly might be costly, hence the formula. 
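+                -- E.g. with retention_bytes = 7 in the example above, the first batch whose reverse aggregated size fits is batch 3 (reverse aggregated size 7), so batches 3 and 4 (7 bytes in total) are retained.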
+ -- For retention by time: + -- Associate with each batch its effective timestamp. + SELECT topic_id, partition, last_offset, + base_offset, + l_log.byte_size - SUM(byte_size) OVER (ORDER BY topic_id, partition, last_offset) + byte_size AS reverse_agg_byte_size, + batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) AS effective_timestamp + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + ORDER BY topic_id, partition, last_offset + ) + -- Look for the first batch that complies with both retention policies (if they are enabled): + -- For size: + -- The first batch which being retained with the subsequent batches would make the total log size <= retention_bytes. + -- For time: + -- The first batch which effective timestamp is greater or equal to the last timestamp to retain. + SELECT base_offset + FROM augmented_batches + WHERE (l_request.retention_bytes < 0 OR reverse_agg_byte_size <= l_request.retention_bytes) + AND (l_request.retention_ms < 0 OR effective_timestamp >= (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT - l_request.retention_ms) + ORDER BY topic_id, partition, last_offset + LIMIT 1 + INTO l_base_offset_of_first_batch_to_keep; + + -- No batch satisfy the retention policy == delete everything, i.e. up to HWM. + l_base_offset_of_first_batch_to_keep = COALESCE(l_base_offset_of_first_batch_to_keep, l_log.high_watermark); + END IF; + + -- Nothing to delete. + IF l_base_offset_of_first_batch_to_keep IS NULL THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, 0, 0::BIGINT, l_log.log_start_offset)::enforce_retention_response_v1; + CONTINUE; + END IF; + + SELECT COUNT(*), SUM(byte_size) + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND last_offset < l_base_offset_of_first_batch_to_keep + INTO l_batches_deleted, l_bytes_deleted; + + SELECT * + FROM delete_records_v1(arg_now, array[ROW(l_request.topic_id, l_request.partition, l_base_offset_of_first_batch_to_keep)::delete_records_request_v1]) + INTO l_delete_records_response; + + -- This should never happen, just fail. + IF l_delete_records_response.error IS DISTINCT FROM NULL THEN + RAISE 'delete_records_v1 returned unexpected error: %', l_delete_records_response; + END IF; + + RETURN NEXT ( + l_request.topic_id, + l_request.partition, + NULL::enforce_retention_response_error_v1, + COALESCE(l_batches_deleted, 0), + COALESCE(l_bytes_deleted, 0), + l_delete_records_response.log_start_offset + )::enforce_retention_response_v1; + END LOOP; +END; +$$ +; diff --git a/storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql b/storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql new file mode 100644 index 0000000000..675d463677 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql @@ -0,0 +1,90 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. 
https://aiven.io/ + +CREATE OR REPLACE FUNCTION delete_records_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests delete_records_request_v1[] +) +RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log RECORD; + l_converted_offset BIGINT = -1; + l_deleted_bytes BIGINT; +BEGIN + + DROP TABLE IF EXISTS affected_files; + CREATE TEMPORARY TABLE affected_files ( + file_id BIGINT PRIMARY KEY + ) + ON COMMIT DROP; + + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + FOR UPDATE + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = CASE + -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK + WHEN l_request.offset = -1 THEN l_log.high_watermark + ELSE l_request.offset + END; + + IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = GREATEST(l_converted_offset, l_log.log_start_offset); + + -- Delete the affected batches. + WITH deleted_batches AS ( + DELETE FROM batches + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition + AND last_offset < l_converted_offset + RETURNING file_id, byte_size + ), + -- Remember what files were affected. + _1 AS ( + INSERT INTO affected_files (file_id) + SELECT DISTINCT file_id + FROM deleted_batches + ON CONFLICT DO NOTHING -- ignore duplicates + ) + SELECT COALESCE(SUM(byte_size), 0) + FROM deleted_batches + INTO l_deleted_bytes; + + UPDATE logs + SET log_start_offset = l_converted_offset, + byte_size = byte_size - l_deleted_bytes + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition; + + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_converted_offset)::delete_records_response_v1; + END LOOP; + + -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) + -- and mark them for deletion. + PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM ( + SELECT DISTINCT af.file_id + FROM affected_files AS af + LEFT JOIN batches AS b ON af.file_id = b.file_id + WHERE b.batch_id IS NULL + ); +END; +$$ +; diff --git a/storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql b/storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql new file mode 100644 index 0000000000..85f8cc914f --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql @@ -0,0 +1,143 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ + +ALTER TABLE batches +ALTER CONSTRAINT fk_batches_logs NOT DEFERRABLE; + +CREATE OR REPLACE FUNCTION delete_topic_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_topic_ids UUID[] +) +RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ +BEGIN + -- Ensure no other transaction commits or does anything else to the affected partitions while this transaction is in progress. 
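+    -- PERFORM ... FOR UPDATE acquires the row locks while discarding the query result.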
+ PERFORM + FROM logs + WHERE topic_id = ANY(arg_topic_ids) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + FOR UPDATE; + + DROP TABLE IF EXISTS affected_files; + -- Delete the affected batches and remember what files are affected. + -- We need to separate deleting batches and finding empty files because if they are in the same requests as CTE, + -- the query below will see the MVCC snapshot from before deleting batches. + CREATE TEMPORARY TABLE affected_files + ON COMMIT DROP + AS + WITH deleted_batches AS ( + DELETE FROM batches + WHERE topic_id = ANY(arg_topic_ids) + RETURNING file_id + ) + SELECT file_id + FROM deleted_batches; + + DELETE FROM logs + WHERE topic_id = ANY(arg_topic_ids); + + -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) + -- and mark them for deletion. + PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM ( + SELECT DISTINCT af.file_id + FROM affected_files AS af + WHERE NOT EXISTS ( + SELECT 1 + FROM batches AS b + WHERE b.file_id = af.file_id + ) + ); +END; +$$ +; + +CREATE OR REPLACE FUNCTION delete_records_v1( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests delete_records_request_v1[] +) +RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log RECORD; + l_converted_offset BIGINT = -1; + l_deleted_bytes BIGINT; +BEGIN + + DROP TABLE IF EXISTS affected_files; + CREATE TEMPORARY TABLE affected_files ( + file_id BIGINT PRIMARY KEY + ) + ON COMMIT DROP; + + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + FOR UPDATE + INTO l_log; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = CASE + -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK + WHEN l_request.offset = -1 THEN l_log.high_watermark + ELSE l_request.offset + END; + + IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; + CONTINUE; + END IF; + + l_converted_offset = GREATEST(l_converted_offset, l_log.log_start_offset); + + -- Delete the affected batches. + WITH deleted_batches AS ( + DELETE FROM batches + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition + AND last_offset < l_converted_offset + RETURNING file_id, byte_size + ), + -- Remember what files were affected. + _1 AS ( + INSERT INTO affected_files (file_id) + SELECT DISTINCT file_id + FROM deleted_batches + ON CONFLICT DO NOTHING -- ignore duplicates + ) + SELECT COALESCE(SUM(byte_size), 0) + FROM deleted_batches + INTO l_deleted_bytes; + + UPDATE logs + SET log_start_offset = l_converted_offset, + byte_size = byte_size - l_deleted_bytes + WHERE topic_id = l_log.topic_id + AND partition = l_log.partition; + + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_converted_offset)::delete_records_response_v1; + END LOOP; + + -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) + -- and mark them for deletion. 
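+    -- NOT EXISTS replaces the LEFT JOIN ... IS NULL anti-join used by the earlier versions of this function; the intended behaviour is the same.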
+ PERFORM mark_file_to_delete_v1(arg_now, file_id) + FROM ( + SELECT DISTINCT af.file_id + FROM affected_files AS af + WHERE NOT EXISTS ( + SELECT 1 + FROM batches AS b + WHERE b.file_id = af.file_id + ) + ); +END; +$$ +; diff --git a/storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql b/storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql new file mode 100644 index 0000000000..4b26f377ed --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql @@ -0,0 +1,114 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ + +CREATE TYPE find_batches_request_v1 AS ( + topic_id topic_id_t, + partition partition_t, + starting_offset BIGINT, + max_partition_fetch_bytes INT +); + +CREATE TYPE batch_info_v1 AS ( + batch_id BIGINT, + object_key object_key_t, + batch_metadata batch_metadata_v1 +); + +CREATE TYPE find_batches_response_error_v1 AS ENUM ( + 'offset_out_of_range', + 'unknown_topic_or_partition' +); + +CREATE TYPE find_batches_response_v1 AS ( + topic_id topic_id_t, + partition partition_t, + log_start_offset offset_with_minus_one_t, + high_watermark offset_with_minus_one_t, + batches batch_info_v1[], + error find_batches_response_error_v1 +); + +CREATE OR REPLACE FUNCTION find_batches_v1( + arg_requests find_batches_request_v1[], + fetch_max_bytes INT +) +RETURNS SETOF find_batches_response_v1 LANGUAGE sql STABLE AS $$ + WITH + requests AS ( + SELECT + r.topic_id, + r.partition, + r.starting_offset, + r.max_partition_fetch_bytes, + r.ordinality AS idx -- for preserving original order + FROM unnest(arg_requests) WITH ORDINALITY AS r(topic_id, partition, starting_offset, max_partition_fetch_bytes, ordinality) + ), + requests_with_log_info AS ( + SELECT + r.idx, r.topic_id, r.partition, r.starting_offset, r.max_partition_fetch_bytes, + l.log_start_offset, l.high_watermark, l.topic_name, + CASE + WHEN l.topic_id IS NULL THEN 'unknown_topic_or_partition'::find_batches_response_error_v1 + WHEN r.starting_offset < 0 OR r.starting_offset > l.high_watermark THEN 'offset_out_of_range'::find_batches_response_error_v1 + ELSE NULL + END AS error + FROM requests r + LEFT JOIN logs l ON r.topic_id = l.topic_id AND r.partition = l.partition + ), + all_batches_with_metadata AS ( + SELECT + r.idx, + ( + b.batch_id, + f.object_key, + ( + b.magic, b.topic_id, r.topic_name, b.partition, b.byte_offset, b.byte_size, + b.base_offset, b.last_offset, b.log_append_timestamp, b.batch_max_timestamp, + b.timestamp_type + )::batch_metadata_v1 + )::batch_info_v1 AS batch_data, + b.byte_size, b.base_offset, r.max_partition_fetch_bytes, + ROW_NUMBER() OVER (PARTITION BY r.idx ORDER BY b.base_offset) as rn, + SUM(b.byte_size) OVER (PARTITION BY r.idx ORDER BY b.base_offset) as partition_cumulative_bytes + FROM requests_with_log_info r + JOIN batches b ON r.topic_id = b.topic_id AND r.partition = b.partition + JOIN files f ON b.file_id = f.file_id + WHERE r.error IS NULL + AND b.last_offset >= r.starting_offset + AND b.base_offset < r.high_watermark + ), + per_partition_limited_batches AS ( + SELECT idx, batch_data, byte_size, base_offset, rn + FROM all_batches_with_metadata + WHERE rn = 1 -- each partition gets always at least one batch + -- include also last batch, even if it overflows max.partition.fetch.bytes + OR (partition_cumulative_bytes - byte_size) < max_partition_fetch_bytes + ), + final_batch_set AS ( + SELECT idx, batch_data, base_offset, rn + FROM ( + SELECT *, SUM(byte_size) OVER (ORDER BY idx, base_offset) as global_cumulative_bytes + 
FROM per_partition_limited_batches + ) AS sized_batches + WHERE rn = 1 OR -- each partition gets always at least one batch + -- include also last batch, even if it overflows fetch.max.bytes + (global_cumulative_bytes - byte_size) < fetch_max_bytes + ), + aggregated_batches AS ( + SELECT + idx, + array_agg(batch_data ORDER BY base_offset) AS batches + FROM final_batch_set + GROUP BY idx + ) + SELECT + r.topic_id, + r.partition, + COALESCE(r.log_start_offset, -1), + COALESCE(r.high_watermark, -1), + CASE WHEN r.error IS NULL THEN COALESCE(ab.batches, '{}'::batch_info_v1[]) ELSE NULL END, + r.error + FROM requests_with_log_info r + LEFT JOIN aggregated_batches ab ON r.idx = ab.idx + ORDER BY r.idx; +$$; + diff --git a/storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql b/storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql new file mode 100644 index 0000000000..62a4ea57b6 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql @@ -0,0 +1,88 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ +CREATE OR REPLACE FUNCTION find_batches_v2( + arg_requests find_batches_request_v1[], + fetch_max_bytes INT, + max_batches_per_partition INT DEFAULT 0 +) +RETURNS SETOF find_batches_response_v1 LANGUAGE sql STABLE AS $$ + WITH + requests AS ( + SELECT + r.topic_id, + r.partition, + r.starting_offset, + r.max_partition_fetch_bytes, + r.ordinality AS idx -- for preserving original order + FROM unnest(arg_requests) WITH ORDINALITY AS r(topic_id, partition, starting_offset, max_partition_fetch_bytes, ordinality) + ), + requests_with_log_info AS ( + SELECT + r.idx, r.topic_id, r.partition, r.starting_offset, r.max_partition_fetch_bytes, + l.log_start_offset, l.high_watermark, l.topic_name, + CASE + WHEN l.topic_id IS NULL THEN 'unknown_topic_or_partition'::find_batches_response_error_v1 + WHEN r.starting_offset < 0 OR r.starting_offset > l.high_watermark THEN 'offset_out_of_range'::find_batches_response_error_v1 + ELSE NULL + END AS error + FROM requests r + LEFT JOIN logs l ON r.topic_id = l.topic_id AND r.partition = l.partition + ), + all_batches_with_metadata AS ( + SELECT + r.idx, + ( + b.batch_id, + f.object_key, + ( + b.magic, b.topic_id, r.topic_name, b.partition, b.byte_offset, b.byte_size, + b.base_offset, b.last_offset, b.log_append_timestamp, b.batch_max_timestamp, + b.timestamp_type + )::batch_metadata_v1 + )::batch_info_v1 AS batch_data, + b.byte_size, b.base_offset, r.max_partition_fetch_bytes, + ROW_NUMBER() OVER (PARTITION BY r.idx ORDER BY b.base_offset) as rn, + SUM(b.byte_size) OVER (PARTITION BY r.idx ORDER BY b.base_offset) as partition_cumulative_bytes + FROM requests_with_log_info r + JOIN batches b ON r.topic_id = b.topic_id AND r.partition = b.partition + JOIN files f ON b.file_id = f.file_id + WHERE r.error IS NULL + AND b.last_offset >= r.starting_offset + AND b.base_offset < r.high_watermark + ), + per_partition_limited_batches AS ( + SELECT idx, batch_data, byte_size, base_offset, rn + FROM all_batches_with_metadata + WHERE (rn = 1 -- each partition gets always at least one batch + -- include also last batch, even if it overflows max.partition.fetch.bytes + OR (partition_cumulative_bytes - byte_size) < max_partition_fetch_bytes + ) AND (max_batches_per_partition = 0 OR rn <= max_batches_per_partition) + ), + final_batch_set AS ( + SELECT idx, batch_data, base_offset, rn + FROM ( + SELECT *, SUM(byte_size) OVER (ORDER BY idx, base_offset) as global_cumulative_bytes + 
FROM per_partition_limited_batches + ) AS sized_batches + WHERE rn = 1 OR -- each partition gets always at least one batch + -- include also last batch, even if it overflows fetch.max.bytes + (global_cumulative_bytes - byte_size) < fetch_max_bytes + ), + aggregated_batches AS ( + SELECT + idx, + array_agg(batch_data ORDER BY base_offset) AS batches + FROM final_batch_set + GROUP BY idx + ) + SELECT + r.topic_id, + r.partition, + COALESCE(r.log_start_offset, -1), + COALESCE(r.high_watermark, -1), + CASE WHEN r.error IS NULL THEN COALESCE(ab.batches, '{}'::batch_info_v1[]) ELSE NULL END, + r.error + FROM requests_with_log_info r + LEFT JOIN aggregated_batches ab ON r.idx = ab.idx + ORDER BY r.idx; +$$; + diff --git a/storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql b/storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql new file mode 100644 index 0000000000..83c80baba2 --- /dev/null +++ b/storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql @@ -0,0 +1,132 @@ +-- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ +-- Similar to v1, with the addition of max_batches_per_request to limit the number of batches to be deleted per partition +-- as a way to cope with the performance impact of executing the updates and further deletion of batches. +CREATE FUNCTION enforce_retention_v2( + arg_now TIMESTAMP WITH TIME ZONE, + arg_requests enforce_retention_request_v1[], + max_batches_per_request INT DEFAULT 0 +) +RETURNS SETOF enforce_retention_response_v1 LANGUAGE plpgsql VOLATILE AS $$ +DECLARE + l_request RECORD; + l_log logs%ROWTYPE; + l_base_offset_of_first_batch_to_keep offset_nullable_t; + l_batches_deleted INT; + l_bytes_deleted BIGINT; + l_delete_records_response delete_records_response_v1; +BEGIN + FOR l_request IN + SELECT * + FROM unnest(arg_requests) + ORDER BY topic_id, partition -- ordering is important to prevent deadlocks + LOOP + SELECT * + FROM logs + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + INTO l_log + FOR UPDATE; + + IF NOT FOUND THEN + RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL, NULL, NULL)::enforce_retention_response_v1; + CONTINUE; + END IF; + + l_base_offset_of_first_batch_to_keep = NULL; + + IF l_request.retention_bytes >= 0 OR l_request.retention_ms >= 0 THEN + WITH augmented_batches AS ( + -- For retention by size: + -- Associate with each batch the number of bytes that the log would have if this batch and later batches are retained. + -- In other words, this is the reverse aggregated size (counted from the end to the beginning). + -- An example: + -- Batch size | Aggregated | Reverse aggregated | + -- (in order) | size | size | + -- 1 | 1 | 10 - 1 + 1 = 10 | + -- 2 | 1 + 2 = 3 | 10 - 3 + 2 = 9 | + -- 3 | 3 + 3 = 6 | 10 - 6 + 3 = 7 | + -- 4 | 6 + 4 = 10 | 10 - 10 + 4 = 4 | + -- The reverse aggregated size is equal to what the aggregated size would be if the sorting order is reverse, + -- but doing so explicitly might be costly, hence the formula. + -- For retention by time: + -- Associate with each batch its effective timestamp. 
+ SELECT topic_id, partition, last_offset, + base_offset, + l_log.byte_size - SUM(byte_size) OVER (ORDER BY topic_id, partition, last_offset) + byte_size AS reverse_agg_byte_size, + batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) AS effective_timestamp + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + ORDER BY topic_id, partition, last_offset + ) + -- Look for the first batch that complies with both retention policies (if they are enabled): + -- For size: + -- The first batch which being retained with the subsequent batches would make the total log size <= retention_bytes. + -- For time: + -- The first batch which effective timestamp is greater or equal to the last timestamp to retain. + SELECT base_offset + FROM augmented_batches + WHERE (l_request.retention_bytes < 0 OR reverse_agg_byte_size <= l_request.retention_bytes) + AND (l_request.retention_ms < 0 OR effective_timestamp >= (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT - l_request.retention_ms) + ORDER BY topic_id, partition, last_offset + LIMIT 1 + INTO l_base_offset_of_first_batch_to_keep; + + -- No batch satisfy the retention policy == delete everything, i.e. up to HWM. + l_base_offset_of_first_batch_to_keep = COALESCE(l_base_offset_of_first_batch_to_keep, l_log.high_watermark); + + -- Enforce the limit of batches per request if needed + IF max_batches_per_request > 0 THEN + -- Find the base offset of the batch that would be the Nth one to delete + -- This effectively limits how many batches we'll delete in one call + WITH batches_to_delete AS ( + SELECT base_offset + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND last_offset < l_base_offset_of_first_batch_to_keep + ORDER BY topic_id, partition, last_offset + ) + SELECT + CASE + WHEN COUNT(*) <= max_batches_per_request THEN l_base_offset_of_first_batch_to_keep + ELSE (SELECT base_offset FROM batches_to_delete ORDER BY base_offset LIMIT 1 OFFSET max_batches_per_request) + END INTO l_base_offset_of_first_batch_to_keep + FROM batches_to_delete; + END IF; + END IF; + + -- Nothing to delete. + IF l_base_offset_of_first_batch_to_keep IS NULL THEN + RETURN NEXT (l_request.topic_id, l_request.partition, NULL, 0, 0::BIGINT, l_log.log_start_offset)::enforce_retention_response_v1; + CONTINUE; + END IF; + + SELECT COUNT(*), SUM(byte_size) + FROM batches + WHERE topic_id = l_request.topic_id + AND partition = l_request.partition + AND last_offset < l_base_offset_of_first_batch_to_keep + INTO l_batches_deleted, l_bytes_deleted; + + SELECT * + FROM delete_records_v1(arg_now, array[ROW(l_request.topic_id, l_request.partition, l_base_offset_of_first_batch_to_keep)::delete_records_request_v1]) + INTO l_delete_records_response; + + -- This should never happen, just fail. 
+ IF l_delete_records_response.error IS DISTINCT FROM NULL THEN + RAISE 'delete_records_v1 returned unexpected error: %', l_delete_records_response; + END IF; + + RETURN NEXT ( + l_request.topic_id, + l_request.partition, + NULL::enforce_retention_response_error_v1, + COALESCE(l_batches_deleted, 0), + COALESCE(l_bytes_deleted, 0), + l_delete_records_response.log_start_offset + )::enforce_retention_response_v1; + END LOOP; +END; +$$ +; diff --git a/storage/inkless/bin/main/message/CacheKey.json b/storage/inkless/bin/main/message/CacheKey.json new file mode 100644 index 0000000000..37f45f2459 --- /dev/null +++ b/storage/inkless/bin/main/message/CacheKey.json @@ -0,0 +1,52 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +{ + "apiKey": 1, + "name": "CacheKey", + "type": "data", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "Object", + "type": "string", + "versions": "0", + "about": "Identifier for object in backing storage" + }, + { + "name": "Range", + "type": "ByteRange", + "versions": "0", + "about": "A range of bytes within the specified file", + "fields": [ + { + "name": "Offset", + "type": "int64", + "versions": "0", + "about": "The index of the first byte in the object" + }, + { + "name": "Length", + "type": "int64", + "versions": "0", + "about": "Number of bytes" + } + ] + } + ] +} diff --git a/storage/inkless/bin/main/message/FileExtent.json b/storage/inkless/bin/main/message/FileExtent.json new file mode 100644 index 0000000000..1af9d01afe --- /dev/null +++ b/storage/inkless/bin/main/message/FileExtent.json @@ -0,0 +1,58 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +{ + "apiKey": 2, + "name": "FileExtent", + "type": "data", + "validVersions": "0", + "flexibleVersions": "0+", + "fields": [ + { + "name": "Object", + "type": "string", + "versions": "0", + "about": "Object identifier" + }, + { + "name": "Range", + "type": "ByteRange", + "versions": "0", + "about": "Range of bytes stored in the specified buffer", + "fields": [ + { + "name": "Offset", + "type": "int64", + "versions": "0", + "about": "The index of the first byte in the object" + }, + { + "name": "Length", + "type": "int64", + "versions": "0", + "about": "Number of bytes" + } + ] + }, + { + "name": "Data", + "type": "bytes", + "versions": "0+", + "about": "Start offset of the segment." + } + ] +} \ No newline at end of file diff --git a/storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider b/storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider new file mode 100644 index 0000000000..1742cba093 --- /dev/null +++ b/storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider @@ -0,0 +1,6 @@ +# Copyright (c) 2024 Aiven, Helsinki, Finland. https://aiven.io/ +io.aiven.inkless.test_utils.HeaderProvider +io.aiven.inkless.test_utils.SimpleRecordProvider +io.aiven.inkless.test_utils.RecordsProvider +io.aiven.inkless.test_utils.TopicIdPartitionProvider +io.aiven.inkless.test_utils.DataLayout$DataLayoutArbitraryProvider diff --git a/storage/inkless/bin/test/log4j.properties b/storage/inkless/bin/test/log4j.properties new file mode 100644 index 0000000000..0d46d239df --- /dev/null +++ b/storage/inkless/bin/test/log4j.properties @@ -0,0 +1,7 @@ +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n + +log4j.logger.io.aiven.inkless.control_plane.InMemoryControlPlane=WARN diff --git a/storage/inkless/bin/test/test_gcs_credentials.json b/storage/inkless/bin/test/test_gcs_credentials.json new file mode 100644 index 0000000000..f73e506cb8 --- /dev/null +++ b/storage/inkless/bin/test/test_gcs_credentials.json @@ -0,0 +1,6 @@ +{ + "client_id": "test-client-id", + "client_secret": "test-client-secret", + "refresh_token": "x", + "type": "authorized_user" +} diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/ControlPlane.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/ControlPlane.java index a008b02154..35370be50f 100644 --- a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/ControlPlane.java +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/ControlPlane.java @@ -55,6 +55,8 @@ List findBatches( void createTopicAndPartitions(Set requests); + void initLogDisklessStartOffset(Set requests); + List deleteRecords(List requests); void deleteTopics(Set topicIds); @@ -94,6 +96,8 @@ static ControlPlane create(final InklessConfig config, final Time time) { boolean isSafeToDeleteFile(String objectKeyPath); + List getDisklessLog(List requests); + // used for testing purposes only List getLogInfo(List requests); } diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogRequest.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogRequest.java new file mode 100644 index 0000000000..a84b15e64f --- /dev/null +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogRequest.java @@ -0,0 +1,24 @@ +/* + * Inkless + * Copyright (C) 
2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +package io.aiven.inkless.control_plane; + +import org.apache.kafka.common.Uuid; + +public record GetDisklessLogRequest(Uuid topicId, + int partition) { +} diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogResponse.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogResponse.java new file mode 100644 index 0000000000..d148b941f2 --- /dev/null +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/GetDisklessLogResponse.java @@ -0,0 +1,42 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +package io.aiven.inkless.control_plane; + +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.protocol.Errors; + +public record GetDisklessLogResponse(Uuid topicId, + int partition, + Errors error, + long logStartOffset, + long highWatermark, + Long disklessStartOffset) { + public static final long INVALID_OFFSET = -1L; + + public static GetDisklessLogResponse success(final Uuid topicId, + final int partition, + final long logStartOffset, + final long highWatermark, + final Long disklessStartOffset) { + return new GetDisklessLogResponse(topicId, partition, Errors.NONE, logStartOffset, highWatermark, disklessStartOffset); + } + + public static GetDisklessLogResponse unknownTopicOrPartition(final Uuid topicId, final int partition) { + return new GetDisklessLogResponse(topicId, partition, Errors.UNKNOWN_TOPIC_OR_PARTITION, INVALID_OFFSET, INVALID_OFFSET, null); + } +} diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InMemoryControlPlane.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InMemoryControlPlane.java index aa99e79e84..e1192d9f36 100644 --- a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InMemoryControlPlane.java +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InMemoryControlPlane.java @@ -90,6 +90,26 @@ public synchronized void createTopicAndPartitions(final Set requests) { + for (final InitLogDisklessStartOffsetRequest request : requests) { + final TopicIdPartition topicIdPartition = new TopicIdPartition( + request.topicId(), request.partition(), request.topicName()); + + // Only create if not already exists + if (!logs.containsKey(topicIdPartition)) { + LOGGER.info("Initializing {} with logStartOffset {}, disklessStartOffset {}", + topicIdPartition, request.logStartOffset(), request.disklessStartOffset()); + final LogInfo logInfo = new LogInfo(); + logInfo.logStartOffset = request.logStartOffset(); + logInfo.highWatermark = request.disklessStartOffset(); + logInfo.disklessStartOffset = request.disklessStartOffset(); + logs.put(topicIdPartition, logInfo); + batches.putIfAbsent(topicIdPartition, new TreeMap<>()); + } + } + } + @Override protected synchronized Iterator commitFileForValidRequests( final String objectKey, @@ -644,6 +664,27 @@ public boolean isSafeToDeleteFile(String objectKeyPath) { return !files.containsKey(objectKeyPath); } + @Override + public synchronized List getDisklessLog(final List requests) { + final List result = new ArrayList<>(); + for (final GetDisklessLogRequest request : requests) { + final TopicIdPartition tidp = findTopicIdPartition(request.topicId(), request.partition()); + final LogInfo logInfo; + if (tidp == null || (logInfo = logs.get(tidp)) == null) { + result.add(GetDisklessLogResponse.unknownTopicOrPartition(request.topicId(), request.partition())); + } else { + result.add(GetDisklessLogResponse.success( + request.topicId(), + request.partition(), + logInfo.logStartOffset, + logInfo.highWatermark, + logInfo.disklessStartOffset + )); + } + } + return result; + } + @Override public synchronized List getLogInfo(final List requests) { final List result = new ArrayList<>(); @@ -679,6 +720,7 @@ private static class LogInfo { long logStartOffset = 0; long highWatermark = 0; long byteSize = 0; + Long disklessStartOffset = null; } private static class FileInfo { diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InitLogDisklessStartOffsetRequest.java 
b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InitLogDisklessStartOffsetRequest.java new file mode 100644 index 0000000000..adc83501d9 --- /dev/null +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/InitLogDisklessStartOffsetRequest.java @@ -0,0 +1,27 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +package io.aiven.inkless.control_plane; + +import org.apache.kafka.common.Uuid; + +public record InitLogDisklessStartOffsetRequest(Uuid topicId, + String topicName, + int partition, + long logStartOffset, + long disklessStartOffset) { +} diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/GetDisklessLogJob.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/GetDisklessLogJob.java new file mode 100644 index 0000000000..53588c48c2 --- /dev/null +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/GetDisklessLogJob.java @@ -0,0 +1,114 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +package io.aiven.inkless.control_plane.postgres; + +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.utils.Time; + +import org.jooq.Configuration; +import org.jooq.DSLContext; +import org.jooq.Field; +import org.jooq.Row2; + +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.function.Consumer; + +import io.aiven.inkless.control_plane.GetDisklessLogRequest; +import io.aiven.inkless.control_plane.GetDisklessLogResponse; +import io.aiven.inkless.control_plane.postgres.converters.UUIDtoUuidConverter; + +import static org.jooq.generated.Tables.LOGS; +import static org.jooq.impl.DSL.field; +import static org.jooq.impl.DSL.name; +import static org.jooq.impl.DSL.row; +import static org.jooq.impl.DSL.values; + +public class GetDisklessLogJob implements Callable> { + private static final Field REQUEST_TOPIC_ID = field(name("topic_id"), LOGS.TOPIC_ID.getDataType()); + private static final Field REQUEST_PARTITION = field(name("partition"), LOGS.PARTITION.getDataType()); + + private final Time time; + private final DSLContext jooqCtx; + private final List requests; + private final Consumer durationCallback; + + public GetDisklessLogJob(final Time time, + final DSLContext jooqCtx, + final List requests, + final Consumer durationCallback) { + this.time = time; + this.jooqCtx = jooqCtx; + this.requests = requests; + this.durationCallback = durationCallback; + } + + @Override + public List call() throws Exception { + return JobUtils.run(this::runOnce, time, durationCallback); + } + + private List runOnce() throws Exception { + return jooqCtx.transactionResult((final Configuration conf) -> { + final DSLContext context = conf.dsl(); + + final UUIDtoUuidConverter uuidConverter = new UUIDtoUuidConverter(); + final var requestRows = requests.stream() + .map(req -> row(uuidConverter.to(req.topicId()), req.partition())) + .toArray(Row2[]::new); + @SuppressWarnings("unchecked") + final var requestsTable = values(requestRows) + .as("requests", REQUEST_TOPIC_ID.getName(), REQUEST_PARTITION.getName()); + + final var select = context.select( + requestsTable.field(REQUEST_TOPIC_ID), + requestsTable.field(REQUEST_PARTITION), + LOGS.LOG_START_OFFSET, + LOGS.HIGH_WATERMARK, + LOGS.DISKLESS_START_OFFSET + ).from(requestsTable) + .leftJoin(LOGS).on(LOGS.TOPIC_ID.eq(requestsTable.field(REQUEST_TOPIC_ID)) + .and(LOGS.PARTITION.eq(requestsTable.field(REQUEST_PARTITION)))); + + final List responses = new ArrayList<>(); + try (final var cursor = select.fetchSize(1000).fetchLazy()) { + for (final var record : cursor) { + // The synthetic table stores raw java.util.UUID, need to convert explicitly + final UUID rawTopicId = (UUID) (Object) record.get(requestsTable.field(REQUEST_TOPIC_ID)); + final Uuid topicId = uuidConverter.from(rawTopicId); + final Integer partition = record.get(requestsTable.field(REQUEST_PARTITION)); + final Long logStartOffset = record.get(LOGS.LOG_START_OFFSET); + if (logStartOffset == null) { + responses.add(GetDisklessLogResponse.unknownTopicOrPartition(topicId, partition)); + } else { + responses.add(GetDisklessLogResponse.success( + topicId, + partition, + logStartOffset, + record.get(LOGS.HIGH_WATERMARK), + record.get(LOGS.DISKLESS_START_OFFSET) + )); + } + } + } + return responses; + }); + } +} diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJob.java 
b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJob.java new file mode 100644 index 0000000000..8a55b2e18e --- /dev/null +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJob.java @@ -0,0 +1,79 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +package io.aiven.inkless.control_plane.postgres; + +import org.apache.kafka.common.utils.Time; + +import org.jooq.Configuration; +import org.jooq.DSLContext; + +import java.util.Set; +import java.util.function.Consumer; + +import io.aiven.inkless.control_plane.InitLogDisklessStartOffsetRequest; + +import static org.jooq.generated.Tables.LOGS; + +public class InitLogDisklessStartOffsetJob implements Runnable { + private final Time time; + private final DSLContext jooqCtx; + private final Set requests; + private final Consumer durationCallback; + + InitLogDisklessStartOffsetJob(final Time time, + final DSLContext jooqCtx, + final Set requests, + final Consumer durationCallback) { + this.time = time; + this.jooqCtx = jooqCtx; + this.requests = requests; + this.durationCallback = durationCallback; + } + + @Override + public void run() { + if (requests.isEmpty()) { + return; + } + JobUtils.run(this::runOnce, time, durationCallback); + } + + private void runOnce() { + jooqCtx.transaction((final Configuration conf) -> { + var insertStep = conf.dsl().insertInto(LOGS, + LOGS.TOPIC_ID, + LOGS.PARTITION, + LOGS.TOPIC_NAME, + LOGS.LOG_START_OFFSET, + LOGS.HIGH_WATERMARK, + LOGS.BYTE_SIZE, + LOGS.DISKLESS_START_OFFSET); + for (final var request : requests) { + insertStep = insertStep.values( + request.topicId(), + request.partition(), + request.topicName(), + request.logStartOffset(), + request.disklessStartOffset(), + 0L, + request.disklessStartOffset()); + } + insertStep.onConflictDoNothing().execute(); + }); + } +} diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlane.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlane.java index e76c869315..1e647d8613 100644 --- a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlane.java +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlane.java @@ -57,8 +57,11 @@ import io.aiven.inkless.control_plane.FileToDelete; import io.aiven.inkless.control_plane.FindBatchRequest; import io.aiven.inkless.control_plane.FindBatchResponse; +import io.aiven.inkless.control_plane.GetDisklessLogRequest; +import io.aiven.inkless.control_plane.GetDisklessLogResponse; import io.aiven.inkless.control_plane.GetLogInfoRequest; import io.aiven.inkless.control_plane.GetLogInfoResponse; +import io.aiven.inkless.control_plane.InitLogDisklessStartOffsetRequest; import io.aiven.inkless.control_plane.ListOffsetsRequest; import 
io.aiven.inkless.control_plane.ListOffsetsResponse; import io.aiven.inkless.control_plane.MergedFileBatch; @@ -149,6 +152,12 @@ public void createTopicAndPartitions(final Set new TopicsAndPartitionsCreateJob(time, jobsJooqCtx, requests, pgMetrics::onTopicCreateCompleted).run(); } + @Override + public void initLogDisklessStartOffset(final Set requests) { + // Expected to be performed synchronously + new InitLogDisklessStartOffsetJob(time, jobsJooqCtx, requests, pgMetrics::onInitLogDisklessStartOffsetCompleted).run(); + } + @Override protected Iterator commitFileForValidRequests( final String objectKey, @@ -333,6 +342,20 @@ public boolean isSafeToDeleteFile(String objectKeyPath) { } } + @Override + public List getDisklessLog(final List requests) { + try { + final GetDisklessLogJob job = new GetDisklessLogJob(time, readJooqCtx, requests, pgMetrics::onGetDisklessLogCompleted); + return job.call(); + } catch (final Exception e) { + if (e instanceof ControlPlaneException) { + throw (ControlPlaneException) e; + } else { + throw new ControlPlaneException("Failed to get diskless log", e); + } + } + } + @Override public List getLogInfo(final List requests) { try { diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlaneMetrics.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlaneMetrics.java index 3f8d55738e..4fe7085577 100644 --- a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlaneMetrics.java +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/PostgresControlPlaneMetrics.java @@ -36,6 +36,7 @@ public class PostgresControlPlaneMetrics implements Closeable { private final QueryMetrics commitFileMetrics = new QueryMetrics("CommitFile"); private final QueryMetrics commitFileMergeWorkItemMetrics = new QueryMetrics("CommitFileMergeWorkItem"); private final QueryMetrics topicCreateMetrics = new QueryMetrics("TopicCreate"); + private final QueryMetrics initLogDisklessStartOffsetMetrics = new QueryMetrics("InitLogDisklessStartOffset"); private final QueryMetrics topicDeleteMetrics = new QueryMetrics("TopicDelete"); private final QueryMetrics fileDeleteMetrics = new QueryMetrics("FilesDelete"); private final QueryMetrics listOffsetsMetrics = new QueryMetrics("ListOffsets"); @@ -46,6 +47,7 @@ public class PostgresControlPlaneMetrics implements Closeable { private final QueryMetrics releaseFileMergeWorkItemMetrics = new QueryMetrics("ReleaseFileMergeWorkItem"); private final QueryMetrics safeDeleteFileCheckMetrics = new QueryMetrics("SafeDeleteFileCheck"); private final QueryMetrics getLogInfoMetrics = new QueryMetrics("GetLogInfo"); + private final QueryMetrics getDisklessLogMetrics = new QueryMetrics("GetDisklessLog"); public PostgresControlPlaneMetrics(Time time) { this.time = Objects.requireNonNull(time, "time cannot be null"); @@ -75,6 +77,10 @@ public void onTopicCreateCompleted(Long duration) { topicCreateMetrics.record(duration); } + public void onInitLogDisklessStartOffsetCompleted(Long duration) { + initLogDisklessStartOffsetMetrics.record(duration); + } + public void onFilesDeleteCompleted(Long duration) { fileDeleteMetrics.record(duration); } @@ -111,6 +117,10 @@ public void onGetLogInfoCompleted(Long duration) { getLogInfoMetrics.record(duration); } + public void onGetDisklessLogCompleted(Long duration) { + getDisklessLogMetrics.record(duration); + } + @Override public void close() { findBatchesMetrics.remove(); @@ -118,6 +128,7 @@ public void 
close() { commitFileMetrics.remove(); commitFileMergeWorkItemMetrics.remove(); topicCreateMetrics.remove(); + initLogDisklessStartOffsetMetrics.remove(); topicDeleteMetrics.remove(); fileDeleteMetrics.remove(); listOffsetsMetrics.remove(); @@ -128,6 +139,7 @@ public void close() { releaseFileMergeWorkItemMetrics.remove(); safeDeleteFileCheckMetrics.remove(); getLogInfoMetrics.remove(); + getDisklessLogMetrics.remove(); } private class QueryMetrics { diff --git a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJob.java b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJob.java index 6e77d0aad4..c699f04dc8 100644 --- a/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJob.java +++ b/storage/inkless/src/main/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJob.java @@ -69,10 +69,11 @@ private void runOnce() { LOGS.TOPIC_NAME, LOGS.LOG_START_OFFSET, LOGS.HIGH_WATERMARK, - LOGS.BYTE_SIZE); + LOGS.BYTE_SIZE, + LOGS.DISKLESS_START_OFFSET); for (final var request : requests) { for (int partition = 0; partition < request.numPartitions(); partition++) { - insertStep = insertStep.values(request.topicId(), partition, request.topicName(), 0L, 0L, 0L); + insertStep = insertStep.values(request.topicId(), partition, request.topicName(), 0L, 0L, 0L, 0L); } } final int rowsInserted = insertStep.onConflictDoNothing().execute(); diff --git a/storage/inkless/src/main/resources/db/migration/V11__Add_diskless_offsets_to_logs.sql b/storage/inkless/src/main/resources/db/migration/V11__Add_diskless_offsets_to_logs.sql new file mode 100644 index 0000000000..1d6cc731c9 --- /dev/null +++ b/storage/inkless/src/main/resources/db/migration/V11__Add_diskless_offsets_to_logs.sql @@ -0,0 +1,3 @@ +-- Copyright (c) 2024-2025 Aiven, Helsinki, Finland. 
https://aiven.io/ +ALTER TABLE logs ADD COLUMN diskless_start_offset offset_nullable_t DEFAULT NULL; +ALTER TABLE logs ADD COLUMN diskless_end_offset offset_nullable_t DEFAULT NULL; \ No newline at end of file diff --git a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/CommitFileJobTest.java b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/CommitFileJobTest.java index 77daba9322..4bfd1388ec 100644 --- a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/CommitFileJobTest.java +++ b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/CommitFileJobTest.java @@ -112,9 +112,9 @@ void simpleCommit() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) @@ -168,9 +168,9 @@ void commitMultipleFiles() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 159L, 111L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L + 245, 322L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 159L, 111L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L + 245, 322L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) @@ -215,9 +215,9 @@ void nonExistentPartition() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) @@ -249,9 +249,9 @@ void simpleIdempotentCommit() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 27L, 50L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) @@ -283,9 +283,9 @@ void inSequenceCommit() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 27L, 150L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 27L, 150L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) @@ -325,9 +325,9 @@ void outOfOrderCommit(int 
lastBatchSequence, int firstBatchSequence) { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) @@ -357,9 +357,9 @@ void outOfOrderCommitNewEpoch() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L, null, null) ); // The file will be deleted because its only batch is rejected. @@ -389,9 +389,9 @@ void invalidProducerEpoch() { assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())) .containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 0L, 15L, 100L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L, null, null) ); assertThat(DBUtils.getAllFiles(pgContainer.getDataSource())) diff --git a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteRecordsJobTest.java b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteRecordsJobTest.java index b6eb541544..7762890f39 100644 --- a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteRecordsJobTest.java +++ b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteRecordsJobTest.java @@ -185,10 +185,10 @@ void deleteRecordsFromMultipleTopics(final List order) { assertThat(responses).containsExactlyElementsOf(expectedResponses); assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 18L, 36L, (long) file2Batch1Size + file3Batch1Size), - new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 24L, 24L, 0L), - new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID_2, 0, TOPIC_2, 0L, 24L, (long) file2Batch2Size + file3Batch3Size) + new LogsRecord(TOPIC_ID_0, 0, TOPIC_0, 18L, 36L, (long) file2Batch1Size + file3Batch1Size, null, null), + new LogsRecord(TOPIC_ID_0, 1, TOPIC_0, 24L, 24L, 0L, null, null), + new LogsRecord(TOPIC_ID_1, 0, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID_2, 0, TOPIC_2, 0L, 24L, (long) file2Batch2Size + file3Batch3Size, null, null) ); assertThat(DBUtils.getAllBatches(pgContainer.getDataSource())).containsExactlyInAnyOrder( diff --git a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteTopicJobTest.java b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteTopicJobTest.java index 522d8d6592..f25e5ec4e7 100644 --- a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteTopicJobTest.java +++ b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/DeleteTopicJobTest.java @@ 
-149,7 +149,7 @@ void deleteMultipleTopics() { // The logs of the deleted topics must be gone, i.e. only TOPIC_2 remains. assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactly( - new LogsRecord(TOPIC_ID_2, 0, TOPIC_2, 0L, 24L, (long) file2Batch2Size + file3Batch3Size) + new LogsRecord(TOPIC_ID_2, 0, TOPIC_2, 0L, 24L, (long) file2Batch2Size + file3Batch3Size, null, null) ); // The batches of the deleted topics must be gone, i.e. only TOPIC_2 remains. diff --git a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJobTest.java b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJobTest.java new file mode 100644 index 0000000000..c793de728a --- /dev/null +++ b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/InitLogDisklessStartOffsetJobTest.java @@ -0,0 +1,164 @@ +/* + * Inkless + * Copyright (C) 2024 - 2025 Aiven OY + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +package io.aiven.inkless.control_plane.postgres; + +import org.apache.kafka.common.Uuid; +import org.apache.kafka.common.utils.Time; + +import org.jooq.DSLContext; +import org.jooq.SQLDialect; +import org.jooq.generated.tables.records.LogsRecord; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Set; + +import io.aiven.inkless.control_plane.InitLogDisklessStartOffsetRequest; +import io.aiven.inkless.test_utils.InklessPostgreSQLContainer; +import io.aiven.inkless.test_utils.PostgreSQLTestContainer; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.jooq.generated.Tables.LOGS; + +@Testcontainers +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +class InitLogDisklessStartOffsetJobTest { + @Container + static final InklessPostgreSQLContainer pgContainer = PostgreSQLTestContainer.container(); + + static final String TOPIC_1 = "topic1"; + static final String TOPIC_2 = "topic2"; + static final Uuid TOPIC_ID1 = new Uuid(10, 12); + static final Uuid TOPIC_ID2 = new Uuid(555, 333); + + @BeforeEach + void setUp(final TestInfo testInfo) { + pgContainer.createDatabase(testInfo); + pgContainer.migrate(); + } + + @AfterEach + void tearDown() { + pgContainer.tearDown(); + } + + @Test + void empty() { + final InitLogDisklessStartOffsetJob job = new InitLogDisklessStartOffsetJob( + Time.SYSTEM, pgContainer.getJooqCtx(), Set.of(), 
durationMs -> {}); + job.run(); + assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).isEmpty(); + } + + @Test + void createLogsWithDisklessStartOffset() { + final Set requests = Set.of( + // logStartOffset, disklessStartOffset (highWatermark) + new InitLogDisklessStartOffsetRequest(TOPIC_ID1, TOPIC_1, 0, 50L, 100L), + new InitLogDisklessStartOffsetRequest(TOPIC_ID1, TOPIC_1, 1, 150L, 200L), + new InitLogDisklessStartOffsetRequest(TOPIC_ID2, TOPIC_2, 0, 25L, 50L) + ); + final InitLogDisklessStartOffsetJob job = new InitLogDisklessStartOffsetJob( + Time.SYSTEM, pgContainer.getJooqCtx(), requests, durationMs -> {}); + job.run(); + + assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( + // LogsRecord: topicId, partition, topicName, logStartOffset, highWatermark, byteSize, disklessStartOffset, disklessEndOffset + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 50L, 100L, 0L, 100L, null), + new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 150L, 200L, 0L, 200L, null), + new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 25L, 50L, 0L, 50L, null) + ); + } + + @Test + void doesNotOverwriteExistingLog() throws SQLException { + // Create log that already exists + try (final Connection connection = pgContainer.getDataSource().getConnection()) { + final DSLContext ctx = DSL.using(connection, SQLDialect.POSTGRES); + ctx.insertInto(LOGS, + LOGS.TOPIC_ID, LOGS.PARTITION, LOGS.TOPIC_NAME, LOGS.LOG_START_OFFSET, LOGS.HIGH_WATERMARK, LOGS.BYTE_SIZE, LOGS.DISKLESS_START_OFFSET + ).values( + TOPIC_ID1, 0, TOPIC_1, 0L, 100L, 999L, 50L + ).execute(); + connection.commit(); + } + + // Try to create logs - existing one should not be overwritten + final Set requests = Set.of( + new InitLogDisklessStartOffsetRequest(TOPIC_ID1, TOPIC_1, 0, 100L, 100L), // Should not overwrite existing + new InitLogDisklessStartOffsetRequest(TOPIC_ID1, TOPIC_1, 1, 200L, 200L) // Should be created + ); + final InitLogDisklessStartOffsetJob job = new InitLogDisklessStartOffsetJob( + Time.SYSTEM, pgContainer.getJooqCtx(), requests, durationMs -> {}); + job.run(); + + assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 100L, 999L, 50L, null), // Unchanged + new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 200L, 200L, 0L, 200L, null) // Created + ); + } + + @Test + void idempotentExecution() { + final Set requests = Set.of( + new InitLogDisklessStartOffsetRequest(TOPIC_ID1, TOPIC_1, 0, 100L, 100L) + ); + + // First execution + final InitLogDisklessStartOffsetJob job1 = new InitLogDisklessStartOffsetJob( + Time.SYSTEM, pgContainer.getJooqCtx(), requests, durationMs -> {}); + job1.run(); + + assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 100L, 100L, 0L, 100L, null) + ); + + // Second execution with same value - should not change anything + final InitLogDisklessStartOffsetJob job2 = new InitLogDisklessStartOffsetJob( + Time.SYSTEM, pgContainer.getJooqCtx(), requests, durationMs -> {}); + job2.run(); + + assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 100L, 100L, 0L, 100L, null) + ); + + // Third execution with different value - should not overwrite + final Set differentRequests = Set.of( + new InitLogDisklessStartOffsetRequest(TOPIC_ID1, TOPIC_1, 0, 999L, 999L) + ); + final InitLogDisklessStartOffsetJob job3 = new InitLogDisklessStartOffsetJob( + Time.SYSTEM, pgContainer.getJooqCtx(), differentRequests, 
durationMs -> {}); + job3.run(); + + assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 100L, 100L, 0L, 100L, null) // Still 100L, not 999L + ); + } +} diff --git a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJobTest.java b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJobTest.java index 7e818812c9..1dfaaac5e9 100644 --- a/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJobTest.java +++ b/storage/inkless/src/test/java/io/aiven/inkless/control_plane/postgres/TopicsAndPartitionsCreateJobTest.java @@ -96,18 +96,18 @@ void createTopicsAndPartition() { final TopicsAndPartitionsCreateJob job1 = new TopicsAndPartitionsCreateJob(Time.SYSTEM, pgContainer.getJooqCtx(), createTopicAndPartitionsRequests, durationMs -> {}); job1.run(); assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 0L, 0L, 0L, null, null) ); // Repetition doesn't affect anything. final TopicsAndPartitionsCreateJob job2 = new TopicsAndPartitionsCreateJob(Time.SYSTEM, pgContainer.getJooqCtx(), createTopicAndPartitionsRequests, durationMs -> {}); job2.run(); assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 0L, 0L, 0L, null, null) ); } @@ -128,10 +128,10 @@ void createPartitionAfterTopic() { job2.run(); assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID2, 1, TOPIC_2, 0L, 0L, 0L) + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID2, 1, TOPIC_2, 0L, 0L, 0L, null, null) ); } @@ -157,9 +157,9 @@ void existingRecordsNotAffected() throws SQLException { job1.run(); assertThat(DBUtils.getAllLogs(pgContainer.getDataSource())).containsExactlyInAnyOrder( - new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 101L, 201L, 999L), // unaffected - new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L), - new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 102L, 202L, 1999L) // unaffected + new LogsRecord(TOPIC_ID1, 0, TOPIC_1, 101L, 201L, 999L, null, null), // unaffected + new LogsRecord(TOPIC_ID1, 1, TOPIC_1, 0L, 0L, 0L, null, null), + new LogsRecord(TOPIC_ID2, 0, TOPIC_2, 102L, 202L, 1999L, null, null) // unaffected ); } } diff --git a/storage/src/main/java/org/apache/kafka/storage/internals/log/LogConfig.java b/storage/src/main/java/org/apache/kafka/storage/internals/log/LogConfig.java index 838ab49929..5ba7999c81 100644 --- 
a/storage/src/main/java/org/apache/kafka/storage/internals/log/LogConfig.java +++ b/storage/src/main/java/org/apache/kafka/storage/internals/log/LogConfig.java @@ -396,6 +396,10 @@ public int initFileSize() { else return 0; } + + public boolean disklessEnable() { + return getBoolean(TopicConfig.DISKLESS_ENABLE_CONFIG); + } public boolean remoteStorageEnable() { return remoteLogConfig.remoteStorageEnable; @@ -512,11 +516,11 @@ private static void validateDiskless(Map existingConfigs, Optional.ofNullable((Boolean) newConfigs.get(TopicConfig.DISKLESS_ENABLE_CONFIG)) .ifPresent(isBeingEnabled -> { if (isBeingEnabled) { - // diskless.enable=true -> diskless.enable must be already set to true - if (wasDiskless.isPresent() && !wasDiskless.get()) { - // cannot change from diskless.enable = false to diskless.enable = true - throw new InvalidConfigurationException("It is invalid to enable diskless"); - } +// // diskless.enable=true -> diskless.enable must be already set to true +// if (wasDiskless.isPresent() && !wasDiskless.get()) { +// // cannot change from diskless.enable = false to diskless.enable = true +// throw new InvalidConfigurationException("It is invalid to enable diskless"); +// } if (isRemoteLogStorageEnabled) { throw new InvalidConfigurationException("Diskless and remote storage cannot be enabled simultaneously"); diff --git a/streams/integration-tests/bin/test/log4j2.yaml b/streams/integration-tests/bin/test/log4j2.yaml new file mode 100644 index 0000000000..0942036a33 --- /dev/null +++ b/streams/integration-tests/bin/test/log4j2.yaml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: kafka + level: ERROR + + - name: state.change.logger + level: ERROR + + - name: org.apache.kafka + level: ERROR + + - name: org.apache.kafka.clients + level: ERROR + + - name: org.apache.kafka.clients.consumer + level: INFO + + - name: org.apache.kafka.clients.producer + level: INFO + + - name: org.apache.kafka.streams + level: INFO + + - name: org.apache.kafka.clients.producer.ProducerConfig + level: ERROR + + - name: org.apache.kafka.clients.consumer.ConsumerConfig + level: ERROR + + - name: org.apache.kafka.clients.admin.AdminClientConfig + level: ERROR + + - name: org.apache.kafka.streams.StreamsConfig + level: ERROR diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala new file mode 100644 index 0000000000..4cfc811728 --- /dev/null +++ b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration + +import org.apache.kafka.streams.integration.utils.StreamToTableJoinScalaIntegrationTestBase +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.scala.kstream._ +import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} +import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api._ + +import java.util.Properties + +/** + * Test suite that does an example to demonstrate stream-table joins in Kafka Streams + *
+ * The suite contains the test case using Scala APIs `testShouldCountClicksPerRegion` and the same test case using the + * Java APIs `testShouldCountClicksPerRegionJava`. The idea is to demonstrate that both generate the same result. + */ +@Tag("integration") +class StreamToTableJoinScalaIntegrationTestImplicitSerdes extends StreamToTableJoinScalaIntegrationTestBase { + + @Test def testShouldCountClicksPerRegion(): Unit = { + + // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Grouped, Produced, + // Consumed and Joined instances. So all APIs below that accept Grouped, Produced, Consumed or Joined will + // get these instances automatically + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration: Properties = getStreamsConfiguration() + + val builder = new StreamsBuilder() + + val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + + val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) + + // Compute the total per region by summing the individual click counts per region. + val clicksPerRegion: KTable[String, Long] = + userClicksStream + + // Join the stream against the table. + .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) + + // Change the stream from -> to -> + .map((_, regionWithClicks) => regionWithClicks) + + // Compute the total per region by summing the individual click counts per region. + .groupByKey + .reduce(_ + _) + + // Write the (continuously updating) results to the output topic. + clicksPerRegion.toStream.to(outputTopic) + + val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) + streams.start() + + val actualClicksPerRegion: java.util.List[KeyValue[String, Long]] = + produceNConsume(userClicksTopic, userRegionsTopic, outputTopic) + + assertTrue(!actualClicksPerRegion.isEmpty, "Expected to process some data") + + streams.close() + } + + @Test + def testShouldCountClicksPerRegionWithNamedRepartitionTopic(): Unit = { + + // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Grouped, Produced, + // Consumed and Joined instances. So all APIs below that accept Grouped, Produced, Consumed or Joined will + // get these instances automatically + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration: Properties = getStreamsConfiguration() + + val builder = new StreamsBuilder() + + val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + + val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) + + // Compute the total per region by summing the individual click counts per region. + val clicksPerRegion: KTable[String, Long] = + userClicksStream + + // Join the stream against the table. + .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) + + // Change the stream from -> to -> + .map((_, regionWithClicks) => regionWithClicks) + + // Compute the total per region by summing the individual click counts per region. + .groupByKey + .reduce(_ + _) + + // Write the (continuously updating) results to the output topic. 
+ clicksPerRegion.toStream.to(outputTopic) + + val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) + streams.start() + + val actualClicksPerRegion: java.util.List[KeyValue[String, Long]] = + produceNConsume(userClicksTopic, userRegionsTopic, outputTopic) + + assertTrue(!actualClicksPerRegion.isEmpty, "Expected to process some data") + + streams.close() + } + + @Test + def testShouldCountClicksPerRegionJava(): Unit = { + + import org.apache.kafka.streams.kstream.{KStream => KStreamJ, KTable => KTableJ, _} + import org.apache.kafka.streams.{KafkaStreams => KafkaStreamsJ, StreamsBuilder => StreamsBuilderJ} + + import java.lang.{Long => JLong} + + val streamsConfiguration: Properties = getStreamsConfiguration() + + streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + + val builder: StreamsBuilderJ = new StreamsBuilderJ() + + val userClicksStream: KStreamJ[String, JLong] = + builder.stream[String, JLong](userClicksTopicJ, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + + val userRegionsTable: KTableJ[String, String] = + builder.table[String, String](userRegionsTopicJ, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + // Join the stream against the table. + val valueJoinerJ: ValueJoiner[JLong, String, (String, JLong)] = + (clicks: JLong, region: String) => (if (region == null) "UNKNOWN" else region, clicks) + val userClicksJoinRegion: KStreamJ[String, (String, JLong)] = userClicksStream.leftJoin( + userRegionsTable, + valueJoinerJ, + Joined.`with`[String, JLong, String](NewSerdes.stringSerde, NewSerdes.javaLongSerde, NewSerdes.stringSerde) + ) + + // Change the stream from -> to -> + val clicksByRegion: KStreamJ[String, JLong] = userClicksJoinRegion.map { (_, regionWithClicks) => + new KeyValue(regionWithClicks._1, regionWithClicks._2) + } + + // Compute the total per region by summing the individual click counts per region. + val clicksPerRegion: KTableJ[String, JLong] = clicksByRegion + .groupByKey(Grouped.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + .reduce((v1, v2) => v1 + v2) + + // Write the (continuously updating) results to the output topic. + clicksPerRegion.toStream.to(outputTopicJ, Produced.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + + val streams = new KafkaStreamsJ(builder.build(), streamsConfiguration) + + streams.start() + produceNConsume(userClicksTopicJ, userRegionsTopicJ, outputTopicJ) + streams.close() + } +} diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala new file mode 100644 index 0000000000..3e9813dda2 --- /dev/null +++ b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration + +import java.util.Properties +import java.util.regex.Pattern +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api._ +import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} +import org.apache.kafka.streams.scala.kstream._ +import org.apache.kafka.streams.integration.utils.{EmbeddedKafkaCluster, IntegrationTestUtils} +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.common.utils.{MockTime, Utils} +import org.apache.kafka.common.serialization.{LongDeserializer, StringDeserializer, StringSerializer} +import org.apache.kafka.test.TestUtils +import org.junit.jupiter.api.Tag + +import java.io.File + +/** + * Test suite that does a classic word count example. + *
+ * The suite contains the test case using Scala APIs `testShouldCountWords` and the same test case using the + * Java APIs `testShouldCountWordsJava`. The idea is to demonstrate that both generate the same result. + */ +@Tag("integration") +class WordCountTest extends WordCountTestData { + + private val cluster: EmbeddedKafkaCluster = new EmbeddedKafkaCluster(1) + + final private val alignedTime = (System.currentTimeMillis() / 1000 + 1) * 1000 + private val mockTime: MockTime = cluster.time + mockTime.setCurrentTimeMs(alignedTime) + + private val testFolder: File = TestUtils.tempDirectory() + + @BeforeEach + def startKafkaCluster(): Unit = { + cluster.start() + cluster.createTopic(inputTopic) + cluster.createTopic(outputTopic) + cluster.createTopic(inputTopicJ) + cluster.createTopic(outputTopicJ) + } + + @AfterEach + def stopKafkaCluster(): Unit = { + cluster.stop() + Utils.delete(testFolder) + } + + @Test + def testShouldCountWords(): Unit = { + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration = getStreamsConfiguration() + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + // generate word counts + val wordCounts: KTable[String, Long] = + textLines + .flatMapValues(v => pattern.split(v.toLowerCase)) + .groupBy((_, v) => v) + .count() + + // write to output topic + wordCounts.toStream.to(outputTopic) + + val streams = new KafkaStreams(streamBuilder.build(), streamsConfiguration) + streams.start() + + // produce and consume synchronously + val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopic, outputTopic) + + streams.close() + + import scala.jdk.CollectionConverters._ + assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) + } + + @Test + def testShouldCountWordsMaterialized(): Unit = { + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamsConfiguration = getStreamsConfiguration() + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + // generate word counts + val wordCounts: KTable[String, Long] = + textLines + .flatMapValues(v => pattern.split(v.toLowerCase)) + .groupBy((k, v) => v) + .count()(Materialized.as("word-count")) + + // write to output topic + wordCounts.toStream.to(outputTopic) + + val streams = new KafkaStreams(streamBuilder.build(), streamsConfiguration) + streams.start() + + // produce and consume synchronously + val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopic, outputTopic) + + streams.close() + + import scala.jdk.CollectionConverters._ + assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) + } + + @Test + def testShouldCountWordsJava(): Unit = { + + import org.apache.kafka.streams.{KafkaStreams => KafkaStreamsJ, StreamsBuilder => StreamsBuilderJ} + import org.apache.kafka.streams.kstream.{ + KTable => KTableJ, + KStream => KStreamJ, + KGroupedStream => KGroupedStreamJ, + _ + } + import scala.jdk.CollectionConverters._ + + val streamsConfiguration = getStreamsConfiguration() + streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + 
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopicJ) + + val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + val splits: KStreamJ[String, String] = textLines.flatMapValues { line => + pattern.split(line.toLowerCase).toBuffer.asJava + } + + val grouped: KGroupedStreamJ[String, String] = splits.groupBy { (_, v) => + v + } + + val wordCounts: KTableJ[String, java.lang.Long] = grouped.count() + + wordCounts.toStream.to(outputTopicJ, Produced.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) + + val streams: KafkaStreamsJ = new KafkaStreamsJ(streamBuilder.build(), streamsConfiguration) + streams.start() + + val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopicJ, outputTopicJ) + + streams.close() + + assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) + } + + private def getStreamsConfiguration(): Properties = { + val streamsConfiguration: Properties = new Properties() + + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-test") + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "10000") + streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getPath) + streamsConfiguration + } + + private def getProducerConfig(): Properties = { + val p = new Properties() + p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ProducerConfig.ACKS_CONFIG, "all") + p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p + } + + private def getConsumerConfig(): Properties = { + val p = new Properties() + p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-scala-integration-test-standard-consumer") + p.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) + p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[LongDeserializer]) + p + } + + private def produceNConsume(inputTopic: String, outputTopic: String): java.util.List[KeyValue[String, Long]] = { + + val linesProducerConfig: Properties = getProducerConfig() + + import scala.jdk.CollectionConverters._ + IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues.asJava, linesProducerConfig, mockTime) + + val consumerConfig = getConsumerConfig() + + IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedWordCounts.size) + } +} + +trait WordCountTestData { + val inputTopic = s"inputTopic" + val outputTopic = s"outputTopic" + val inputTopicJ = s"inputTopicJ" + val outputTopicJ = s"outputTopicJ" + + val inputValues = List( + "Hello Kafka Streams", + "All streams lead to Kafka", + "Join Kafka Summit", + "И теперь пошли русские слова" + ) + + val expectedWordCounts: List[KeyValue[String, Long]] = List( + new KeyValue("hello", 1L), + new KeyValue("all", 1L), + new KeyValue("streams", 2L), + new KeyValue("lead", 1L), + new KeyValue("to", 1L), + new KeyValue("join", 1L), + new 
KeyValue("kafka", 3L), + new KeyValue("summit", 1L), + new KeyValue("и", 1L), + new KeyValue("теперь", 1L), + new KeyValue("пошли", 1L), + new KeyValue("русские", 1L), + new KeyValue("слова", 1L) + ) +} diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala new file mode 100644 index 0000000000..f3aec5784c --- /dev/null +++ b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration.utils + +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.clients.producer.ProducerConfig +import org.apache.kafka.common.serialization._ +import org.apache.kafka.common.utils.{MockTime, Utils} +import org.apache.kafka.streams._ +import org.apache.kafka.test.TestUtils +import org.junit.jupiter.api._ + +import java.io.File +import java.util.Properties + +/** + * Test suite base that prepares Kafka cluster for stream-table joins in Kafka Streams + *
+ */ +@Tag("integration") +class StreamToTableJoinScalaIntegrationTestBase extends StreamToTableJoinTestData { + + private val cluster: EmbeddedKafkaCluster = new EmbeddedKafkaCluster(1) + + final private val alignedTime = (System.currentTimeMillis() / 1000 + 1) * 1000 + private val mockTime: MockTime = cluster.time + mockTime.setCurrentTimeMs(alignedTime) + + private val testFolder: File = TestUtils.tempDirectory() + + @BeforeEach + def startKafkaCluster(): Unit = { + cluster.start() + cluster.createTopic(userClicksTopic) + cluster.createTopic(userRegionsTopic) + cluster.createTopic(outputTopic) + cluster.createTopic(userClicksTopicJ) + cluster.createTopic(userRegionsTopicJ) + cluster.createTopic(outputTopicJ) + } + + @AfterEach + def stopKafkaCluster(): Unit = { + cluster.stop() + Utils.delete(testFolder) + } + + def getStreamsConfiguration(): Properties = { + val streamsConfiguration: Properties = new Properties() + + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-table-join-scala-integration-test") + streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "1000") + streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getPath) + + streamsConfiguration + } + + private def getUserRegionsProducerConfig(): Properties = { + val p = new Properties() + p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ProducerConfig.ACKS_CONFIG, "all") + p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p + } + + private def getUserClicksProducerConfig(): Properties = { + val p = new Properties() + p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ProducerConfig.ACKS_CONFIG, "all") + p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) + p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[LongSerializer]) + p + } + + private def getConsumerConfig(): Properties = { + val p = new Properties() + p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) + p.put(ConsumerConfig.GROUP_ID_CONFIG, "join-scala-integration-test-standard-consumer") + p.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) + p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[LongDeserializer]) + p + } + + def produceNConsume( + userClicksTopic: String, + userRegionsTopic: String, + outputTopic: String, + waitTillRecordsReceived: Boolean = true + ): java.util.List[KeyValue[String, Long]] = { + + import _root_.scala.jdk.CollectionConverters._ + + // Publish user-region information. + val userRegionsProducerConfig: Properties = getUserRegionsProducerConfig() + IntegrationTestUtils.produceKeyValuesSynchronously( + userRegionsTopic, + userRegions.asJava, + userRegionsProducerConfig, + mockTime, + false + ) + + // Publish user-click information. 
+ val userClicksProducerConfig: Properties = getUserClicksProducerConfig() + IntegrationTestUtils.produceKeyValuesSynchronously( + userClicksTopic, + userClicks.asJava, + userClicksProducerConfig, + mockTime, + false + ) + + if (waitTillRecordsReceived) { + // consume and verify result + val consumerConfig = getConsumerConfig() + + IntegrationTestUtils.waitUntilFinalKeyValueRecordsReceived( + consumerConfig, + outputTopic, + expectedClicksPerRegion.asJava + ) + } else { + java.util.Collections.emptyList() + } + } +} diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala new file mode 100644 index 0000000000..4e8a2f024a --- /dev/null +++ b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.integration.utils + +import org.apache.kafka.streams.KeyValue + +trait StreamToTableJoinTestData { + val brokers = "localhost:9092" + + val userClicksTopic = s"user-clicks" + val userRegionsTopic = s"user-regions" + val outputTopic = s"output-topic" + + val userClicksTopicJ = s"user-clicks-j" + val userRegionsTopicJ = s"user-regions-j" + val outputTopicJ = s"output-topic-j" + + // Input 1: Clicks per user (multiple records allowed per user). + val userClicks: Seq[KeyValue[String, Long]] = Seq( + new KeyValue("alice", 13L), + new KeyValue("bob", 4L), + new KeyValue("chao", 25L), + new KeyValue("bob", 19L), + new KeyValue("dave", 56L), + new KeyValue("eve", 78L), + new KeyValue("alice", 40L), + new KeyValue("fang", 99L) + ) + + // Input 2: Region per user (multiple records allowed per user). + val userRegions: Seq[KeyValue[String, String]] = Seq( + new KeyValue("alice", "asia"), /* Alice lived in Asia originally... */ + new KeyValue("bob", "americas"), + new KeyValue("chao", "asia"), + new KeyValue("dave", "europe"), + new KeyValue("alice", "europe"), /* ...but moved to Europe some time later. 
*/ + new KeyValue("eve", "americas"), + new KeyValue("fang", "asia") + ) + + val expectedClicksPerRegion: Seq[KeyValue[String, Long]] = Seq( + new KeyValue("americas", 101L), + new KeyValue("europe", 109L), + new KeyValue("asia", 124L) + ) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala new file mode 100644 index 0000000000..527f0544d0 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import org.apache.kafka.streams.KeyValue +import org.apache.kafka.streams.kstream._ +import scala.jdk.CollectionConverters._ +import java.lang.{Iterable => JIterable} + +import org.apache.kafka.streams.processor.ProcessorContext + +/** + * Implicit classes that offer conversions of Scala function literals to SAM (Single Abstract Method) objects in Java. + * These make the Scala APIs much more expressive, with less boilerplate and more succinct. 
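+ *
+ * A minimal, illustrative sketch (not part of the public API: this object is `private[scala]` and is used
+ * internally by the Scala wrapper classes): with the appropriate implicit class in scope, a Scala function
+ * literal is adapted to the corresponding Java SAM interface through an extension method, e.g.
+ * {{{
+ * import org.apache.kafka.streams.scala.FunctionsCompatConversions.PredicateFromFunction
+ *
+ * // (K, V) => Boolean becomes an org.apache.kafka.streams.kstream.Predicate[K, V]
+ * val isPositive = ((k: String, v: Long) => v > 0).asPredicate
+ * }}}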
+ */ +private[scala] object FunctionsCompatConversions { + + implicit class ForeachActionFromFunction[K, V](val p: (K, V) => Unit) extends AnyVal { + def asForeachAction: ForeachAction[K, V] = (key: K, value: V) => p(key, value) + } + + implicit class PredicateFromFunction[K, V](val p: (K, V) => Boolean) extends AnyVal { + def asPredicate: Predicate[K, V] = (key: K, value: V) => p(key, value) + } + + implicit class MapperFromFunction[T, U, VR](val f: (T, U) => VR) extends AnyVal { + def asKeyValueMapper: KeyValueMapper[T, U, VR] = (key: T, value: U) => f(key, value) + def asValueJoiner: ValueJoiner[T, U, VR] = (value1: T, value2: U) => f(value1, value2) + } + + implicit class KeyValueMapperFromFunction[K, V, KR, VR](val f: (K, V) => (KR, VR)) extends AnyVal { + def asKeyValueMapper: KeyValueMapper[K, V, KeyValue[KR, VR]] = (key: K, value: V) => { + val (kr, vr) = f(key, value) + KeyValue.pair(kr, vr) + } + } + + implicit class FunctionFromFunction[V, VR](val f: V => VR) extends AnyVal { + def asJavaFunction: java.util.function.Function[V, VR] = (value: V) => f(value) + } + + implicit class ValueMapperFromFunction[V, VR](val f: V => VR) extends AnyVal { + def asValueMapper: ValueMapper[V, VR] = (value: V) => f(value) + } + + implicit class FlatValueMapperFromFunction[V, VR](val f: V => Iterable[VR]) extends AnyVal { + def asValueMapper: ValueMapper[V, JIterable[VR]] = (value: V) => f(value).asJava + } + + implicit class ValueMapperWithKeyFromFunction[K, V, VR](val f: (K, V) => VR) extends AnyVal { + def asValueMapperWithKey: ValueMapperWithKey[K, V, VR] = (readOnlyKey: K, value: V) => f(readOnlyKey, value) + } + + implicit class FlatValueMapperWithKeyFromFunction[K, V, VR](val f: (K, V) => Iterable[VR]) extends AnyVal { + def asValueMapperWithKey: ValueMapperWithKey[K, V, JIterable[VR]] = + (readOnlyKey: K, value: V) => f(readOnlyKey, value).asJava + } + + implicit class AggregatorFromFunction[K, V, VA](val f: (K, V, VA) => VA) extends AnyVal { + def asAggregator: Aggregator[K, V, VA] = (key: K, value: V, aggregate: VA) => f(key, value, aggregate) + } + + implicit class MergerFromFunction[K, VR](val f: (K, VR, VR) => VR) extends AnyVal { + def asMerger: Merger[K, VR] = (aggKey: K, aggOne: VR, aggTwo: VR) => f(aggKey, aggOne, aggTwo) + } + + implicit class ReducerFromFunction[V](val f: (V, V) => V) extends AnyVal { + def asReducer: Reducer[V] = (value1: V, value2: V) => f(value1, value2) + } + + implicit class InitializerFromFunction[VA](val f: () => VA) extends AnyVal { + def asInitializer: Initializer[VA] = () => f() + } + + @deprecated( + since = "4.0.0" + ) + implicit class TransformerSupplierFromFunction[K, V, VO](val f: () => Transformer[K, V, VO]) extends AnyVal { + def asTransformerSupplier: TransformerSupplier[K, V, VO] = () => f() + } + + @deprecated( + since = "4.0.0" + ) + implicit class TransformerSupplierAsJava[K, V, VO](val supplier: TransformerSupplier[K, V, Iterable[VO]]) + extends AnyVal { + def asJava: TransformerSupplier[K, V, JIterable[VO]] = () => { + val innerTransformer = supplier.get() + new Transformer[K, V, JIterable[VO]] { + override def transform(key: K, value: V): JIterable[VO] = innerTransformer.transform(key, value).asJava + override def init(context: ProcessorContext): Unit = innerTransformer.init(context) + override def close(): Unit = innerTransformer.close() + } + } + } + + @deprecated( + since = "4.0.0" + ) + implicit class ValueTransformerSupplierAsJava[V, VO](val supplier: ValueTransformerSupplier[V, Iterable[VO]]) + extends AnyVal { + def asJava: 
ValueTransformerSupplier[V, JIterable[VO]] = () => { + val innerTransformer = supplier.get() + new ValueTransformer[V, JIterable[VO]] { + override def transform(value: V): JIterable[VO] = innerTransformer.transform(value).asJava + override def init(context: ProcessorContext): Unit = innerTransformer.init(context) + override def close(): Unit = innerTransformer.close() + } + } + } + + @deprecated( + since = "4.0.0" + ) + implicit class ValueTransformerSupplierWithKeyAsJava[K, V, VO]( + val supplier: ValueTransformerWithKeySupplier[K, V, Iterable[VO]] + ) extends AnyVal { + def asJava: ValueTransformerWithKeySupplier[K, V, JIterable[VO]] = () => { + val innerTransformer = supplier.get() + new ValueTransformerWithKey[K, V, JIterable[VO]] { + override def transform(key: K, value: V): JIterable[VO] = innerTransformer.transform(key, value).asJava + override def init(context: ProcessorContext): Unit = innerTransformer.init(context) + override def close(): Unit = innerTransformer.close() + } + } + } +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala new file mode 100644 index 0000000000..626038a315 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.KeyValue +import org.apache.kafka.streams.kstream.{ + CogroupedKStream => CogroupedKStreamJ, + KGroupedStream => KGroupedStreamJ, + KGroupedTable => KGroupedTableJ, + KStream => KStreamJ, + KTable => KTableJ, + SessionWindowedCogroupedKStream => SessionWindowedCogroupedKStreamJ, + SessionWindowedKStream => SessionWindowedKStreamJ, + TimeWindowedCogroupedKStream => TimeWindowedCogroupedKStreamJ, + TimeWindowedKStream => TimeWindowedKStreamJ +} +import org.apache.kafka.streams.processor.StateStore +import org.apache.kafka.streams.scala.kstream._ + +/** + * Implicit conversions between the Scala wrapper objects and the underlying Java objects. 
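+ *
+ * A minimal, illustrative sketch (topic names are hypothetical; assumes the default serdes from `Serdes._`
+ * are in implicit scope): importing these conversions lets the compiler derive the `Consumed` and `Produced`
+ * instances required by the Scala `StreamsBuilder` and `KStream` APIs from the implicit serdes, so no
+ * explicit configuration objects need to be passed:
+ * {{{
+ * import org.apache.kafka.streams.scala.StreamsBuilder
+ * import org.apache.kafka.streams.scala.ImplicitConversions._
+ * import org.apache.kafka.streams.scala.serialization.Serdes._
+ *
+ * val builder = new StreamsBuilder()
+ * // Consumed[String, String] and Produced[String, String] are summoned implicitly
+ * builder.stream[String, String]("input-topic").to("output-topic")
+ * }}}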
+ */ +object ImplicitConversions { + + implicit def wrapKStream[K, V](inner: KStreamJ[K, V]): KStream[K, V] = + new KStream[K, V](inner) + + implicit def wrapKGroupedStream[K, V](inner: KGroupedStreamJ[K, V]): KGroupedStream[K, V] = + new KGroupedStream[K, V](inner) + + implicit def wrapTimeWindowedKStream[K, V](inner: TimeWindowedKStreamJ[K, V]): TimeWindowedKStream[K, V] = + new TimeWindowedKStream[K, V](inner) + + implicit def wrapSessionWindowedKStream[K, V](inner: SessionWindowedKStreamJ[K, V]): SessionWindowedKStream[K, V] = + new SessionWindowedKStream[K, V](inner) + + implicit def wrapCogroupedKStream[K, V](inner: CogroupedKStreamJ[K, V]): CogroupedKStream[K, V] = + new CogroupedKStream[K, V](inner) + + implicit def wrapTimeWindowedCogroupedKStream[K, V]( + inner: TimeWindowedCogroupedKStreamJ[K, V] + ): TimeWindowedCogroupedKStream[K, V] = + new TimeWindowedCogroupedKStream[K, V](inner) + + implicit def wrapSessionWindowedCogroupedKStream[K, V]( + inner: SessionWindowedCogroupedKStreamJ[K, V] + ): SessionWindowedCogroupedKStream[K, V] = + new SessionWindowedCogroupedKStream[K, V](inner) + + implicit def wrapKTable[K, V](inner: KTableJ[K, V]): KTable[K, V] = + new KTable[K, V](inner) + + implicit def wrapKGroupedTable[K, V](inner: KGroupedTableJ[K, V]): KGroupedTable[K, V] = + new KGroupedTable[K, V](inner) + + implicit def tuple2ToKeyValue[K, V](tuple: (K, V)): KeyValue[K, V] = new KeyValue(tuple._1, tuple._2) + + // we would also like to allow users implicit serdes + // and these implicits will convert them to `Grouped`, `Produced` or `Consumed` + + implicit def consumedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Consumed[K, V] = + Consumed.`with`[K, V] + + implicit def groupedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Grouped[K, V] = + Grouped.`with`[K, V] + + implicit def joinedFromKeyValueOtherSerde[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): Joined[K, V, VO] = + Joined.`with`[K, V, VO] + + implicit def materializedFromSerde[K, V, S <: StateStore](implicit + keySerde: Serde[K], + valueSerde: Serde[V] + ): Materialized[K, V, S] = + Materialized.`with`[K, V, S] + + implicit def producedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Produced[K, V] = + Produced.`with`[K, V] + + implicit def repartitionedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Repartitioned[K, V] = + Repartitioned.`with`[K, V] + + implicit def streamJoinFromKeyValueOtherSerde[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): StreamJoined[K, V, VO] = + StreamJoined.`with`[K, V, VO] +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala new file mode 100644 index 0000000000..25f5ce339b --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import java.util.Properties +import java.util.regex.Pattern + +import org.apache.kafka.streams.kstream.GlobalKTable +import org.apache.kafka.streams.processor.StateStore +import org.apache.kafka.streams.state.StoreBuilder +import org.apache.kafka.streams.{StreamsBuilder => StreamsBuilderJ, Topology} +import org.apache.kafka.streams.scala.kstream.{Consumed, KStream, KTable, Materialized} + +import scala.jdk.CollectionConverters._ + +/** + * Wraps the Java class StreamsBuilder and delegates method calls to the underlying Java object. + */ +class StreamsBuilder(inner: StreamsBuilderJ = new StreamsBuilderJ) { + + /** + * Create a [[kstream.KStream]] from the specified topic. + *

+ * The `implicit Consumed` instance provides the values of the `auto.offset.reset` strategy, `TimestampExtractor`, + * key and value deserializers, etc. If the implicit is not found in scope, a compiler error will result. + *

+ * A convenient alternative is to have the necessary implicit serdes in scope, which will be implicitly + * converted to generate an instance of `Consumed`. @see [[ImplicitConversions]]. + * {{{ + * // Brings all implicit conversions in scope + * import ImplicitConversions._ + * + * // Bring implicit default serdes in scope + * import Serdes._ + * + * val builder = new StreamsBuilder() + * + * // stream function gets the implicit Consumed which is constructed automatically + * // from the serdes through the implicits in ImplicitConversions#consumedFromSerde + * val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + * }}} + * + * @param topic the topic name + * @return a [[kstream.KStream]] for the specified topic + */ + def stream[K, V](topic: String)(implicit consumed: Consumed[K, V]): KStream[K, V] = + new KStream(inner.stream[K, V](topic, consumed)) + + /** + * Create a [[kstream.KStream]] from the specified topics. + * + * @param topics the topic names + * @return a [[kstream.KStream]] for the specified topics + * @see #stream(String) + * @see `org.apache.kafka.streams.StreamsBuilder#stream` + */ + def stream[K, V](topics: Set[String])(implicit consumed: Consumed[K, V]): KStream[K, V] = + new KStream(inner.stream[K, V](topics.asJava, consumed)) + + /** + * Create a [[kstream.KStream]] from the specified topic pattern. + * + * @param topicPattern the topic name pattern + * @return a [[kstream.KStream]] for the specified topics + * @see #stream(String) + * @see `org.apache.kafka.streams.StreamsBuilder#stream` + */ + def stream[K, V](topicPattern: Pattern)(implicit consumed: Consumed[K, V]): KStream[K, V] = + new KStream(inner.stream[K, V](topicPattern, consumed)) + + /** + * Create a [[kstream.KTable]] from the specified topic. + *

+ * The `implicit Consumed` instance provides the values of the `auto.offset.reset` strategy, `TimestampExtractor`, + * key and value deserializers, etc. If the implicit is not found in scope, a compiler error will result. + *

+ * A convenient alternative is to have the necessary implicit serdes in scope, which will be implicitly + * converted to generate an instance of `Consumed`. @see [[ImplicitConversions]]. + * {{{ + * // Brings all implicit conversions in scope + * import ImplicitConversions._ + * + * // Bring implicit default serdes in scope + * import Serdes._ + * + * val builder = new StreamsBuilder() + * + * // stream function gets the implicit Consumed which is constructed automatically + * // from the serdes through the implicits in ImplicitConversions#consumedFromSerde + * val userClicksStream: KTable[String, Long] = builder.table(userClicksTopic) + * }}} + * + * @param topic the topic name + * @return a [[kstream.KTable]] for the specified topic + * @see `org.apache.kafka.streams.StreamsBuilder#table` + */ + def table[K, V](topic: String)(implicit consumed: Consumed[K, V]): KTable[K, V] = + new KTable(inner.table[K, V](topic, consumed)) + + /** + * Create a [[kstream.KTable]] from the specified topic. + * + * @param topic the topic name + * @param materialized the instance of `Materialized` used to materialize a state store + * @return a [[kstream.KTable]] for the specified topic + * @see #table(String) + * @see `org.apache.kafka.streams.StreamsBuilder#table` + */ + def table[K, V](topic: String, materialized: Materialized[K, V, ByteArrayKeyValueStore])(implicit + consumed: Consumed[K, V] + ): KTable[K, V] = + new KTable(inner.table[K, V](topic, consumed, materialized)) + + /** + * Create a `GlobalKTable` from the specified topic. The serializers from the implicit `Consumed` + * instance will be used. Input records with `null` key will be dropped. + * + * @param topic the topic name + * @return a `GlobalKTable` for the specified topic + * @see `org.apache.kafka.streams.StreamsBuilder#globalTable` + */ + def globalTable[K, V](topic: String)(implicit consumed: Consumed[K, V]): GlobalKTable[K, V] = + inner.globalTable(topic, consumed) + + /** + * Create a `GlobalKTable` from the specified topic. The resulting `GlobalKTable` will be materialized + * in a local `KeyValueStore` configured with the provided instance of `Materialized`. The serializers + * from the implicit `Consumed` instance will be used. + * + * @param topic the topic name + * @param materialized the instance of `Materialized` used to materialize a state store + * @return a `GlobalKTable` for the specified topic + * @see `org.apache.kafka.streams.StreamsBuilder#globalTable` + */ + def globalTable[K, V](topic: String, materialized: Materialized[K, V, ByteArrayKeyValueStore])(implicit + consumed: Consumed[K, V] + ): GlobalKTable[K, V] = + inner.globalTable(topic, consumed, materialized) + + /** + * Adds a state store to the underlying `Topology`. The store must still be "connected" to a `Processor`, + * `Transformer`, or `ValueTransformer` before it can be used. + *

+ * @param builder the builder used to obtain the `StateStore` instance + * @return the underlying Java abstraction `StreamsBuilder` after adding the `StateStore` + * @throws org.apache.kafka.streams.errors.TopologyException if the state store supplier is already added + * @see `org.apache.kafka.streams.StreamsBuilder#addStateStore` + */ + def addStateStore(builder: StoreBuilder[_ <: StateStore]): StreamsBuilderJ = inner.addStateStore(builder) + + /** + * Adds a global `StateStore` to the topology. Global stores should not be added to `Processor`, `Transformer`, + * or `ValueTransformer` (in contrast to regular stores). + *

+ * It is not required to connect a global store to `Processor`, `Transformer`, or `ValueTransformer`; + * those have read-only access to all global stores by default. + * + * @see `org.apache.kafka.streams.StreamsBuilder#addGlobalStore` + */ + def addGlobalStore[K, V]( + storeBuilder: StoreBuilder[_ <: StateStore], + topic: String, + consumed: Consumed[K, V], + stateUpdateSupplier: org.apache.kafka.streams.processor.api.ProcessorSupplier[K, V, Void, Void] + ): StreamsBuilderJ = + inner.addGlobalStore(storeBuilder, topic, consumed, stateUpdateSupplier) + + def build(): Topology = inner.build() + + /** + * Returns the `Topology` that represents the specified processing logic and accepts + * a `Properties` instance used to indicate whether to optimize topology or not. + * + * @param props the `Properties` used for building possibly optimized topology + * @return the `Topology` that represents the specified processing logic + * @see `org.apache.kafka.streams.StreamsBuilder#build` + */ + def build(props: Properties): Topology = inner.build(props) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala new file mode 100644 index 0000000000..63bcf323af --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.{Branched => BranchedJ, KStream => KStreamJ} + +object Branched { + + /** + * Create an instance of `Branched` with provided branch name suffix. + * + * @param name the branch name suffix to be used (see [[BranchedKStream]] description for details) + * @tparam K key type + * @tparam V value type + * @return a new instance of `Branched` + */ + def as[K, V](name: String): BranchedJ[K, V] = + BranchedJ.as[K, V](name) + + /** + * Create an instance of `Branched` with provided chain function and branch name suffix. + * + * @param chain A function that will be applied to the branch. If the provided function returns + * `null`, its result is ignored, otherwise it is added to the Map returned + * by [[BranchedKStream.defaultBranch()*]] or [[BranchedKStream.noDefaultBranch]] (see + * [[BranchedKStream]] description for details). + * @param name the branch name suffix to be used. 
If `null`, a default branch name suffix will be generated + * (see [[BranchedKStream]] description for details) + * @tparam K key type + * @tparam V value type + * @return a new instance of `Branched` + * @see `org.apache.kafka.streams.kstream.Branched#withFunction(java.util.function.Function, java.lang.String)` + */ + def withFunction[K, V](chain: KStream[K, V] => KStream[K, V], name: String = null): BranchedJ[K, V] = + BranchedJ.withFunction((f: KStreamJ[K, V]) => chain.apply(new KStream[K, V](f)).inner, name) + + /** + * Create an instance of `Branched` with provided chain consumer and branch name suffix. + * + * @param chain A consumer to which the branch will be sent. If a non-null consumer is provided here, + * the respective branch will not be added to the resulting Map returned + * by [[BranchedKStream.defaultBranch()*]] or [[BranchedKStream.noDefaultBranch]] (see + * [[BranchedKStream]] description for details). + * @param name the branch name suffix to be used. If `null`, a default branch name suffix will be generated + * (see [[BranchedKStream]] description for details) + * @tparam K key type + * @tparam V value type + * @return a new instance of `Branched` + * @see `org.apache.kafka.streams.kstream.Branched#withConsumer(java.util.function.Consumer, java.lang.String)` + */ + def withConsumer[K, V](chain: KStream[K, V] => Unit, name: String = null): BranchedJ[K, V] = + BranchedJ.withConsumer((c: KStreamJ[K, V]) => chain.apply(new KStream[K, V](c)), name) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala new file mode 100644 index 0000000000..196198f6e1 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import java.util + +import org.apache.kafka.streams.kstream +import org.apache.kafka.streams.kstream.{BranchedKStream => BranchedKStreamJ} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.PredicateFromFunction + +import scala.jdk.CollectionConverters._ + +/** + * Branches the records in the original stream based on the predicates supplied for the branch definitions. + *

+ * Branches are defined with the [[branch]] or [[defaultBranch()*]] + * methods. Each record is evaluated against the predicates + * supplied to the [[branch]] methods, and is routed to the first branch for which its respective predicate + * evaluates to `true`. If a record does not match any predicate, it is routed to the default branch, + * or dropped if no default branch is created. + *

+ * + * Each branch (which is a [[KStream]] instance) can then be processed either by a function or a consumer + * provided via a [[Branched]] parameter. If certain conditions are met, it can also be accessed from the + * `Map` returned by an optional [[defaultBranch()*]] or [[noDefaultBranch]] method call. + *

+ * The branching happens on a first match basis: A record in the original stream is assigned to the corresponding result + * stream for the first predicate that evaluates to true, and is assigned to this stream only. If you need + * to route a record to multiple streams, you can apply multiple + * [[KStream.filter]] operators to the same [[KStream]] + * instance, one for each predicate, instead of branching. + *
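+ *
+ * For example (an illustrative sketch; the source stream, predicates, branch names and implicit serdes are
+ * hypothetical):
+ * {{{
+ * val branches: Map[String, KStream[String, String]] =
+ *   stream.split(Named.as("split-"))
+ *     .branch((_, v) => v.startsWith("A"), Branched.as("a"))
+ *     .branch((_, v) => v.startsWith("B"), Branched.as("b"))
+ *     .defaultBranch(Branched.as("other"))
+ *
+ * // the resulting map is keyed by "split-a", "split-b" and "split-other"
+ * }}}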

+ * The process of routing the records to different branches is a stateless record-by-record operation. + * + * @tparam K Type of keys + * @tparam V Type of values + */ +class BranchedKStream[K, V](val inner: BranchedKStreamJ[K, V]) { + + /** + * Define a branch for records that match the predicate. + * + * @param predicate A predicate against which each record will be evaluated. + * If this predicate returns `true` for a given record, the record will be + * routed to the current branch and will not be evaluated against the predicates + * for the remaining branches. + * @return `this` to facilitate method chaining + */ + def branch(predicate: (K, V) => Boolean): BranchedKStream[K, V] = { + inner.branch(predicate.asPredicate) + this + } + + /** + * Define a branch for records that match the predicate. + * + * @param predicate A predicate against which each record will be evaluated. + * If this predicate returns `true` for a given record, the record will be + * routed to the current branch and will not be evaluated against the predicates + * for the remaining branches. + * @param branched A [[Branched]] parameter, that allows to define a branch name, an in-place + * branch consumer or branch mapper (see code examples + * for [[BranchedKStream]]) + * @return `this` to facilitate method chaining + */ + def branch(predicate: (K, V) => Boolean, branched: Branched[K, V]): BranchedKStream[K, V] = { + inner.branch(predicate.asPredicate, branched) + this + } + + /** + * Finalize the construction of branches and defines the default branch for the messages not intercepted + * by other branches. Calling [[defaultBranch()*]] or [[noDefaultBranch]] is optional. + * + * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] + * description. + */ + def defaultBranch(): Map[String, KStream[K, V]] = toScalaMap(inner.defaultBranch()) + + /** + * Finalize the construction of branches and defines the default branch for the messages not intercepted + * by other branches. Calling [[defaultBranch()*]] or [[noDefaultBranch]] is optional. + * + * @param branched A [[Branched]] parameter, that allows to define a branch name, an in-place + * branch consumer or branch mapper for [[BranchedKStream]]. + * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] + * description. + */ + def defaultBranch(branched: Branched[K, V]): Map[String, KStream[K, V]] = toScalaMap(inner.defaultBranch(branched)) + + /** + * Finalizes the construction of branches without forming a default branch. + * + * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] + * description. + */ + def noDefaultBranch(): Map[String, KStream[K, V]] = toScalaMap(inner.noDefaultBranch()) + + private def toScalaMap(m: util.Map[String, kstream.KStream[K, V]]): collection.immutable.Map[String, KStream[K, V]] = + m.asScala.map { case (name, kStreamJ) => + (name, new KStream(kStreamJ)) + }.toMap +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala new file mode 100644 index 0000000000..f8f33e7b4e --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{ + CogroupedKStream => CogroupedKStreamJ, + SessionWindows, + SlidingWindows, + Window, + Windows +} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{AggregatorFromFunction, InitializerFromFunction} + +/** + * Wraps the Java class CogroupedKStream and delegates method calls to the underlying Java object. + * + * @tparam KIn Type of keys + * @tparam VOut Type of values + * @param inner The underlying Java abstraction for CogroupedKStream + * @see `org.apache.kafka.streams.kstream.CogroupedKStream` + */ +class CogroupedKStream[KIn, VOut](val inner: CogroupedKStreamJ[KIn, VOut]) { + + /** + * Add an already [[KGroupedStream]] to this [[CogroupedKStream]]. + * + * @param groupedStream a group stream + * @param aggregator a function that computes a new aggregate result + * @return a [[CogroupedKStream]] + */ + def cogroup[VIn]( + groupedStream: KGroupedStream[KIn, VIn], + aggregator: (KIn, VIn, VOut) => VOut + ): CogroupedKStream[KIn, VOut] = + new CogroupedKStream(inner.cogroup(groupedStream.inner, aggregator.asAggregator)) + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result. + * Cannot be { @code null}. + * @param materialized an instance of `Materialized` used to materialize a state store. + * Cannot be { @code null}. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#aggregate` + */ + def aggregate(initializer: => VOut)(implicit + materialized: Materialized[KIn, VOut, ByteArrayKeyValueStore] + ): KTable[KIn, VOut] = new KTable(inner.aggregate((() => initializer).asInitializer, materialized)) + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result. + * Cannot be { @code null}. + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * Cannot be { @code null}. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#aggregate` + */ + def aggregate(initializer: => VOut, named: Named)(implicit + materialized: Materialized[KIn, VOut, ByteArrayKeyValueStore] + ): KTable[KIn, VOut] = new KTable(inner.aggregate((() => initializer).asInitializer, named, materialized)) + + /** + * Create a new [[TimeWindowedCogroupedKStream]] instance that can be used to perform windowed aggregations. + * + * @param windows the specification of the aggregation `Windows` + * @return an instance of [[TimeWindowedCogroupedKStream]] + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#windowedBy` + */ + def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedCogroupedKStream[KIn, VOut] = + new TimeWindowedCogroupedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[TimeWindowedCogroupedKStream]] instance that can be used to perform sliding windowed aggregations. + * + * @param windows the specification of the aggregation `SlidingWindows` + * @return an instance of [[TimeWindowedCogroupedKStream]] + * @see `org.apache.kafka.streams.kstream.CogroupedKStream#windowedBy` + */ + def windowedBy(windows: SlidingWindows): TimeWindowedCogroupedKStream[KIn, VOut] = + new TimeWindowedCogroupedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[SessionWindowedKStream]] instance that can be used to perform session windowed aggregations. + * + * @param windows the specification of the aggregation `SessionWindows` + * @return an instance of [[SessionWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy(windows: SessionWindows): SessionWindowedCogroupedKStream[KIn, VOut] = + new SessionWindowedCogroupedKStream(inner.windowedBy(windows)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala new file mode 100644 index 0000000000..89f461a8fe --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Consumed => ConsumedJ} +import org.apache.kafka.streams.{AutoOffsetReset, Topology} +import org.apache.kafka.streams.processor.TimestampExtractor + +object Consumed { + + /** + * Create an instance of [[Consumed]] with the supplied arguments. `null` values are acceptable. 
+ * + * @tparam K key type + * @tparam V value type + * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from + * config will be used + * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config + * will be used + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new instance of [[Consumed]] + */ + @deprecated("Use `with` method that accepts `AutoOffsetReset` instead", "4.0.0") + def `with`[K, V]( + timestampExtractor: TimestampExtractor, + resetPolicy: Topology.AutoOffsetReset + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(keySerde, valueSerde, timestampExtractor, resetPolicy) + + /** + * Create an instance of [[Consumed]] with the supplied arguments. `null` values are acceptable. + * + * @tparam K key type + * @tparam V value type + * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from + * config will be used + * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config + * will be used + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new instance of [[Consumed]] + */ + def `with`[K, V]( + timestampExtractor: TimestampExtractor, + resetPolicy: AutoOffsetReset + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(keySerde, valueSerde, timestampExtractor, resetPolicy) + + /** + * Create an instance of [[Consumed]] with key and value Serdes. + * + * @tparam K key type + * @tparam V value type + * @return a new instance of [[Consumed]] + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(keySerde, valueSerde) + + /** + * Create an instance of [[Consumed]] with a `org.apache.kafka.streams.processor.TimestampExtractor`. + * + * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from + * config will be used + * @tparam K key type + * @tparam V value type + * @return a new instance of [[Consumed]] + */ + def `with`[K, V]( + timestampExtractor: TimestampExtractor + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(timestampExtractor).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create an instance of [[Consumed]] with a `org.apache.kafka.streams.Topology.AutoOffsetReset`. + * + * @tparam K key type + * @tparam V value type + * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config will be used + * @return a new instance of [[Consumed]] + */ + @deprecated("Use `with` method that accepts `AutoOffsetReset` instead", "4.0.0") + def `with`[K, V]( + resetPolicy: Topology.AutoOffsetReset + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(resetPolicy).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create an instance of [[Consumed]] with a `org.apache.kafka.streams.AutoOffsetReset`. + * + * @tparam K key type + * @tparam V value type + * @param resetPolicy the offset reset policy to be used. 
If `null` the default reset policy from config will be used + * @return a new instance of [[Consumed]] + */ + def `with`[K, V]( + resetPolicy: AutoOffsetReset + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = + ConsumedJ.`with`(resetPolicy).withKeySerde(keySerde).withValueSerde(valueSerde) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala new file mode 100644 index 0000000000..de1aa4e983 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Grouped => GroupedJ} + +object Grouped { + + /** + * Construct a `Grouped` instance with the provided key and value Serdes. + * If the Serde params are `null` the default serdes defined in the configs will be used. + * + * @tparam K the key type + * @tparam V the value type + * @param keySerde keySerde that will be used to materialize a stream + * @param valueSerde valueSerde that will be used to materialize a stream + * @return a new instance of [[Grouped]] configured with the provided serdes + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): GroupedJ[K, V] = + GroupedJ.`with`(keySerde, valueSerde) + + /** + * Construct a `Grouped` instance with the provided key and value Serdes. + * If the Serde params are `null` the default serdes defined in the configs will be used. + * + * @tparam K the key type + * @tparam V the value type + * @param name the name used as part of a potential repartition topic + * @param keySerde keySerde that will be used to materialize a stream + * @param valueSerde valueSerde that will be used to materialize a stream + * @return a new instance of [[Grouped]] configured with the provided serdes + */ + def `with`[K, V](name: String)(implicit keySerde: Serde[K], valueSerde: Serde[V]): GroupedJ[K, V] = + GroupedJ.`with`(name, keySerde, valueSerde) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala new file mode 100644 index 0000000000..6233ad15f4 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Joined => JoinedJ} + +object Joined { + + /** + * Create an instance of `org.apache.kafka.streams.kstream.Joined` with key, value, and otherValue Serde + * instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new `org.apache.kafka.streams.kstream.Joined` instance with the provided serdes + */ + def `with`[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): JoinedJ[K, V, VO] = + JoinedJ.`with`(keySerde, valueSerde, otherValueSerde) + + /** + * Create an instance of `org.apache.kafka.streams.kstream.Joined` with key, value, and otherValue Serde + * instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param name name of possible repartition topic + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new `org.apache.kafka.streams.kstream.Joined` instance with the provided serdes + */ + // disable spotless scala, which wants to make a mess of the argument lists + // format: off + def `with`[K, V, VO](name: String) + (implicit keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO]): JoinedJ[K, V, VO] = + JoinedJ.`with`(keySerde, valueSerde, otherValueSerde, name) + // format:on +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala new file mode 100644 index 0000000000..01e7c1c520 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.internals.KTableImpl +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.kstream.{ + KGroupedStream => KGroupedStreamJ, + KTable => KTableJ, + SessionWindows, + SlidingWindows, + Window, + Windows +} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + ReducerFromFunction, + ValueMapperFromFunction +} + +/** + * Wraps the Java class KGroupedStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KGroupedStream + * @see `org.apache.kafka.streams.kstream.KGroupedStream` + */ +class KGroupedStream[K, V](val inner: KGroupedStreamJ[K, V]) { + + /** + * Count the number of records in this stream by the grouped key. + * The result is written into a local `KeyValueStore` (which is basically an ever-updating materialized view) + * provided by the given `materialized`. + * + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val javaCountTable: KTableJ[K, java.lang.Long] = + inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[K, ByteArrayKeyValueStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[K, Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Count the number of records in this stream by the grouped key. + * The result is written into a local `KeyValueStore` (which is basically an ever-updating materialized view) + * provided by the given `materialized`. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#count` + */ + def count(named: Named)(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val javaCountTable: KTableJ[K, java.lang.Long] = + inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[K, ByteArrayKeyValueStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[K, Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function `(V, V) => V` that computes a new aggregate result. + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#reduce` + */ + def reduce(reducer: (V, V) => V)(implicit materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function `(V, V) => V` that computes a new aggregate result. + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#reduce` + */ + def reduce(reducer: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result + * @param aggregator an `Aggregator` that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#aggregate` + */ + def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an `Initializer` that computes an initial intermediate aggregation result + * @param aggregator an `Aggregator` that computes a new aggregate result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedStream#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, named, materialized)) + + /** + * Create a new [[TimeWindowedKStream]] instance that can be used to perform windowed aggregations. + * + * @param windows the specification of the aggregation `Windows` + * @return an instance of [[TimeWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedKStream[K, V] = + new TimeWindowedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[TimeWindowedKStream]] instance that can be used to perform sliding windowed aggregations. + * + * @param windows the specification of the aggregation `SlidingWindows` + * @return an instance of [[TimeWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy(windows: SlidingWindows): TimeWindowedKStream[K, V] = + new TimeWindowedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[SessionWindowedKStream]] instance that can be used to perform session windowed aggregations. + * + * @param windows the specification of the aggregation `SessionWindows` + * @return an instance of [[SessionWindowedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` + */ + def windowedBy(windows: SessionWindows): SessionWindowedKStream[K, V] = + new SessionWindowedKStream(inner.windowedBy(windows)) + + /** + * Create a new [[CogroupedKStream]] from this grouped KStream to allow cogrouping other [[KGroupedStream]] to it. + * + * @param aggregator an `Aggregator` that computes a new aggregate result + * @return an instance of [[CogroupedKStream]] + * @see `org.apache.kafka.streams.kstream.KGroupedStream#cogroup` + */ + def cogroup[VR](aggregator: (K, V, VR) => VR): CogroupedKStream[K, VR] = + new CogroupedKStream(inner.cogroup(aggregator.asAggregator)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala new file mode 100644 index 0000000000..3d9e052a2f --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{KGroupedTable => KGroupedTableJ} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + ReducerFromFunction +} + +/** + * Wraps the Java class KGroupedTable and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KGroupedTable + * @see `org.apache.kafka.streams.kstream.KGroupedTable` + */ +class KGroupedTable[K, V](inner: KGroupedTableJ[K, V]) { + + /** + * Count number of records of the original [[KTable]] that got [[KTable#groupBy]] to + * the same key into a new instance of [[KTable]]. + * + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val c: KTable[K, java.lang.Long] = + new KTable(inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]])) + c.mapValues[Long](Long2long _) + } + + /** + * Count number of records of the original [[KTable]] that got [[KTable#groupBy]] to + * the same key into a new instance of [[KTable]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#count` + */ + def count(named: Named)(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { + val c: KTable[K, java.lang.Long] = + new KTable(inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]])) + c.mapValues[Long](Long2long _) + } + + /** + * Combine the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]]. + * + * @param adder a function that adds a new value to the aggregate result + * @param subtractor a function that removed an old value from the aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#reduce` + */ + def reduce(adder: (V, V) => V, subtractor: (V, V) => V)(implicit + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.reduce(adder.asReducer, subtractor.asReducer, materialized)) + + /** + * Combine the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]]. 
+ * + * @param adder a function that adds a new value to the aggregate result + * @param subtractor a function that removed an old value from the aggregate result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#reduce` + */ + def reduce(adder: (V, V) => V, subtractor: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.reduce(adder.asReducer, subtractor.asReducer, named, materialized)) + + /** + * Aggregate the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]] using default serializers and deserializers. + * + * @param initializer a function that provides an initial aggregate result value + * @param adder a function that adds a new record to the aggregate result + * @param subtractor an aggregator function that removed an old record from the aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#aggregate` + */ + def aggregate[VR](initializer: => VR)(adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable( + inner.aggregate((() => initializer).asInitializer, adder.asAggregator, subtractor.asAggregator, materialized) + ) + + /** + * Aggregate the value of records of the original [[KTable]] that got [[KTable#groupBy]] + * to the same key into a new instance of [[KTable]] using default serializers and deserializers. + * + * @param initializer a function that provides an initial aggregate result value + * @param named a [[Named]] config used to name the processor in the topology + * @param adder a function that adds a new record to the aggregate result + * @param subtractor an aggregator function that removed an old record from the aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.KGroupedTable#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable( + inner.aggregate( + (() => initializer).asInitializer, + adder.asAggregator, + subtractor.asAggregator, + named, + materialized + ) + ) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala new file mode 100644 index 0000000000..76918a6f74 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala @@ -0,0 +1,877 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
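The `KGroupedTable#aggregate` overloads documented above differ from `reduce` in that the result type may differ from the value type and the adder and subtractor also receive the key. A minimal sketch (illustrative only; the topic, region key, and threshold are invented):

```scala
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream.KTable

val builder = new StreamsBuilder()
val balances = builder.table[String, Long]("account-balances")          // hypothetical topic
val byRegion = balances.groupBy((id, balance) => (id.take(2), balance)) // hypothetical region key

// Count accounts per region whose balance exceeds a made-up threshold. The adder is applied to the
// new value of an inserted or updated row and the subtractor to the replaced or deleted old value.
val largeAccounts: KTable[String, Long] =
  byRegion.aggregate(0L)(
    (_, balance, count) => if (balance > 10000L) count + 1 else count, // adder
    (_, balance, count) => if (balance > 10000L) count - 1 else count  // subtractor
  )
```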
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{GlobalKTable, JoinWindows, KStream => KStreamJ, Printed} +import org.apache.kafka.streams.processor.TopicNameExtractor +import org.apache.kafka.streams.processor.api.{FixedKeyProcessorSupplier, ProcessorSupplier} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + FlatValueMapperFromFunction, + FlatValueMapperWithKeyFromFunction, + ForeachActionFromFunction, + KeyValueMapperFromFunction, + MapperFromFunction, + PredicateFromFunction, + ValueMapperFromFunction, + ValueMapperWithKeyFromFunction +} + +import scala.jdk.CollectionConverters._ + +/** + * Wraps the Java class `org.apache.kafka.streams.kstream.KStream` and delegates method calls to the + * underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KStream + * @see `org.apache.kafka.streams.kstream.KStream` + */ +//noinspection ScalaDeprecation +class KStream[K, V](val inner: KStreamJ[K, V]) { + + /** + * Create a new [[KStream]] that consists all records of this stream which satisfies the given predicate. + * + * @param predicate a filter that is applied to each record + * @return a [[KStream]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filter` + */ + def filter(predicate: (K, V) => Boolean): KStream[K, V] = + new KStream(inner.filter(predicate.asPredicate)) + + /** + * Create a new [[KStream]] that consists all records of this stream which satisfies the given predicate. + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filter` + */ + def filter(predicate: (K, V) => Boolean, named: Named): KStream[K, V] = + new KStream(inner.filter(predicate.asPredicate, named)) + + /** + * Create a new [[KStream]] that consists all records of this stream which do not satisfy the given + * predicate. + * + * @param predicate a filter that is applied to each record + * @return a [[KStream]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean): KStream[K, V] = + new KStream(inner.filterNot(predicate.asPredicate)) + + /** + * Create a new [[KStream]] that consists all records of this stream which do not satisfy the given + * predicate. 
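A quick illustration of `filter` and `filterNot` (a sketch with an invented topic and threshold, not part of the diff); the implicit serdes supply the `Consumed` instance for `stream`:

```scala
import org.apache.kafka.streams.kstream.Named
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder

val builder = new StreamsBuilder()
val orders = builder.stream[String, Long]("orders")   // hypothetical topic: order id -> amount

// Keep only large orders.
val largeOrders = orders.filter((_, amount) => amount >= 100L)

// filterNot keeps the records that do NOT satisfy the predicate, i.e. the negative amounts here,
// and Named labels the processor node in the topology.
val refunds = orders.filterNot((_, amount) => amount >= 0L, Named.as("negative-amounts"))
```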
+ * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KStream#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean, named: Named): KStream[K, V] = + new KStream(inner.filterNot(predicate.asPredicate, named)) + + /** + * Set a new key (with possibly new type) for each input record. + *

+ * The function `mapper` passed is applied to every record and results in the generation of a new + * key `KR`. The function outputs a new [[KStream]] where each record has this new key. + * + * @param mapper a function `(K, V) => KR` that computes a new key for each record + * @return a [[KStream]] that contains records with new key (possibly of different type) and unmodified value + * @see `org.apache.kafka.streams.kstream.KStream#selectKey` + */ + def selectKey[KR](mapper: (K, V) => KR): KStream[KR, V] = + new KStream(inner.selectKey[KR](mapper.asKeyValueMapper)) + + /** + * Set a new key (with possibly new type) for each input record. + *

+ * The function `mapper` passed is applied to every record and results in the generation of a new + * key `KR`. The function outputs a new [[KStream]] where each record has this new key. + * + * @param mapper a function `(K, V) => KR` that computes a new key for each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with new key (possibly of different type) and unmodified value + * @see `org.apache.kafka.streams.kstream.KStream#selectKey` + */ + def selectKey[KR](mapper: (K, V) => KR, named: Named): KStream[KR, V] = + new KStream(inner.selectKey[KR](mapper.asKeyValueMapper, named)) + + /** + * Transform each record of the input stream into a new record in the output stream (both key and value type can be + * altered arbitrarily). + *

+ * The provided `mapper`, a function `(K, V) => (KR, VR)` is applied to each input record and computes a new output record. + * + * @param mapper a function `(K, V) => (KR, VR)` that computes a new output record + * @return a [[KStream]] that contains records with new key and value (possibly both of different type) + * @see `org.apache.kafka.streams.kstream.KStream#map` + */ + def map[KR, VR](mapper: (K, V) => (KR, VR)): KStream[KR, VR] = + new KStream(inner.map[KR, VR](mapper.asKeyValueMapper)) + + /** + * Transform each record of the input stream into a new record in the output stream (both key and value type can be + * altered arbitrarily). + *

+ * The provided `mapper`, a function `(K, V) => (KR, VR)` is applied to each input record and computes a new output record. + * + * @param mapper a function `(K, V) => (KR, VR)` that computes a new output record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with new key and value (possibly both of different type) + * @see `org.apache.kafka.streams.kstream.KStream#map` + */ + def map[KR, VR](mapper: (K, V) => (KR, VR), named: Named): KStream[KR, VR] = + new KStream(inner.map[KR, VR](mapper.asKeyValueMapper, named)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: V => VR): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapper)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: V => VR, named: Named): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapper, named)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapperWithKey)) + + /** + * Transform the value of each input record into a new value (with possible new type) of the output record. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR, named: Named): KStream[K, VR] = + new KStream(inner.mapValues[VR](mapper.asValueMapperWithKey, named)) + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + *

+ * The provided `mapper`, function `(K, V) => Iterable[(KR, VR)]` is applied to each input record and computes zero or more output records. + * + * @param mapper function `(K, V) => Iterable[(KR, VR)]` that computes the new output records + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#flatMap` + */ + def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)]): KStream[KR, VR] = { + val kvMapper = mapper.tupled.andThen(_.map(ImplicitConversions.tuple2ToKeyValue).asJava) + new KStream(inner.flatMap[KR, VR](((k: K, v: V) => kvMapper(k, v)).asKeyValueMapper)) + } + + /** + * Transform each record of the input stream into zero or more records in the output stream (both key and value type + * can be altered arbitrarily). + *

+ * The provided `mapper`, function `(K, V) => Iterable[(KR, VR)]` is applied to each input record and computes zero or more output records. + * + * @param mapper function `(K, V) => Iterable[(KR, VR)]` that computes the new output records + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#flatMap` + */ + def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)], named: Named): KStream[KR, VR] = { + val kvMapper = mapper.tupled.andThen(_.map(ImplicitConversions.tuple2ToKeyValue).asJava) + new KStream(inner.flatMap[KR, VR](((k: K, v: V) => kvMapper(k, v)).asKeyValueMapper, named)) + } + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `V => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `V => Iterable[VR]` that computes the new output values + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: V => Iterable[VR]): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapper)) + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `V => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `V => Iterable[VR]` that computes the new output values + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: V => Iterable[VR], named: Named): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapper, named)) + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `(K, V) => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `(K, V) => Iterable[VR]` that computes the new output values + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: (K, V) => Iterable[VR]): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapperWithKey)) + + /** + * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values + * with the same key in the new stream. + *

+ * Transform the value of each input record into zero or more records with the same (unmodified) key in the output + * stream (value type can be altered arbitrarily). + * The provided `mapper`, a function `(K, V) => Iterable[VR]` is applied to each input record and computes zero or more output values. + * + * @param mapper a function `(K, V) => Iterable[VR]` that computes the new output values + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type + * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` + */ + def flatMapValues[VR](mapper: (K, V) => Iterable[VR], named: Named): KStream[K, VR] = + new KStream(inner.flatMapValues[VR](mapper.asValueMapperWithKey, named)) + + /** + * Print the records of this KStream using the options provided by `Printed` + * + * @param printed options for printing + * @see `org.apache.kafka.streams.kstream.KStream#print` + */ + def print(printed: Printed[K, V]): Unit = inner.print(printed) + + /** + * Perform an action on each record of `KStream` + * + * @param action an action to perform on each record + * @see `org.apache.kafka.streams.kstream.KStream#foreach` + */ + def foreach(action: (K, V) => Unit): Unit = + inner.foreach(action.asForeachAction) + + /** + * Perform an action on each record of `KStream` + * + * @param action an action to perform on each record + * @param named a [[Named]] config used to name the processor in the topology + * @see `org.apache.kafka.streams.kstream.KStream#foreach` + */ + def foreach(action: (K, V) => Unit, named: Named): Unit = + inner.foreach(action.asForeachAction, named) + + /** + * Split this stream. [[BranchedKStream]] can be used for routing the records to different branches depending + * on evaluation against the supplied predicates. + * Stream branching is a stateless record-by-record operation. + * + * @return [[BranchedKStream]] that provides methods for routing the records to different branches. + * @see `org.apache.kafka.streams.kstream.KStream#split` + */ + def split(): BranchedKStream[K, V] = + new BranchedKStream(inner.split()) + + /** + * Split this stream. [[BranchedKStream]] can be used for routing the records to different branches depending + * on evaluation against the supplied predicates. + * Stream branching is a stateless record-by-record operation. + * + * @param named a [[Named]] config used to name the processor in the topology and also to set the name prefix + * for the resulting branches (see [[BranchedKStream]]) + * @return [[BranchedKStream]] that provides methods for routing the records to different branches. + * @see `org.apache.kafka.streams.kstream.KStream#split` + */ + def split(named: Named): BranchedKStream[K, V] = + new BranchedKStream(inner.split(named)) + + /** + * Materialize this stream to a topic and creates a new [[KStream]] from the topic using the `Repartitioned` instance + * for configuration of the `Serde key serde`, `Serde value serde`, `StreamPartitioner`, number of partitions, and + * topic name part. + *

+ * The created topic is considered as an internal topic and is meant to be used only by the current Kafka Streams instance. + * Similar to auto-repartitioning, the topic will be created with infinite retention time and data will be automatically purged by Kafka Streams. + * The topic will be named as "\${applicationId}-<name>-repartition", where "applicationId" is user-specified in + * `StreamsConfig` via parameter `APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG`, + * "<name>" is either provided via `Repartitioned#as(String)` or an internally + * generated name, and "-repartition" is a fixed suffix. + *

+ * The user can either supply the `Repartitioned` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Repartitioned` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KStream[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of through below + * clicksPerRegion.repartition + * + * // Similarly you can create an implicit Repartitioned and it will be passed implicitly + * // to the repartition call + * }}} + * + * @param repartitioned the `Repartitioned` instance used to specify `Serdes`, `StreamPartitioner` which determines + * how records are distributed among partitions of the topic, + * part of the topic name, and number of partitions for a repartition topic. + * @return a [[KStream]] that contains the exact same repartitioned records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#repartition` + */ + def repartition(implicit repartitioned: Repartitioned[K, V]): KStream[K, V] = + new KStream(inner.repartition(repartitioned)) + + /** + * Materialize this stream to a topic using the `Produced` instance for + * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner` + *

+ * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Produced` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KTable[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of through below + * clicksPerRegion.to(topic) + * + * // Similarly you can create an implicit Produced and it will be passed implicitly + * // to the through call + * }}} + * + * @param topic the topic name + * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` + * @see `org.apache.kafka.streams.kstream.KStream#to` + */ + def to(topic: String)(implicit produced: Produced[K, V]): Unit = + inner.to(topic, produced) + + /** + * Dynamically materialize this stream to topics using the `Produced` instance for + * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner`. + * The topic names for each record to send to is dynamically determined based on the given mapper. + *

+ * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit + * key and value serdes that will be converted to a `Produced` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * //.. + * val clicksPerRegion: KTable[String, Long] = //.. + * + * // Implicit serdes in scope will generate an implicit Produced instance, which + * // will be passed automatically to the call of through below + * clicksPerRegion.to(topicChooser) + * + * // Similarly you can create an implicit Produced and it will be passed implicitly + * // to the through call + * }}} + * + * @param extractor the extractor to determine the name of the Kafka topic to write to for reach record + * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` + * @see `org.apache.kafka.streams.kstream.KStream#to` + */ + def to(extractor: TopicNameExtractor[K, V])(implicit produced: Produced[K, V]): Unit = + inner.to(extractor, produced) + + /** + * Convert this stream to a [[KTable]]. + * + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable: KTable[K, V] = + new KTable(inner.toTable) + + /** + * Convert this stream to a [[KTable]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable(named: Named): KTable[K, V] = + new KTable(inner.toTable(named)) + + /** + * Convert this stream to a [[KTable]]. + * + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable(materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.toTable(materialized)) + + /** + * Convert this stream to a [[KTable]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains the same records as this [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#toTable` + */ + def toTable(named: Named, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.toTable(named, materialized)) + + /** + * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Processor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. 
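Beyond the fixed-topic `to` shown in the example above, a `TopicNameExtractor` routes each record dynamically, and `toTable` re-interprets the stream as a changelog. A minimal sketch (the topic names and the tenant-id assumption are invented; the per-tenant target topics are assumed to exist):

```scala
import org.apache.kafka.streams.processor.TopicNameExtractor
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder

val builder = new StreamsBuilder()
val events = builder.stream[String, String]("events")   // hypothetical topic keyed by tenant id

// Route each record to a per-tenant topic; the implicit Produced comes from the serdes in scope.
val byTenant: TopicNameExtractor[String, String] = (tenantId, _, _) => s"events-$tenantId"
events.to(byTenant)

// Or keep only the latest value per key as a table.
val latestEvent = events.toTable
```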
+ * + * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.Processor` + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def process[KR, VR](processorSupplier: ProcessorSupplier[K, V, KR, VR], stateStoreNames: String*): KStream[KR, VR] = + new KStream(inner.process(processorSupplier, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `Processor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. + * + * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.Processor` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def process[KR, VR]( + processorSupplier: ProcessorSupplier[K, V, KR, VR], + named: Named, + stateStoreNames: String* + ): KStream[KR, VR] = + new KStream(inner.process(processorSupplier, named, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `FixedKeyProcessor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a FixedKeyProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. + * + * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.FixedKeyProcessor` + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def processValues[VR]( + processorSupplier: FixedKeyProcessorSupplier[K, V, VR], + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.processValues(processorSupplier, stateStoreNames: _*)) + + /** + * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given + * `processorSupplier`). + * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected + * to the `FixedKeyProcessor`. + * It's not required to connect global state stores that are added via `addGlobalStore`; + * read-only access to global state stores is available by default. + * + * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with + * the older (deprecated) overload. 
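For `process`, the supplier returns a full `org.apache.kafka.streams.processor.api.Processor`. The sketch below (illustrative; topic name invented, no state stores attached) upper-cases every value and forwards it downstream:

```scala
import org.apache.kafka.streams.processor.api.{Processor, ProcessorContext, ProcessorSupplier, Record}
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder

// Upper-cases the value and forwards the record otherwise unchanged.
class UpperCaseProcessor extends Processor[String, String, String, String] {
  private var ctx: ProcessorContext[String, String] = _
  override def init(context: ProcessorContext[String, String]): Unit = ctx = context
  override def process(record: Record[String, String]): Unit =
    ctx.forward(record.withValue(record.value().toUpperCase))
}

val builder = new StreamsBuilder()
val lines = builder.stream[String, String]("lines")   // hypothetical topic

val shouted = lines.process(
  new ProcessorSupplier[String, String, String, String] {
    override def get(): Processor[String, String, String, String] = new UpperCaseProcessor
  }
)
```

`processValues` follows the same pattern with a `FixedKeyProcessorSupplier`, which cannot change the key and therefore avoids a repartition.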
+ * + * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.FixedKeyProcessor` + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state store used by the processor + * @see `org.apache.kafka.streams.kstream.KStream#process` + */ + def processValues[VR]( + processorSupplier: FixedKeyProcessorSupplier[K, V, VR], + named: Named, + stateStoreNames: String* + ): KStream[K, VR] = + new KStream(inner.processValues(processorSupplier, named, stateStoreNames: _*)) + + /** + * Group the records by their current key into a [[KGroupedStream]] + *

+ * The user can either supply the `Grouped` instance as an implicit in scope or they can also provide an implicit + * serdes that will be converted to a `Grouped` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * val clicksPerRegion: KTable[String, Long] = + * userClicksStream + * .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) + * .map((_, regionWithClicks) => regionWithClicks) + * + * // the groupByKey gets the Grouped instance through an implicit conversion of the + * // serdes brought into scope through the import Serdes._ above + * .groupByKey + * .reduce(_ + _) + * + * // Similarly you can create an implicit Grouped and it will be passed implicitly + * // to the groupByKey call + * }}} + * + * @param grouped the instance of Grouped that gives the serdes + * @return a [[KGroupedStream]] that contains the grouped records of the original [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#groupByKey` + */ + def groupByKey(implicit grouped: Grouped[K, V]): KGroupedStream[K, V] = + new KGroupedStream(inner.groupByKey(grouped)) + + /** + * Group the records of this [[KStream]] on a new key that is selected using the provided key transformation function + * and the `Grouped` instance. + *

+ * The user can either supply the `Grouped` instance as an implicit in scope or they can also provide an implicit + * serdes that will be converted to a `Grouped` instance implicitly. + *

+ * {{{ + * Example: + * + * // brings implicit serdes in scope + * import Serdes._ + * + * val textLines = streamBuilder.stream[String, String](inputTopic) + * + * val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + * + * val wordCounts: KTable[String, Long] = + * textLines.flatMapValues(v => pattern.split(v.toLowerCase)) + * + * // the groupBy gets the Grouped instance through an implicit conversion of the + * // serdes brought into scope through the import Serdes._ above + * .groupBy((k, v) => v) + * + * .count() + * }}} + * + * @param selector a function that computes a new key for grouping + * @return a [[KGroupedStream]] that contains the grouped records of the original [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#groupBy` + */ + def groupBy[KR](selector: (K, V) => KR)(implicit grouped: Grouped[KR, V]): KGroupedStream[KR, V] = + new KGroupedStream(inner.groupBy(selector.asKeyValueMapper, grouped)) + + /** + * Join records of this stream with another [[KStream]]'s records using windowed inner equi join with + * serializers and deserializers supplied by the implicit `StreamJoined` instance. + * + * @param otherStream the [[KStream]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param windows the specification of the `JoinWindows` + * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can + * also name the repartition topic (if required), the state stores for the join, and the join + * processor node. + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key and within the joining window intervals + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[VO, VR](otherStream: KStream[K, VO])( + joiner: (V, VO) => VR, + windows: JoinWindows + )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = + new KStream(inner.join[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) + + /** + * Join records of this stream with another [[KStream]]'s records using windowed left equi join with + * serializers and deserializers supplied by the implicit `StreamJoined` instance. + * + * @param otherStream the [[KStream]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param windows the specification of the `JoinWindows` + * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can + * also name the repartition topic (if required), the state stores for the join, and the join + * processor node. 
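For the windowed stream-stream join documented here, a hedged sketch (the topics and window size are invented; the implicit `StreamJoined` is derived from the serdes in scope):

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.JoinWindows
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder

val builder = new StreamsBuilder()
val impressions = builder.stream[String, String]("ad-impressions") // hypothetical topics,
val clicks      = builder.stream[String, String]("ad-clicks")      // both keyed by ad id

// Inner join: emit a result only when an impression and a click share a key
// and fall within five minutes of each other.
val attributed = impressions.join(clicks)(
  (impression, click) => s"$impression/$click",
  JoinWindows.ofTimeDifferenceWithNoGrace(Duration.ofMinutes(5))
)
```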
+ * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key and within the joining window intervals + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[VO, VR](otherStream: KStream[K, VO])( + joiner: (V, VO) => VR, + windows: JoinWindows + )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = + new KStream(inner.leftJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) + + /** + * Join records of this stream with another [[KStream]]'s records using windowed outer equi join with + * serializers and deserializers supplied by the implicit `Joined` instance. + * + * @param otherStream the [[KStream]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param windows the specification of the `JoinWindows` + * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can + * also name the repartition topic (if required), the state stores for the join, and the join + * processor node. + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key and within the joining window intervals + * @see `org.apache.kafka.streams.kstream.KStream#outerJoin` + */ + def outerJoin[VO, VR](otherStream: KStream[K, VO])( + joiner: (V, VO) => VR, + windows: JoinWindows + )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = + new KStream(inner.outerJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) + + /** + * Join records of this stream with another [[KTable]]'s records using inner equi join with + * serializers and deserializers supplied by the implicit `Joined` instance. + * + * @param table the [[KTable]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param joined an implicit `Joined` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `Joined`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Joined` through implicit conversion + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[VT, VR](table: KTable[K, VT])(joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStream[K, VR] = + new KStream(inner.join[VT, VR](table.inner, joiner.asValueJoiner, joined)) + + /** + * Join records of this stream with another [[KTable]]'s records using left equi join with + * serializers and deserializers supplied by the implicit `Joined` instance. 
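Stream-table joins, documented above, look up the current table value for each stream record. A minimal sketch (topics invented; the implicit `Joined` comes from the serdes in scope):

```scala
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder

val builder = new StreamsBuilder()
val orders    = builder.stream[String, Long]("orders")      // hypothetical: customer id -> amount
val customers = builder.table[String, String]("customers")  // hypothetical: customer id -> name

// Inner join: drops orders whose customer is not (yet) in the table.
val enriched = orders.join(customers)((amount, name) => s"$name spent $amount")

// Left join: keeps every order; the table-side value is null when there is no match.
val enrichedOrUnknown = orders.leftJoin(customers) { (amount, name) =>
  s"${Option(name).getOrElse("unknown")} spent $amount"
}
```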
+ * + * @param table the [[KTable]] to be joined with this stream + * @param joiner a function that computes the join result for a pair of matching records + * @param joined an implicit `Joined` instance that defines the serdes to be used to serialize/deserialize + * inputs and outputs of the joined streams. Instead of `Joined`, the user can also supply + * key serde, value serde and other value serde in implicit scope and they will be + * converted to the instance of `Joined` through implicit conversion + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[VT, VR](table: KTable[K, VT])(joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStream[K, VR] = + new KStream(inner.leftJoin[VT, VR](table.inner, joiner.asValueJoiner, joined)) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed inner equi join. + * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV])( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream( + inner.join[GK, GV, RV]( + globalKTable, + ((k: K, v: V) => keyValueMapper(k, v)).asKeyValueMapper, + ((v: V, gv: GV) => joiner(v, gv)).asValueJoiner + ) + ) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed inner equi join. + * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param named a [[Named]] config used to name the processor in the topology + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#join` + */ + def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], named: Named)( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream( + inner.join[GK, GV, RV]( + globalKTable, + ((k: K, v: V) => keyValueMapper(k, v)).asKeyValueMapper, + ((v: V, gv: GV) => joiner(v, gv)).asValueJoiner, + named + ) + ) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed left equi join. 
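The `GlobalKTable` joins above take two functions: one that extracts the lookup key from the stream record and one that combines the stream value with the looked-up value. A sketch with invented topics:

```scala
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder

val builder = new StreamsBuilder()
val orders   = builder.stream[String, String]("orders")         // hypothetical: order id -> product id
val products = builder.globalTable[String, String]("products")  // hypothetical: product id -> description

val described = orders.join(products)(
  (orderId, productId) => productId,       // map the stream record to the GlobalKTable key
  (productId, description) => description  // combine the stream value with the looked-up value
)
```

`leftJoin` has the same shape but also emits records whose lookup finds no match (the table-side value is then null).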
+ * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV])( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream(inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner)) + + /** + * Join records of this stream with `GlobalKTable`'s records using non-windowed left equi join. + * + * @param globalKTable the `GlobalKTable` to be joined with this stream + * @param named a [[Named]] config used to name the processor in the topology + * @param keyValueMapper a function used to map from the (key, value) of this stream + * to the key of the `GlobalKTable` + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, + * one output for each input [[KStream]] record + * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` + */ + def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], named: Named)( + keyValueMapper: (K, V) => GK, + joiner: (V, GV) => RV + ): KStream[K, RV] = + new KStream(inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner, named)) + + /** + * Merge this stream and the given stream into one larger stream. + *

+ * There is no ordering guarantee between records from this `KStream` and records from the provided `KStream` + * in the merged stream. Relative order is preserved within each input stream though (ie, records within + * one input stream are processed in order). + * + * @param stream a stream which is to be merged into this stream + * @return a merged stream containing all records from this and the provided [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#merge` + */ + def merge(stream: KStream[K, V]): KStream[K, V] = + new KStream(inner.merge(stream.inner)) + + /** + * Merge this stream and the given stream into one larger stream. + *

+ * There is no ordering guarantee between records from this `KStream` and records from the provided `KStream` + * in the merged stream. Relative order is preserved within each input stream though (ie, records within + * one input stream are processed in order). + * + * @param named a [[Named]] config used to name the processor in the topology + * @param stream a stream which is to be merged into this stream + * @return a merged stream containing all records from this and the provided [[KStream]] + * @see `org.apache.kafka.streams.kstream.KStream#merge` + */ + def merge(stream: KStream[K, V], named: Named): KStream[K, V] = + new KStream(inner.merge(stream.inner, named)) + + /** + * Perform an action on each record of `KStream`. + *

+ * Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection) + * and returns an unchanged stream. + * + * @param action an action to perform on each record + * @see `org.apache.kafka.streams.kstream.KStream#peek` + */ + def peek(action: (K, V) => Unit): KStream[K, V] = + new KStream(inner.peek(action.asForeachAction)) + + /** + * Perform an action on each record of `KStream`. + *

+ * Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection) + * and returns an unchanged stream. + * + * @param action an action to perform on each record + * @param named a [[Named]] config used to name the processor in the topology + * @see `org.apache.kafka.streams.kstream.KStream#peek` + */ + def peek(action: (K, V) => Unit, named: Named): KStream[K, V] = + new KStream(inner.peek(action.asForeachAction, named)) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala new file mode 100644 index 0000000000..6a7f42285a --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala @@ -0,0 +1,806 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import scala.jdk.FunctionWrappers.AsJavaBiFunction +import org.apache.kafka.common.utils.Bytes +import org.apache.kafka.streams.kstream.{KTable => KTableJ, TableJoined, ValueJoiner, ValueTransformerWithKeySupplier} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + FunctionFromFunction, + KeyValueMapperFromFunction, + MapperFromFunction, + PredicateFromFunction, + ValueMapperFromFunction, + ValueMapperWithKeyFromFunction +} +import org.apache.kafka.streams.state.KeyValueStore + +/** + * Wraps the Java class `org.apache.kafka.streams.kstream.KTable` and delegates method calls to the underlying Java object. 
+ * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for KTable + * @see `org.apache.kafka.streams.kstream.KTable` + */ +class KTable[K, V](val inner: KTableJ[K, V]) { + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter(predicate: (K, V) => Boolean): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter(predicate: (K, V) => Boolean, named: Named): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate, named)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter(predicate: (K, V) => Boolean, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate, materialized)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. 
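A short illustration of `KTable#filter` with an explicitly named store (a sketch; the topic, threshold, and store name are invented), which makes the filtered table queryable via interactive queries:

```scala
import org.apache.kafka.streams.scala.ByteArrayKeyValueStore
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream.Materialized

val builder = new StreamsBuilder()
val inventory = builder.table[String, Long]("inventory")   // hypothetical topic: sku -> stock level

// Keep only in-stock items, materialized under a store name for interactive queries.
val inStock = inventory.filter(
  (_, stock) => stock > 0L,
  Materialized.as[String, Long, ByteArrayKeyValueStore]("in-stock-store")
)
```

Note that the filtered table forwards tombstones for keys the predicate rejects, so downstream state stays consistent.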
+ * @return a [[KTable]] that contains only those records that satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filter` + */ + def filter( + predicate: (K, V) => Boolean, + named: Named, + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.filter(predicate.asPredicate, named, materialized)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean, named: Named): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate, named)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot(predicate: (K, V) => Boolean, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate, materialized)) + + /** + * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given + * predicate + * + * @param predicate a filter that is applied to each record + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains only those records that do not satisfy the given predicate + * @see `org.apache.kafka.streams.kstream.KTable#filterNot` + */ + def filterNot( + predicate: (K, V) => Boolean, + named: Named, + materialized: Materialized[K, V, ByteArrayKeyValueStore] + ): KTable[K, V] = + new KTable(inner.filterNot(predicate.asPredicate, named, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: V => VR): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: V => VR, named: Named): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper, named)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: V => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `V => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR]( + mapper: V => VR, + named: Named, + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapper, named, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR, named: Named): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, named)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR](mapper: (K, V) => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, materialized)) + + /** + * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value + * (with possible new type) in the new [[KTable]]. + *

+ * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it + * + * @param mapper , a function `(K, V) => VR` that computes a new output value + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KTable#mapValues` + */ + def mapValues[VR]( + mapper: (K, V) => VR, + named: Named, + materialized: Materialized[K, VR, ByteArrayKeyValueStore] + ): KTable[K, VR] = + new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, named, materialized)) + + /** + * Convert this changelog stream to a [[KStream]]. + * + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream: KStream[K, V] = + new KStream(inner.toStream) + + /** + * Convert this changelog stream to a [[KStream]]. + * + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream(named: Named): KStream[K, V] = + new KStream(inner.toStream(named)) + + /** + * Convert this changelog stream to a [[KStream]] using the given key/value mapper to select the new key + * + * @param mapper a function that computes a new key for each record + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream[KR](mapper: (K, V) => KR): KStream[KR, V] = + new KStream(inner.toStream[KR](mapper.asKeyValueMapper)) + + /** + * Convert this changelog stream to a [[KStream]] using the given key/value mapper to select the new key + * + * @param mapper a function that computes a new key for each record + * @param named a [[Named]] config used to name the processor in the topology + * @return a [[KStream]] that contains the same records as this [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#toStream` + */ + def toStream[KR](mapper: (K, V) => KR, named: Named): KStream[KR, V] = + new KStream(inner.toStream[KR](mapper.asKeyValueMapper, named)) + + /** + * Suppress some updates from this changelog stream, determined by the supplied `org.apache.kafka.streams.kstream.Suppressed` configuration. + * + * This controls what updates downstream table and stream operations will receive. + * + * @param suppressed Configuration object determining what, if any, updates to suppress. + * @return A new KTable with the desired suppression characteristics. + * @see `org.apache.kafka.streams.kstream.KTable#suppress` + */ + def suppress(suppressed: org.apache.kafka.streams.kstream.Suppressed[_ >: K]): KTable[K, V] = + new KTable(inner.suppress(suppressed)) + + /** + * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value, (with possibly new type). + * Transform the value of each input record into a new value (with possible new type) of the output record. + * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input + * record value and computes a new value for it. 
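The `mapValues`, `toStream`, and `suppress` wrappers above compose directly in the Scala DSL. A minimal sketch, assuming hypothetical topic names and record types that are not part of this patch:

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.Suppressed
import org.apache.kafka.streams.kstream.Suppressed.BufferConfig
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()

// Hypothetical changelog topic of per-user scores.
val scores = builder.table[String, Long]("user-scores")

// mapValues: derive a new value per key; keys are left untouched.
val levels = scores.mapValues(score => if (score > 100L) "gold" else "standard")

// suppress + toStream: rate-limit updates per key, then continue as a record stream.
levels
  .suppress(Suppressed.untilTimeLimit[String](Duration.ofMinutes(5), BufferConfig.unbounded()))
  .toStream
  .to("user-levels")
```

The implicit `Consumed` and `Produced` instances are derived from the serdes brought in by the two wildcard imports.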
+ * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing access to additional state-stores, + * and to the `ProcessorContext`. + * If the downstream topology uses aggregation functions (e.g. `KGroupedTable#reduce`, `KGroupedTable#aggregate`, etc.), + * care must be taken when dealing with state (either held in state-stores or transformer instances) to ensure correct + * aggregate results. + * In contrast, if the resulting KTable is materialized (cf. `#transformValues(ValueTransformerWithKeySupplier, Materialized, String...)`), + * such concerns are handled for you. + * In order to use state, the state stores must be created and registered + * beforehand via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer`. + * + * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`. + * At least one transformer instance will be created per streaming task. + * Transformer implementations do not need to be thread-safe. + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + def transformValues[VR]( + valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], + stateStoreNames: String* + ): KTable[K, VR] = + new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, stateStoreNames: _*)) + + /** + * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value (with a possibly new type). + * Transform the value of each input record into a new value (with a possibly new type) of the output record. + * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input + * record value and computes a new value for it. + * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing access to additional state-stores, + * and to the `ProcessorContext`. + * If the downstream topology uses aggregation functions (e.g. `KGroupedTable#reduce`, `KGroupedTable#aggregate`, etc.), + * care must be taken when dealing with state (either held in state-stores or transformer instances) to ensure correct + * aggregate results. + * In contrast, if the resulting KTable is materialized (cf. `#transformValues(ValueTransformerWithKeySupplier, Materialized, String...)`), + * such concerns are handled for you. + * In order to use state, the state stores must be created and registered + * beforehand via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer`. + * + * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`. + * At least one transformer instance will be created per streaming task. + * Transformer implementations do not need to be thread-safe.
+ * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + def transformValues[VR]( + valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], + named: Named, + stateStoreNames: String* + ): KTable[K, VR] = + new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, named, stateStoreNames: _*)) + + /** + * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value (with a possibly new type). + * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input + * record value and computes a new value for it. + * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing stateful, rather than stateless, + * record-by-record operation, access to additional state-stores, and access to the `ProcessorContext`. + * In order to use state, the state stores must be created and registered + * beforehand via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer`. + * The resulting `KTable` is materialized into another state store (additional to the provided state store names) + * as specified by the user via the `Materialized` parameter, and is queryable through its given name. + * + * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` + * At least one transformer instance will be created per streaming task. + * Transformer implementations do not need to be thread-safe. + * @param materialized an instance of `Materialized` used to describe how the state store of the + * resulting table should be materialized. + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + def transformValues[VR]( + valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]], + stateStoreNames: String* + ): KTable[K, VR] = + new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, materialized, stateStoreNames: _*)) + + /** + * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value (with a possibly new type). + * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input + * record value and computes a new value for it. + * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing stateful, rather than stateless, + * record-by-record operation, access to additional state-stores, and access to the `ProcessorContext`. + * In order to use state, the state stores must be created and registered + * beforehand via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer`. + * The resulting `KTable` is materialized into another state store (additional to the provided state store names) + * as specified by the user via the `Materialized` parameter, and is queryable through its given name.
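To make the `transformValues` overloads above concrete, here is a sketch of a context-aware transformer that tags each value with the record timestamp and materializes the result under a queryable store name. The `scores` table, the store name, and the value types are hypothetical.

```scala
import org.apache.kafka.streams.kstream.{ValueTransformerWithKey, ValueTransformerWithKeySupplier}
import org.apache.kafka.streams.processor.ProcessorContext
import org.apache.kafka.streams.scala.ByteArrayKeyValueStore
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.kstream.Materialized
import org.apache.kafka.streams.scala.serialization.Serdes._

// Tags each value with the timestamp of the update being processed.
class TimestampingTransformer extends ValueTransformerWithKey[String, Long, String] {
  private var context: ProcessorContext = _
  override def init(context: ProcessorContext): Unit = this.context = context
  override def transform(readOnlyKey: String, value: Long): String =
    s"$value@${context.timestamp()}"
  override def close(): Unit = ()
}

val supplier = new ValueTransformerWithKeySupplier[String, Long, String] {
  override def get(): ValueTransformerWithKey[String, Long, String] = new TimestampingTransformer
}

// `scores` is assumed to be a KTable[String, Long] built elsewhere in the topology.
val tagged = scores.transformValues(
  supplier,
  Materialized.as[String, String, ByteArrayKeyValueStore]("tagged-scores-store")
)
```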
+ * + * @param valueTransformerWithKeySupplier an instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` + * At least one transformer instance will be created per streaming task. + * Transformer implementations do not need to be thread-safe. + * @param materialized an instance of `Materialized` used to describe how the state store of the + * resulting table should be materialized. + * @param named a [[Named]] config used to name the processor in the topology + * @param stateStoreNames the names of the state stores used by the processor + * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) + * @see `org.apache.kafka.streams.kstream.KStream#transformValues` + */ + def transformValues[VR]( + valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]], + named: Named, + stateStoreNames: String* + ): KTable[K, VR] = + new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, materialized, named, stateStoreNames: _*)) + + /** + * Re-groups the records of this [[KTable]] using the provided key/value mapper + * and `Serde`s as specified by `Grouped`. + * + * @param selector a function that computes a new grouping key and value to be aggregated + * @param grouped the `Grouped` instance used to specify `Serdes` + * @return a [[KGroupedTable]] that contains the re-grouped records of the original [[KTable]] + * @see `org.apache.kafka.streams.kstream.KTable#groupBy` + */ + def groupBy[KR, VR](selector: (K, V) => (KR, VR))(implicit grouped: Grouped[KR, VR]): KGroupedTable[KR, VR] = + new KGroupedTable(inner.groupBy(selector.asKeyValueMapper, grouped)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#join` + */ + def join[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param named a [[Named]] config used to name the processor in the topology + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#join` + */ + def join[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, named)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join.
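A sketch of `groupBy` re-keying a table before a `KGroupedTable` aggregation; `orders`, the `Order` case class, and its serde are assumptions made only for this illustration.

```scala
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._

// Hypothetical value type; `orders` is assumed to be a KTable[String, Order]
// keyed by order id.
case class Order(customerId: String, amount: Long)

// Re-key by customer id and maintain a running total per customer. The adder
// and subtractor keep the aggregate correct as upstream rows are updated or deleted.
val totalPerCustomer =
  orders
    .groupBy((_, order) => (order.customerId, order.amount))
    .reduce(_ + _, _ - _)
```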
+ * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#join` + */ + def join[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#join` + */ + def join[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def leftJoin[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param named a [[Named]] config used to name the processor in the topology + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def leftJoin[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, named)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. 
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def leftJoin[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param named a [[Named]] config used to name the processor in the topology + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def leftJoin[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def outerJoin[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param named a [[Named]] config used to name the processor in the topology + * @param joiner a function that computes the join result for a pair of matching records + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def outerJoin[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = + new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, named)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. 
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def outerJoin[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. + * + * @param other the other [[KTable]] to be joined with this [[KTable]] + * @param named a [[Named]] config used to name the processor in the topology + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` + */ + def outerJoin[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( + joiner: (V, VO) => VR + ): KTable[K, VR] = + new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def join[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's key and value + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. 
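A sketch of the primary-key (equi) joins above, using two tables that share the same key space; the topic names and the `builder` value are hypothetical.

```scala
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()

// Hypothetical tables keyed by the same user id.
val profiles = builder.table[String, String]("user-profiles")
val settings = builder.table[String, String]("user-settings")

// Inner join: produces output only for keys present in both tables.
val combined = profiles.join(settings)((profile, setting) => s"$profile|$setting")

// Left join: `setting` is null when the key has no match on the other side.
val withDefaults = profiles.leftJoin(settings) { (profile, setting) =>
  s"$profile|${Option(setting).getOrElse("default-settings")}"
}
```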
+ * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def join[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: (K, V) => KO, + joiner: ValueJoiner[V, VO, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.join(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure + * partitioners and names of internal topics and stores + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def join[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + tableJoined: TableJoined[K, KO], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, tableJoined, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's key and value + * @param joiner a function that computes the join result for a pair of matching records + * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure + * partitioners and names of internal topics and stores + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def join[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: (K, V) => KO, + joiner: ValueJoiner[V, VO, VR], + tableJoined: TableJoined[K, KO], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.join(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, tableJoined, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. 
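The foreign-key variants above take an extractor instead of relying on a shared key. A sketch, assuming hypothetical `orders: KTable[String, Order]` and `customers: KTable[String, Customer]` tables and case classes:

```scala
import org.apache.kafka.common.utils.Bytes
import org.apache.kafka.streams.kstream.ValueJoiner
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.kstream.Materialized
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.state.KeyValueStore

case class Order(customerId: String, amount: Long)
case class Customer(name: String)

// Each order is joined with the customer row selected by its customerId.
val enrichedOrders =
  orders.join(
    customers,
    (orderId: String, order: Order) => order.customerId, // foreign-key extractor
    new ValueJoiner[Order, Customer, String] {
      override def apply(order: Order, customer: Customer): String =
        s"${customer.name} spent ${order.amount}"
    },
    Materialized.`with`[String, String, KeyValueStore[Bytes, Array[Byte]]]
  )
```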
+ * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's key and value + * @param joiner a function that computes the join result for a pair of matching records + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: (K, V) => KO, + joiner: ValueJoiner[V, VO, VR], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's value + * @param joiner a function that computes the join result for a pair of matching records + * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure + * partitioners and names of internal topics and stores + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: Function[V, KO], + joiner: ValueJoiner[V, VO, VR], + tableJoined: TableJoined[K, KO], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, tableJoined, materialized)) + + /** + * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. 
Records from this + * table are joined according to the result of keyExtractor on the other KTable. + * + * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor + * @param keyExtractor a function that extracts the foreign key from this table's key and value + * @param joiner a function that computes the join result for a pair of matching records + * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure + * partitioners and names of internal topics and stores + * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] + * should be materialized. + * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, + * one for each matched record-pair with the same key + */ + def leftJoin[VR, KO, VO]( + other: KTable[KO, VO], + keyExtractor: (K, V) => KO, + joiner: ValueJoiner[V, VO, VR], + tableJoined: TableJoined[K, KO], + materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] + ): KTable[K, VR] = + new KTable(inner.leftJoin(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, tableJoined, materialized)) + + /** + * Get the name of the local state store used that can be used to query this [[KTable]]. + * + * @return the underlying state store name, or `null` if this [[KTable]] cannot be queried. + */ + def queryableStoreName: String = + inner.queryableStoreName +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala new file mode 100644 index 0000000000..55c09896ac --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Materialized => MaterializedJ} +import org.apache.kafka.streams.processor.StateStore +import org.apache.kafka.streams.scala.{ByteArrayKeyValueStore, ByteArraySessionStore, ByteArrayWindowStore} +import org.apache.kafka.streams.state.{KeyValueBytesStoreSupplier, SessionBytesStoreSupplier, WindowBytesStoreSupplier} + +object Materialized { + + /** + * Materialize a `org.apache.kafka.streams.processor.StateStore` with the provided key and value Serdes. + * An internal name will be used for the store. + * + * @tparam K key type + * @tparam V value type + * @tparam S store type + * @param keySerde the key Serde to use. + * @param valueSerde the value Serde to use. 
+ * @return a new [[Materialized]] instance with the given key and value serdes + */ + def `with`[K, V, S <: StateStore](implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, S] = + MaterializedJ.`with`(keySerde, valueSerde) + + /** + * Materialize a `org.apache.kafka.streams.processor.StateStore` with the given name. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @tparam S type of the `org.apache.kafka.streams.processor.StateStore` + * @param storeName the name of the underlying [[org.apache.kafka.streams.scala.kstream.KTable]] state store; + * valid characters are ASCII alphanumerics, '.', '_' and '-'. + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given storeName + */ + def as[K, V, S <: StateStore]( + storeName: String + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, S] = + MaterializedJ.as(storeName).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Materialize a `org.apache.kafka.streams.state.WindowStore` using the provided + * `org.apache.kafka.streams.state.WindowBytesStoreSupplier`. + * + * Important: Custom subclasses are allowed here, but they should respect the retention contract: + * Window stores are required to retain windows at least as long as (window size + window grace period). + * Stores constructed via `org.apache.kafka.streams.state.Stores` already satisfy this contract. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @param supplier the `org.apache.kafka.streams.state.WindowBytesStoreSupplier` used to materialize the store + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given supplier + */ + def as[K, V]( + supplier: WindowBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArrayWindowStore] = + MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Materialize a `org.apache.kafka.streams.state.SessionStore` using the provided + * `org.apache.kafka.streams.state.SessionBytesStoreSupplier`. + * + * Important: Custom subclasses are allowed here, but they should respect the retention contract: + * Session stores are required to retain windows at least as long as (session inactivity gap + session grace period). + * Stores constructed via `org.apache.kafka.streams.state.Stores` already satisfy this contract. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @param supplier the `org.apache.kafka.streams.state.SessionBytesStoreSupplier` used to materialize the store + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given supplier + */ + def as[K, V]( + supplier: SessionBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArraySessionStore] = + MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Materialize a `org.apache.kafka.streams.state.KeyValueStore` using the provided + * `org.apache.kafka.streams.state.KeyValueBytesStoreSupplier`. + * + * @tparam K key type of the store + * @tparam V value type of the store + * @param supplier the `org.apache.kafka.streams.state.KeyValueBytesStoreSupplier` used to + * materialize the store + * @param keySerde the key serde to use. 
+ * @param valueSerde the value serde to use. + * @return a new [[Materialized]] instance with the given supplier + */ + def as[K, V]( + supplier: KeyValueBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArrayKeyValueStore] = + MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala new file mode 100644 index 0000000000..1f4498b8af --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Produced => ProducedJ} +import org.apache.kafka.streams.processor.StreamPartitioner + +object Produced { + + /** + * Create a Produced instance with provided keySerde and valueSerde. + * + * @tparam K key type + * @tparam V value type + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Produced]] instance configured with keySerde and valueSerde + * @see KStream#through(String, Produced) + * @see KStream#to(String, Produced) + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): ProducedJ[K, V] = + ProducedJ.`with`(keySerde, valueSerde) + + /** + * Create a Produced instance with provided keySerde, valueSerde, and partitioner. 
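In practice the `Produced` instances above are usually resolved implicitly from the serdes in scope, but they can also be passed explicitly. A short sketch with hypothetical topics:

```scala
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream.Produced
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()
val words = builder.stream[String, String]("input-words")

// Implicit Produced[String, String] derived from the serdes above.
words.to("output-words")

// Equivalent, with the Produced spelled out explicitly.
words.to("output-words")(Produced.`with`[String, String])
```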
+ * + * @tparam K key type + * @tparam V value type + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and `keySerde` provides a + * `org.apache.kafka.streams.kstream.internals.WindowedSerializer` for the key + * `org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner` will be + * used—otherwise `org.apache.kafka.clients.producer.internals.DefaultPartitioner` + * will be used + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Produced]] instance configured with keySerde, valueSerde, and partitioner + * @see KStream#through(String, Produced) + * @see KStream#to(String, Produced) + */ + def `with`[K, V]( + partitioner: StreamPartitioner[K, V] + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ProducedJ[K, V] = + ProducedJ.`with`(keySerde, valueSerde, partitioner) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala new file mode 100644 index 0000000000..f968c859c5 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{Repartitioned => RepartitionedJ} +import org.apache.kafka.streams.processor.StreamPartitioner + +object Repartitioned { + + /** + * Create a Repartitioned instance with provided keySerde and valueSerde. + * + * @tparam K key type + * @tparam V value type + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde and valueSerde + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`with`(keySerde, valueSerde) + + /** + * Create a Repartitioned instance with provided keySerde, valueSerde, and name used as part of the repartition topic. + * + * @tparam K key type + * @tparam V value type + * @param name the name used as a processor named and part of the repartition topic name. 
+ * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and processor and repartition topic name + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V](name: String)(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`as`(name).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create a Repartitioned instance with provided keySerde, valueSerde, and partitioner. + * + * @tparam K key type + * @tparam V value type + * @param partitioner the function used to determine how records are distributed among partitions of the topic, + * if not specified and `keySerde` provides a + * `org.apache.kafka.streams.kstream.internals.WindowedSerializer` for the key + * `org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner` will be + * used—otherwise `org.apache.kafka.clients.producer.internals.DefaultPartitioner` + * will be used + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and partitioner + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V]( + partitioner: StreamPartitioner[K, V] + )(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`streamPartitioner`(partitioner).withKeySerde(keySerde).withValueSerde(valueSerde) + + /** + * Create a Repartitioned instance with provided keySerde, valueSerde, and number of partitions for repartition topic. + * + * @tparam K key type + * @tparam V value type + * @param numberOfPartitions number of partitions used when creating repartition topic + * @param keySerde Serde to use for serializing the key + * @param valueSerde Serde to use for serializing the value + * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and number of partitions + * @see KStream#repartition(Repartitioned) + */ + def `with`[K, V](numberOfPartitions: Int)(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = + RepartitionedJ.`numberOfPartitions`(numberOfPartitions).withKeySerde(keySerde).withValueSerde(valueSerde) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala new file mode 100644 index 0000000000..1b20179d5d --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{SessionWindowedCogroupedKStream => SessionWindowedCogroupedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{InitializerFromFunction, MergerFromFunction} + +/** + * Wraps the Java class SessionWindowedCogroupedKStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for SessionWindowedCogroupedKStream + * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream` + */ +class SessionWindowedCogroupedKStream[K, V](val inner: SessionWindowedCogroupedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param merger a function that combines two aggregation results. + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V, merger: (K, V, V) => V)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, merger.asMerger, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param merger a function that combines two aggregation results. + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V, merger: (K, V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, merger.asMerger, named, materialized)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala new file mode 100644 index 0000000000..3d6e157ecd --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.internals.KTableImpl +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.kstream.{KTable => KTableJ, SessionWindowedKStream => SessionWindowedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + MergerFromFunction, + ReducerFromFunction, + ValueMapperFromFunction +} + +/** + * Wraps the Java class SessionWindowedKStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for SessionWindowedKStream + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream` + */ +class SessionWindowedKStream[K, V](val inner: SessionWindowedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param aggregator the aggregator function + * @param merger the merger function + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR, merger: (K, VR, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArraySessionStore] + ): KTable[Windowed[K], VR] = + new KTable( + inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, merger.asMerger, materialized) + ) + + /** + * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. + * + * @param initializer the initializer function + * @param aggregator the aggregator function + * @param merger the merger function + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR, merger: (K, VR, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArraySessionStore] + ): KTable[Windowed[K], VR] = + new KTable( + inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, merger.asMerger, named, materialized) + ) + + /** + * Count the number of records in this stream by the grouped key into `SessionWindows`. + * + * @param materialized an instance of `Materialized` used to materialize a state store. 
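A sketch of how a `SessionWindowedKStream` like the one wrapped above is usually obtained and aggregated; the topic name, inactivity gap, and record types are hypothetical.

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.SessionWindows
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()

// Hypothetical click stream keyed by user id; values are page names.
val clicks = builder.stream[String, String]("page-clicks")

// Count page views per user session; 30 minutes of inactivity closes a session.
val viewsPerSession =
  clicks
    .groupByKey
    .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(Duration.ofMinutes(30)))
    .aggregate(0L)((_, _, agg) => agg + 1L, (_, left, right) => left + right)
```

The result is a `KTable[Windowed[String], Long]` backed by a session store materialized via the implicit `Materialized` derived from the serdes in scope.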
+ * @return a windowed [[KTable]] that contains "update" records with unmodified keys and `Long` values + * that represent the latest (rolling) count (i.e., number of records) for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArraySessionStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArraySessionStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArraySessionStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Count the number of records in this stream by the grouped key into `SessionWindows`. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys and `Long` values + * that represent the latest (rolling) count (i.e., number of records) for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#count` + */ + def count( + named: Named + )(implicit materialized: Materialized[K, Long, ByteArraySessionStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArraySessionStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArraySessionStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Combine values of this stream by the grouped key into `SessionWindows`. + * + * @param reducer a reducer function that computes a new aggregate result. + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Combine values of this stream by the grouped key into `SessionWindows`. + * + * @param reducer a reducer function that computes a new aggregate result. + * @param materialized an instance of `Materialized` used to materialize a state store. 
+ * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent + * the latest (rolling) aggregate for each key within a window + * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArraySessionStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, named, materialized)) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala new file mode 100644 index 0000000000..c8c08ba0fd --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.kstream.{StreamJoined => StreamJoinedJ} +import org.apache.kafka.streams.state.WindowBytesStoreSupplier + +object StreamJoined { + + /** + * Create an instance of [[StreamJoined]] with key, value, and otherValue + * `org.apache.kafka.common.serialization.Serde` instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new [[StreamJoined]] instance with the provided serdes + */ + def `with`[K, V, VO](implicit + keySerde: Serde[K], + valueSerde: Serde[V], + otherValueSerde: Serde[VO] + ): StreamJoinedJ[K, V, VO] = + StreamJoinedJ.`with`(keySerde, valueSerde, otherValueSerde) + + /** + * Create an instance of [[StreamJoined]] with store suppliers for the calling stream + * and the other stream. Also adds the key, value, and otherValue + * `org.apache.kafka.common.serialization.Serde` instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param supplier store supplier to use + * @param otherSupplier other store supplier to use + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. 
If `null` the default value serde from config will be used + * @return new [[StreamJoined]] instance with the provided store suppliers and serdes + */ + def `with`[K, V, VO]( + supplier: WindowBytesStoreSupplier, + otherSupplier: WindowBytesStoreSupplier + )(implicit keySerde: Serde[K], valueSerde: Serde[V], otherValueSerde: Serde[VO]): StreamJoinedJ[K, V, VO] = + StreamJoinedJ + .`with`(supplier, otherSupplier) + .withKeySerde(keySerde) + .withValueSerde(valueSerde) + .withOtherValueSerde(otherValueSerde) + + /** + * Create an instance of [[StreamJoined]] with the name used for naming + * the state stores involved in the join. Also adds the key, value, and otherValue + * `org.apache.kafka.common.serialization.Serde` instances. + * `null` values are accepted and will be replaced by the default serdes as defined in config. + * + * @tparam K key type + * @tparam V value type + * @tparam VO other value type + * @param storeName the name to use as a base name for the state stores of the join + * @param keySerde the key serde to use. + * @param valueSerde the value serde to use. + * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used + * @return new [[StreamJoined]] instance with the provided store suppliers and serdes + */ + def as[K, V, VO]( + storeName: String + )(implicit keySerde: Serde[K], valueSerde: Serde[V], otherValueSerde: Serde[VO]): StreamJoinedJ[K, V, VO] = + StreamJoinedJ.as(storeName).withKeySerde(keySerde).withValueSerde(valueSerde).withOtherValueSerde(otherValueSerde) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala new file mode 100644 index 0000000000..ad24228ecc --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.{TimeWindowedCogroupedKStream => TimeWindowedCogroupedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.InitializerFromFunction + +/** + * Wraps the Java class TimeWindowedCogroupedKStream and delegates method calls to the underlying Java object. 
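+ *
+ * Example (an illustrative sketch; `groupedLefts` and `groupedRights` are assumed
+ * `KGroupedStream[String, String]` instances and the implicit serdes are assumed in scope):
+ * {{{
+ * import java.time.Duration
+ * import org.apache.kafka.streams.kstream.TimeWindows
+ *
+ * // co-aggregate two grouped streams over one-minute tumbling windows
+ * val aggregated: KTable[Windowed[String], Long] =
+ *   groupedLefts
+ *     .cogroup((_: String, v: String, agg: Long) => agg + v.length)
+ *     .cogroup(groupedRights, (_: String, _: String, agg: Long) => agg + 1L)
+ *     .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofMinutes(1)))
+ *     .aggregate(0L)
+ * }}}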
+ * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for TimeWindowedCogroupedKStream + * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream` + */ +class TimeWindowedCogroupedKStream[K, V](val inner: TimeWindowedCogroupedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, materialized)) + + /** + * Aggregate the values of records in these streams by the grouped key and defined window. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest + * (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream#aggregate` + */ + def aggregate(initializer: => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.aggregate((() => initializer).asInitializer, named, materialized)) + +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala new file mode 100644 index 0000000000..4fcf227e03 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala +package kstream + +import org.apache.kafka.streams.kstream.internals.KTableImpl +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.kstream.{KTable => KTableJ, TimeWindowedKStream => TimeWindowedKStreamJ, Windowed} +import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ + AggregatorFromFunction, + InitializerFromFunction, + ReducerFromFunction, + ValueMapperFromFunction +} + +/** + * Wraps the Java class TimeWindowedKStream and delegates method calls to the underlying Java object. + * + * @tparam K Type of keys + * @tparam V Type of values + * @param inner The underlying Java abstraction for TimeWindowedKStream + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream` + */ +class TimeWindowedKStream[K, V](val inner: TimeWindowedKStreamJ[K, V]) { + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param aggregator an aggregator function that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayWindowStore] + ): KTable[Windowed[K], VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, materialized)) + + /** + * Aggregate the values of records in this stream by the grouped key. + * + * @param initializer an initializer function that computes an initial intermediate aggregation result + * @param named a [[Named]] config used to name the processor in the topology + * @param aggregator an aggregator function that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#aggregate` + */ + def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR)(implicit + materialized: Materialized[K, VR, ByteArrayWindowStore] + ): KTable[Windowed[K], VR] = + new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, named, materialized)) + + /** + * Count the number of records in this stream by the grouped key and the defined windows. + * + * @param materialized an instance of `Materialized` used to materialize a state store. 
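+ *
+ * Example (an illustrative sketch; `grouped` is an assumed `KGroupedStream[String, String]` and the
+ * implicit serdes are assumed in scope):
+ * {{{
+ * import java.time.Duration
+ * import org.apache.kafka.streams.kstream.TimeWindows
+ *
+ * // per-key counts over one-minute tumbling windows
+ * val counts: KTable[Windowed[String], Long] =
+ *   grouped
+ *     .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofMinutes(1)))
+ *     .count()
+ * }}}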
+ * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#count` + */ + def count()(implicit materialized: Materialized[K, Long, ByteArrayWindowStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayWindowStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArrayWindowStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Count the number of records in this stream by the grouped key and the defined windows. + * + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that + * represent the latest (rolling) count (i.e., number of records) for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#count` + */ + def count( + named: Named + )(implicit materialized: Materialized[K, Long, ByteArrayWindowStore]): KTable[Windowed[K], Long] = { + val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = + inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayWindowStore]]) + val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArrayWindowStore, java.lang.Long]] + new KTable( + javaCountTable.mapValues[Long]( + ((l: java.lang.Long) => Long2long(l)).asValueMapper, + Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) + ) + ) + } + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function that computes a new aggregate result + * @param materialized an instance of `Materialized` used to materialize a state store. + * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, materialized)) + + /** + * Combine the values of records in this stream by the grouped key. + * + * @param reducer a function that computes a new aggregate result + * @param named a [[Named]] config used to name the processor in the topology + * @param materialized an instance of `Materialized` used to materialize a state store. 
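+ *
+ * Example (an illustrative sketch; `windowed` is an assumed `TimeWindowedKStream[String, String]` and the
+ * implicit serdes are assumed in scope):
+ * {{{
+ * import org.apache.kafka.streams.kstream.Named
+ *
+ * // concatenate the values seen for a key within each window, naming the processor explicitly
+ * val reduced: KTable[Windowed[String], String] =
+ *   windowed.reduce((v1, v2) => v1 + "," + v2, Named.as("window-reduce"))
+ * }}}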
+ * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the + * latest (rolling) aggregate for each key + * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#reduce` + */ + def reduce(reducer: (V, V) => V, named: Named)(implicit + materialized: Materialized[K, V, ByteArrayWindowStore] + ): KTable[Windowed[K], V] = + new KTable(inner.reduce(reducer.asReducer, named, materialized)) +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala new file mode 100644 index 0000000000..7365c68ad1 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala + +import org.apache.kafka.streams.processor.StateStore + +package object kstream { + type Materialized[K, V, S <: StateStore] = org.apache.kafka.streams.kstream.Materialized[K, V, S] + type Grouped[K, V] = org.apache.kafka.streams.kstream.Grouped[K, V] + type Consumed[K, V] = org.apache.kafka.streams.kstream.Consumed[K, V] + type Produced[K, V] = org.apache.kafka.streams.kstream.Produced[K, V] + type Repartitioned[K, V] = org.apache.kafka.streams.kstream.Repartitioned[K, V] + type Joined[K, V, VO] = org.apache.kafka.streams.kstream.Joined[K, V, VO] + type StreamJoined[K, V, VO] = org.apache.kafka.streams.kstream.StreamJoined[K, V, VO] + type Named = org.apache.kafka.streams.kstream.Named + type Branched[K, V] = org.apache.kafka.streams.kstream.Branched[K, V] +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala new file mode 100644 index 0000000000..6a3906dd58 --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams + +import org.apache.kafka.streams.state.{KeyValueStore, SessionStore, WindowStore} +import org.apache.kafka.common.utils.Bytes + +package object scala { + type ByteArrayKeyValueStore = KeyValueStore[Bytes, Array[Byte]] + type ByteArraySessionStore = SessionStore[Bytes, Array[Byte]] + type ByteArrayWindowStore = WindowStore[Bytes, Array[Byte]] +} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala new file mode 100644 index 0000000000..cf643d518a --- /dev/null +++ b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.serialization + +import java.nio.ByteBuffer +import java.util +import java.util.UUID + +import org.apache.kafka.common.serialization.{Deserializer, Serde, Serdes => JSerdes, Serializer} +import org.apache.kafka.streams.kstream.WindowedSerdes + +object Serdes extends LowPrioritySerdes { + implicit def stringSerde: Serde[String] = JSerdes.String() + implicit def longSerde: Serde[Long] = JSerdes.Long().asInstanceOf[Serde[Long]] + implicit def javaLongSerde: Serde[java.lang.Long] = JSerdes.Long() + implicit def byteArraySerde: Serde[Array[Byte]] = JSerdes.ByteArray() + implicit def bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = JSerdes.Bytes() + implicit def byteBufferSerde: Serde[ByteBuffer] = JSerdes.ByteBuffer() + implicit def shortSerde: Serde[Short] = JSerdes.Short().asInstanceOf[Serde[Short]] + implicit def javaShortSerde: Serde[java.lang.Short] = JSerdes.Short() + implicit def floatSerde: Serde[Float] = JSerdes.Float().asInstanceOf[Serde[Float]] + implicit def javaFloatSerde: Serde[java.lang.Float] = JSerdes.Float() + implicit def doubleSerde: Serde[Double] = JSerdes.Double().asInstanceOf[Serde[Double]] + implicit def javaDoubleSerde: Serde[java.lang.Double] = JSerdes.Double() + implicit def intSerde: Serde[Int] = JSerdes.Integer().asInstanceOf[Serde[Int]] + implicit def javaIntegerSerde: Serde[java.lang.Integer] = JSerdes.Integer() + implicit def uuidSerde: Serde[UUID] = JSerdes.UUID() + + implicit def sessionWindowedSerde[T](implicit tSerde: Serde[T]): WindowedSerdes.SessionWindowedSerde[T] = + new WindowedSerdes.SessionWindowedSerde[T](tSerde) + + def fromFn[T >: Null](serializer: T => Array[Byte], deserializer: Array[Byte] => Option[T]): Serde[T] = + JSerdes.serdeFrom( + new Serializer[T] { + override def serialize(topic: String, data: T): Array[Byte] = serializer(data) + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + }, + new 
Deserializer[T] { + override def deserialize(topic: String, data: Array[Byte]): T = deserializer(data).orNull + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + } + ) + + def fromFn[T >: Null]( + serializer: (String, T) => Array[Byte], + deserializer: (String, Array[Byte]) => Option[T] + ): Serde[T] = + JSerdes.serdeFrom( + new Serializer[T] { + override def serialize(topic: String, data: T): Array[Byte] = serializer(topic, data) + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + }, + new Deserializer[T] { + override def deserialize(topic: String, data: Array[Byte]): T = deserializer(topic, data).orNull + override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () + override def close(): Unit = () + } + ) +} + +trait LowPrioritySerdes { + + implicit val nullSerde: Serde[Null] = + Serdes.fromFn[Null]( + { _: Null => + null + }, + { _: Array[Byte] => + None + } + ) +} diff --git a/streams/streams-scala/bin/test/log4j2.yaml b/streams/streams-scala/bin/test/log4j2.yaml new file mode 100644 index 0000000000..8c33fae786 --- /dev/null +++ b/streams/streams-scala/bin/test/log4j2.yaml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "%-4r [%t] %-5p %c %x - %m%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala new file mode 100644 index 0000000000..2b1f25afdf --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala @@ -0,0 +1,470 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala + +import java.time.Duration +import java.util +import java.util.{Locale, Properties} +import java.util.regex.Pattern +import org.apache.kafka.common.serialization.{Serdes => SerdesJ} +import org.apache.kafka.streams.kstream.{ + Aggregator, + Initializer, + JoinWindows, + KGroupedStream => KGroupedStreamJ, + KStream => KStreamJ, + KTable => KTableJ, + KeyValueMapper, + Materialized => MaterializedJ, + Reducer, + StreamJoined => StreamJoinedJ, + ValueJoiner, + ValueMapper +} +import org.apache.kafka.streams.processor.api +import org.apache.kafka.streams.processor.api.{Processor, ProcessorSupplier} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.kstream._ +import org.apache.kafka.streams.{KeyValue, StreamsBuilder => StreamsBuilderJ, StreamsConfig, TopologyDescription} +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api._ + +import scala.jdk.CollectionConverters._ + +/** + * Test suite that verifies that the topology built by the Java and Scala APIs match. + */ +//noinspection ScalaDeprecation +@Timeout(600) +class TopologyTest { + private val inputTopic = "input-topic" + private val userClicksTopic = "user-clicks-topic" + private val userRegionsTopic = "user-regions-topic" + + private val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaSimple(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val _: KStream[String, String] = textLines.flatMapValues(v => pattern.split(v.toLowerCase)) + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + val streamBuilder = new StreamsBuilderJ + val textLines = streamBuilder.stream[String, String](inputTopic) + val _: KStreamJ[String, String] = textLines.flatMapValues(s => pattern.split(s.toLowerCase).toBuffer.asJava) + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaAggregate(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + textLines + .flatMapValues(v => pattern.split(v.toLowerCase)) + .groupBy((_, v) => v) + .count() + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + + val splits: KStreamJ[String, String] = + textLines.flatMapValues(s => pattern.split(s.toLowerCase).toBuffer.asJava) + + val grouped: KGroupedStreamJ[String, String] = splits.groupBy((_, v) => v) + + grouped.count() + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test def shouldBuildIdenticalTopologyInJavaNScalaCogroupSimple(): Unit = { + + // build the Scala topology + def 
getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + textLines + .mapValues(v => v.length) + .groupByKey + .cogroup((_, v1, v2: Long) => v1 + v2) + .aggregate(0L) + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + + val splits: KStreamJ[String, Int] = textLines.mapValues( + new ValueMapper[String, Int] { + def apply(s: String): Int = s.length + } + ) + + splits.groupByKey + .cogroup((k: String, v: Int, a: Long) => a + v) + .aggregate(() => 0L) + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test def shouldBuildIdenticalTopologyInJavaNScalaCogroup(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines1 = streamBuilder.stream[String, String](inputTopic) + val textLines2 = streamBuilder.stream[String, String]("inputTopic2") + + textLines1 + .mapValues(v => v.length) + .groupByKey + .cogroup((_, v1, v2: Long) => v1 + v2) + .cogroup(textLines2.groupByKey, (_, v: String, a) => v.length + a) + .aggregate(0L) + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines1: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + val textLines2: KStreamJ[String, String] = streamBuilder.stream[String, String]("inputTopic2") + + val splits: KStreamJ[String, Int] = textLines1.mapValues( + new ValueMapper[String, Int] { + def apply(s: String): Int = s.length + } + ) + + splits.groupByKey + .cogroup((k: String, v: Int, a: Long) => a + v) + .cogroup(textLines2.groupByKey(), (k: String, v: String, a: Long) => v.length + a) + .aggregate(() => 0L) + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test def shouldBuildIdenticalTopologyInJavaNScalaJoin(): Unit = { + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val builder = new StreamsBuilder() + + val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) + + val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) + + // clicks per region + userClicksStream + .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) + .map((_, regionWithClicks) => regionWithClicks) + .groupByKey + .reduce(_ + _) + + builder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + import java.lang.{Long => JLong} + + val builder: StreamsBuilderJ = new StreamsBuilderJ() + + val userClicksStream: KStreamJ[String, JLong] = + builder.stream[String, JLong](userClicksTopic, Consumed.`with`[String, JLong]) + + val userRegionsTable: KTableJ[String, String] = + builder.table[String, String](userRegionsTopic, Consumed.`with`[String, String]) + + // Join the stream against the table. 
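+      // (the Scala topology above passes the joiner as a curried argument and resolves the
+      // Joined serdes implicitly; the Java API below takes the ValueJoiner and Joined explicitly)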
+ val valueJoinerJ: ValueJoiner[JLong, String, (String, JLong)] = + (clicks: JLong, region: String) => (if (region == null) "UNKNOWN" else region, clicks) + val userClicksJoinRegion: KStreamJ[String, (String, JLong)] = userClicksStream.leftJoin( + userRegionsTable, + valueJoinerJ, + Joined.`with`[String, JLong, String] + ) + + // Change the stream from -> to -> + val clicksByRegion: KStreamJ[String, JLong] = userClicksJoinRegion.map { (_, regionWithClicks) => + new KeyValue(regionWithClicks._1, regionWithClicks._2) + } + + // Compute the total per region by summing the individual click counts per region. + clicksByRegion + .groupByKey(Grouped.`with`[String, JLong]) + .reduce((v1, v2) => v1 + v2) + + builder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaProcess(): Unit = { + val processorSupplier = new ProcessorSupplier[String, String, String, String] { + override def get(): Processor[String, String, String, String] = + new api.Processor[String, String, String, String] { + override def process(record: api.Record[String, String]): Unit = {} + } + } + + // build the Scala topology + def getTopologyScala: TopologyDescription = { + + import org.apache.kafka.streams.scala.serialization.Serdes._ + + val streamBuilder = new StreamsBuilder + val textLines = streamBuilder.stream[String, String](inputTopic) + + val _: KTable[String, Long] = textLines + .process(processorSupplier) + .groupBy((_, v) => v) + .count() + + streamBuilder.build().describe() + } + + // build the Java topology + def getTopologyJava: TopologyDescription = { + + val streamBuilder = new StreamsBuilderJ + val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) + + val lowered: KStreamJ[String, String] = textLines.process(processorSupplier) + + val grouped: KGroupedStreamJ[String, String] = lowered.groupBy((_, v) => v) + + // word counts + grouped.count() + + streamBuilder.build().describe() + } + + // should match + assertEquals(getTopologyScala, getTopologyJava) + } + + @Test + def shouldBuildIdenticalTopologyInJavaNScalaProperties(): Unit = { + + val props = new Properties() + props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE) + + val propsNoOptimization = new Properties() + propsNoOptimization.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.NO_OPTIMIZATION) + + val AGGREGATION_TOPIC = "aggregationTopic" + val REDUCE_TOPIC = "reduceTopic" + val JOINED_TOPIC = "joinedTopic" + + // build the Scala topology + def getTopologyScala: StreamsBuilder = { + + val aggregator = (_: String, v: String, agg: Int) => agg + v.length + val reducer = (v1: String, v2: String) => v1 + ":" + v2 + val processorValueCollector: util.List[String] = new util.ArrayList[String] + + val builder: StreamsBuilder = new StreamsBuilder + + val sourceStream: KStream[String, String] = + builder.stream(inputTopic)(Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + val mappedStream: KStream[String, String] = + sourceStream.map((k: String, v: String) => (k.toUpperCase(Locale.getDefault), v)) + mappedStream + .filter((k: String, _: String) => k == "B") + .mapValues((v: String) => v.toUpperCase(Locale.getDefault)) + .process(new SimpleProcessorSupplier(processorValueCollector)) + + val stream2 = mappedStream.groupByKey + .aggregate(0)(aggregator)(Materialized.`with`(NewSerdes.stringSerde, NewSerdes.intSerde)) + .toStream + 
stream2.to(AGGREGATION_TOPIC)(Produced.`with`(NewSerdes.stringSerde, NewSerdes.intSerde)) + + // adding operators for case where the repartition node is further downstream + val stream3 = mappedStream + .filter((_: String, _: String) => true) + .peek((k: String, v: String) => System.out.println(k + ":" + v)) + .groupByKey + .reduce(reducer)(Materialized.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + .toStream + stream3.to(REDUCE_TOPIC)(Produced.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + mappedStream + .filter((k: String, _: String) => k == "A") + .join(stream2)( + (v1: String, v2: Int) => v1 + ":" + v2.toString, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)) + )( + StreamJoined.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, NewSerdes.intSerde) + ) + .to(JOINED_TOPIC) + + mappedStream + .filter((k: String, _: String) => k == "A") + .join(stream3)( + (v1: String, v2: String) => v1 + ":" + v2.toString, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)) + )( + StreamJoined.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, NewSerdes.stringSerde) + ) + .to(JOINED_TOPIC) + + builder + } + + // build the Java topology + def getTopologyJava: StreamsBuilderJ = { + + val keyValueMapper: KeyValueMapper[String, String, KeyValue[String, String]] = + (key, value) => KeyValue.pair(key.toUpperCase(Locale.getDefault), value) + val initializer: Initializer[Integer] = () => 0 + val aggregator: Aggregator[String, String, Integer] = (_, value, aggregate) => aggregate + value.length + val reducer: Reducer[String] = (v1, v2) => v1 + ":" + v2 + val valueMapper: ValueMapper[String, String] = v => v.toUpperCase(Locale.getDefault) + val processorValueCollector = new util.ArrayList[String] + val processorSupplier = new SimpleProcessorSupplier(processorValueCollector) + val valueJoiner2: ValueJoiner[String, Integer, String] = (value1, value2) => value1 + ":" + value2.toString + val valueJoiner3: ValueJoiner[String, String, String] = (value1, value2) => value1 + ":" + value2 + + val builder = new StreamsBuilderJ + + val sourceStream = builder.stream(inputTopic, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + val mappedStream: KStreamJ[String, String] = + sourceStream.map(keyValueMapper) + mappedStream + .filter((key, _) => key == "B") + .mapValues[String](valueMapper) + .process(processorSupplier) + + val stream2: KStreamJ[String, Integer] = mappedStream.groupByKey + .aggregate(initializer, aggregator, MaterializedJ.`with`(NewSerdes.stringSerde, SerdesJ.Integer)) + .toStream + stream2.to(AGGREGATION_TOPIC, Produced.`with`(NewSerdes.stringSerde, SerdesJ.Integer)) + + // adding operators for case where the repartition node is further downstream + val stream3 = mappedStream + .filter((_, _) => true) + .peek((k, v) => System.out.println(k + ":" + v)) + .groupByKey + .reduce(reducer, MaterializedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + .toStream + stream3.to(REDUCE_TOPIC, Produced.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) + + mappedStream + .filter((key, _) => key == "A") + .join[Integer, String]( + stream2, + valueJoiner2, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)), + StreamJoinedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, SerdesJ.Integer) + ) + .to(JOINED_TOPIC) + + mappedStream + .filter((key, _) => key == "A") + .join( + stream3, + valueJoiner3, + JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), 
Duration.ofHours(24)), + StreamJoinedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, SerdesJ.String) + ) + .to(JOINED_TOPIC) + + builder + } + + assertNotEquals( + getTopologyScala.build(props).describe.toString, + getTopologyScala.build(propsNoOptimization).describe.toString + ) + assertEquals( + getTopologyScala.build(propsNoOptimization).describe.toString, + getTopologyJava.build(propsNoOptimization).describe.toString + ) + assertEquals(getTopologyScala.build(props).describe.toString, getTopologyJava.build(props).describe.toString) + } + + private class SimpleProcessorSupplier private[TopologyTest] (val valueList: util.List[String]) + extends ProcessorSupplier[String, String, Void, Void] { + + override def get(): Processor[String, String, Void, Void] = + (record: api.Record[String, String]) => valueList.add(record.value()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala new file mode 100644 index 0000000000..4656a4d12f --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.clients.consumer.internals.AutoOffsetResetStrategy +import org.apache.kafka.streams.AutoOffsetReset +import org.apache.kafka.streams.kstream.internals.ConsumedInternal +import org.apache.kafka.streams.processor.FailOnInvalidTimestamp +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class ConsumedTest { + + @Test + def testCreateConsumed(): Unit = { + val consumed: Consumed[String, Long] = Consumed.`with`[String, Long] + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + } + + @Test + def testCreateConsumedWithTimestampExtractorAndResetPolicy(): Unit = { + val timestampExtractor = new FailOnInvalidTimestamp() + val resetPolicy = AutoOffsetReset.latest() + val consumed: Consumed[String, Long] = + Consumed.`with`(timestampExtractor, resetPolicy) + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + assertEquals(timestampExtractor, internalConsumed.timestampExtractor) + assertEquals(AutoOffsetResetStrategy.StrategyType.LATEST, internalConsumed.offsetResetPolicy.offsetResetStrategy()) + } + + @Test + def testCreateConsumedWithTimestampExtractor(): Unit = { + val timestampExtractor = new FailOnInvalidTimestamp() + val consumed: Consumed[String, Long] = Consumed.`with`[String, Long](timestampExtractor) + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + assertEquals(timestampExtractor, internalConsumed.timestampExtractor) + } + + @Test + def testCreateConsumedWithResetPolicy(): Unit = { + val resetPolicy = AutoOffsetReset.latest() + val consumed: Consumed[String, Long] = Consumed.`with`[String, Long](resetPolicy) + + val internalConsumed = new ConsumedInternal(consumed) + assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) + assertEquals(AutoOffsetResetStrategy.StrategyType.LATEST, internalConsumed.offsetResetPolicy.offsetResetStrategy()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala new file mode 100644 index 0000000000..02f333ec6b --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.GroupedInternal +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class GroupedTest { + + @Test + def testCreateGrouped(): Unit = { + val grouped: Grouped[String, Long] = Grouped.`with`[String, Long] + + val internalGrouped = new GroupedInternal[String, Long](grouped) + assertEquals(Serdes.stringSerde.getClass, internalGrouped.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalGrouped.valueSerde.getClass) + } + + @Test + def testCreateGroupedWithRepartitionTopicName(): Unit = { + val repartitionTopicName = "repartition-topic" + val grouped: Grouped[String, Long] = Grouped.`with`(repartitionTopicName) + + val internalGrouped = new GroupedInternal[String, Long](grouped) + assertEquals(Serdes.stringSerde.getClass, internalGrouped.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalGrouped.valueSerde.getClass) + assertEquals(repartitionTopicName, internalGrouped.name()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala new file mode 100644 index 0000000000..4e6fa563f5 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class JoinedTest { + + @Test + def testCreateJoined(): Unit = { + val joined: Joined[String, Long, Int] = Joined.`with`[String, Long, Int] + + assertEquals(joined.keySerde.getClass, Serdes.stringSerde.getClass) + assertEquals(joined.valueSerde.getClass, Serdes.longSerde.getClass) + assertEquals(joined.otherValueSerde.getClass, Serdes.intSerde.getClass) + } + + @Test + def testCreateJoinedWithSerdesAndRepartitionTopicName(): Unit = { + val repartitionTopicName = "repartition-topic" + val joined: Joined[String, Long, Int] = Joined.`with`(repartitionTopicName) + + assertEquals(joined.keySerde.getClass, Serdes.stringSerde.getClass) + assertEquals(joined.valueSerde.getClass, Serdes.longSerde.getClass) + assertEquals(joined.otherValueSerde.getClass, Serdes.intSerde.getClass) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala new file mode 100644 index 0000000000..bbcc1b503f --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.Named +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.utils.TestDriver +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test + +import scala.jdk.CollectionConverters._ + +class KStreamSplitTest extends TestDriver { + + @Test + def testRouteMessagesAccordingToPredicates(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = Array("default", "even", "three"); + + val m = builder + .stream[Integer, Integer](sourceTopic) + .split(Named.as("_")) + .branch((_, v) => v % 2 == 0) + .branch((_, v) => v % 3 == 0) + .defaultBranch() + + m("_0").to(sinkTopic(0)) + m("_1").to(sinkTopic(1)) + m("_2").to(sinkTopic(2)) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[Integer, Integer](sourceTopic) + val testOutput = sinkTopic.map(name => testDriver.createOutput[Integer, Integer](name)) + + testInput.pipeValueList( + List(1, 2, 3, 4, 5) + .map(Integer.valueOf) + .asJava + ) + assertEquals(List(1, 5), testOutput(0).readValuesToList().asScala) + assertEquals(List(2, 4), testOutput(1).readValuesToList().asScala) + assertEquals(List(3), testOutput(2).readValuesToList().asScala) + + testDriver.close() + } + + @Test + def testRouteMessagesToConsumers(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + + val m = builder + .stream[Integer, Integer](sourceTopic) + .split(Named.as("_")) + .branch((_, v) => v % 2 == 0, Branched.withConsumer(ks => ks.to("even"), "consumedEvens")) + .branch((_, v) => v % 3 == 0, Branched.withFunction(ks => ks.mapValues(x => x * x), "mapped")) + .noDefaultBranch() + + m("_mapped").to("mapped") + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[Integer, Integer](sourceTopic) + testInput.pipeValueList( + List(1, 2, 3, 4, 5, 9) + .map(Integer.valueOf) + .asJava + ) + + val even = testDriver.createOutput[Integer, Integer]("even") + val mapped = testDriver.createOutput[Integer, Integer]("mapped") + + assertEquals(List(2, 4), even.readValuesToList().asScala) + assertEquals(List(9, 81), mapped.readValuesToList().asScala) + + testDriver.close() + } + + @Test + def testRouteMessagesToAnonymousConsumers(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + + val m = builder + .stream[Integer, Integer](sourceTopic) + .split(Named.as("_")) + .branch((_, v) => v % 2 == 0, Branched.withConsumer(ks => ks.to("even"))) + .branch((_, v) => v % 3 == 0, Branched.withFunction(ks => ks.mapValues(x => x * x))) + .noDefaultBranch() + + m("_2").to("mapped") + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[Integer, Integer](sourceTopic) + testInput.pipeValueList( + List(1, 2, 3, 4, 5, 9) + .map(Integer.valueOf) + .asJava + ) + + val even = testDriver.createOutput[Integer, Integer]("even") + val mapped = testDriver.createOutput[Integer, Integer]("mapped") + + assertEquals(List(2, 4), even.readValuesToList().asScala) + assertEquals(List(9, 81), mapped.readValuesToList().asScala) + + testDriver.close() + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala new file mode 100644 index 
0000000000..6a0b6c1b0e --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala @@ -0,0 +1,419 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import java.time.Duration.ofSeconds +import java.time.{Duration, Instant} +import org.apache.kafka.streams.kstream.{JoinWindows, Named} +import org.apache.kafka.streams.processor.api +import org.apache.kafka.streams.processor.api.{FixedKeyRecord, Processor, ProcessorSupplier} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.utils.TestDriver +import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder, Stores} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +import java.util +import java.util.Collections +import scala.jdk.CollectionConverters._ + +class KStreamTest extends TestDriver { + + @Test + def testFilterRecordsSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).filter((_, value) => value != "value2").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", testOutput.readValue) + + testInput.pipeInput("2", "value2") + assertTrue(testOutput.isEmpty) + + testInput.pipeInput("3", "value3") + assertEquals("value3", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testFilterRecordsNotSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).filterNot((_, value) => value == "value2").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", testOutput.readValue) + + testInput.pipeInput("2", "value2") + assertTrue(testOutput.isEmpty) + + testInput.pipeInput("3", "value3") + assertEquals("value3", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testForeachActionsOnRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + + var acc = "" + 
builder.stream[String, String](sourceTopic).foreach((_, value) => acc += value) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", acc) + + testInput.pipeInput("2", "value2") + assertEquals("value1value2", acc) + + testDriver.close() + } + + @Test + def testPeekActionsOnRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + var acc = "" + builder.stream[String, String](sourceTopic).peek((_, v) => acc += v).to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", acc) + assertEquals("value1", testOutput.readValue) + + testInput.pipeInput("2", "value2") + assertEquals("value1value2", acc) + assertEquals("value2", testOutput.readValue) + + testDriver.close() + } + + @Test + def testSelectNewKey(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).selectKey((_, value) => value).to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + assertEquals("value1", testOutput.readKeyValue.key) + + testInput.pipeInput("1", "value2") + assertEquals("value2", testOutput.readKeyValue.key) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testRepartitionKStream(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val repartitionName = "repartition" + val sinkTopic = "sink" + + builder.stream[String, String](sourceTopic).repartition(Repartitioned.`with`(repartitionName)).to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value1") + val kv1 = testOutput.readKeyValue + assertEquals("1", kv1.key) + assertEquals("value1", kv1.value) + + testInput.pipeInput("2", "value2") + val kv2 = testOutput.readKeyValue + assertEquals("2", kv2.key) + assertEquals("value2", kv2.value) + + assertTrue(testOutput.isEmpty) + + // appId == "test" + testDriver.producedTopicNames() contains "test-" + repartitionName + "-repartition" + + testDriver.close() + } + + // noinspection ScalaDeprecation + @Test + def testJoinCorrectlyRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val stream1 = builder.stream[String, String](sourceTopic1) + val stream2 = builder.stream[String, String](sourceTopic2) + stream1 + .join(stream2)((a, b) => s"$a-$b", JoinWindows.ofTimeDifferenceAndGrace(ofSeconds(1), Duration.ofHours(24))) + .to(sinkTopic) + + val now = Instant.now() + + val testDriver = createTestDriver(builder, now) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput1.pipeInput("1", "topic1value1", now) + testInput2.pipeInput("1", "topic2value1", now) + + 
assertEquals("topic1value1-topic2value1", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testProcessCorrectlyRecords(): Unit = { + val processorSupplier: ProcessorSupplier[String, String, String, String] = + new api.ProcessorSupplier[String, String, String, String] { + private val storeName = "store-name" + + override def stores: util.Set[StoreBuilder[_]] = { + val keyValueStoreBuilder = Stores.keyValueStoreBuilder( + Stores.persistentKeyValueStore(storeName), + Serdes.stringSerde, + Serdes.stringSerde + ) + Collections.singleton(keyValueStoreBuilder) + } + + override def get(): Processor[String, String, String, String] = + new api.Processor[String, String, String, String] { + private var context: api.ProcessorContext[String, String] = _ + private var store: KeyValueStore[String, String] = _ + + override def init(context: api.ProcessorContext[String, String]): Unit = { + this.context = context + store = context.getStateStore(storeName) + } + + override def process(record: api.Record[String, String]): Unit = { + val key = record.key() + val value = record.value() + val processedKey = s"$key-processed" + val processedValue = s"$value-processed" + store.put(processedKey, processedValue) + context.forward(new api.Record(processedKey, processedValue, record.timestamp())) + } + } + } + + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .process(processorSupplier) + .to(sinkTopic) + + val now = Instant.now() + val testDriver = createTestDriver(builder, now) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value", now) + + val result = testOutput.readKeyValue() + assertEquals("value-processed", result.value) + assertEquals("1-processed", result.key) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testProcessValuesCorrectlyRecords(): Unit = { + val processorSupplier: api.FixedKeyProcessorSupplier[String, String, String] = + () => + new api.FixedKeyProcessor[String, String, String] { + private var context: api.FixedKeyProcessorContext[String, String] = _ + + override def init(context: api.FixedKeyProcessorContext[String, String]): Unit = + this.context = context + + override def process(record: FixedKeyRecord[String, String]): Unit = { + val processedValue = s"${record.value()}-processed" + context.forward(record.withValue(processedValue)) + } + } + + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .processValues(processorSupplier) + .to(sinkTopic) + + val now = Instant.now() + val testDriver = createTestDriver(builder, now) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput.pipeInput("1", "value", now) + + val result = testOutput.readKeyValue() + assertEquals("value-processed", result.value) + assertEquals("1", result.key) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testJoinTwoKStreamToTables(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).toTable + val table2 = builder.stream[String, 
String](sourceTopic2).toTable + table1.join(table2)((a, b) => a + b).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput1.pipeInput("1", "topic1value1") + testInput2.pipeInput("1", "topic2value1") + + assertEquals("topic1value1topic2value1", testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testSettingNameOnFilter(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + builder + .stream[String, String](sourceTopic) + .filter((_, value) => value != "value2", Named.as("my-name")) + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val filterNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", filterNode.name()) + } + + @Test + def testSettingNameOnOutputTable(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sinkTopic = "sink" + + builder + .stream[String, String](sourceTopic1) + .toTable(Named.as("my-name")) + .toStream + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val tableNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", tableNode.name()) + } + + @Test + def testSettingNameOnJoin(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source" + val sourceGTable = "table" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic1) + val table = builder.globalTable[String, String](sourceGTable) + stream + .join(table, Named.as("my-name"))((a, b) => s"$a-$b", (a, b) => a + b) + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val joinNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", joinNode.name()) + } + + @Test + def testSettingNameOnProcess(): Unit = { + class TestProcessor extends api.Processor[String, String, String, String] { + override def process(record: api.Record[String, String]): Unit = {} + } + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val stream = builder.stream[String, String](sourceTopic) + stream + .process(() => new TestProcessor, Named.as("my-name")) + .to(sinkTopic) + + val transformNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) + assertEquals("my-name", transformNode.name()) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala new file mode 100644 index 0000000000..e473c6579a --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala @@ -0,0 +1,617 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.Suppressed.BufferConfig +import org.apache.kafka.streams.kstream.{ + Named, + SessionWindows, + SlidingWindows, + Suppressed => JSuppressed, + TimeWindows, + Windowed +} +import org.apache.kafka.streams.scala.ImplicitConversions._ +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.scala.utils.TestDriver +import org.apache.kafka.streams.scala.{ByteArrayKeyValueStore, StreamsBuilder} +import org.junit.jupiter.api.Assertions.{assertEquals, assertNull, assertTrue} +import org.junit.jupiter.api.Test +import java.time.Duration +import java.time.Duration.ofMillis + +import scala.jdk.CollectionConverters._ + +//noinspection ScalaDeprecation +class KTableTest extends TestDriver { + + @Test + def testFilterRecordsSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() + table.filter((key, value) => key.equals("a") && value == 1).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + testInput.pipeInput("a", "passes filter : add new row to table") + val record = testOutput.readKeyValue + assertEquals("a", record.key) + assertEquals(1, record.value) + } + { + testInput.pipeInput("a", "fails filter : remove existing row from table") + val record = testOutput.readKeyValue + assertEquals("a", record.key) + assertNull(record.value) + } + { + testInput.pipeInput("b", "fails filter : no output") + assertTrue(testOutput.isEmpty) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testFilterRecordsNotSatisfyingPredicate(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() + table.filterNot((_, value) => value > 1).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + testInput.pipeInput("1", "value1") + val record = testOutput.readKeyValue + assertEquals("1", record.key) + assertEquals(1, record.value) + } + { + testInput.pipeInput("1", "value2") + val record = testOutput.readKeyValue + assertEquals("1", record.key) + assertNull(record.value) + } + { + testInput.pipeInput("2", "value1") + val record = testOutput.readKeyValue + assertEquals("2", record.key) + assertEquals(1, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testJoinCorrectlyRecords(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() 
+ val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() + table1.join(table2)((a, b) => a + b).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + testInput1.pipeInput("1", "topic1value1") + testInput2.pipeInput("1", "topic2value1") + assertEquals(2, testOutput.readValue) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testJoinCorrectlyRecordsAndStateStore(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + val stateStore = "store" + val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) + + val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() + val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() + table1.join(table2, materialized)((a, b) => a + b).toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + testInput1.pipeInput("1", "topic1value1") + testInput2.pipeInput("1", "topic2value1") + assertEquals(2, testOutput.readValue) + assertEquals(2, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) + + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilTimeLimit(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val window = TimeWindows.ofSizeAndGrace(Duration.ofSeconds(1L), Duration.ofHours(24)) + val suppression = JSuppressed.untilTimeLimit[Windowed[String]](Duration.ofSeconds(2L), BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // publish key=1 @ time 1 => count==2 + testInput.pipeInput("1", "value2", 1L) + assertTrue(testOutput.isEmpty) + } + { + // move event time past the first window, but before the suppression window + testInput.pipeInput("2", "value1", 1001L) + assertTrue(testOutput.isEmpty) + } + { + // move event time riiiight before suppression window ends + testInput.pipeInput("2", "value2", 1999L) + assertTrue(testOutput.isEmpty) + } + { + // publish a late event before suppression window terminates => count==3 + testInput.pipeInput("1", "value3", 999L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the suppression window of the first window. 
+ testInput.pipeInput("2", "value3", 2001L) + val record = testOutput.readKeyValue + assertEquals("0:1000:1", record.key) + assertEquals(3L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlyGroupByKeyWindowedBySlidingWindow(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val window = SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(1000L), ofMillis(1000L)) + val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the grace period of the first window. + testInput.pipeInput("2", "value3", 5001L) + val record = testOutput.readKeyValue + assertEquals("0:1000:1", record.key) + assertEquals(1L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilWindowClosesByWindowed(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val window = TimeWindows.ofSizeAndGrace(Duration.ofSeconds(1L), Duration.ofSeconds(1L)) + val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // publish key=1 @ time 1 => count==2 + testInput.pipeInput("1", "value2", 1L) + assertTrue(testOutput.isEmpty) + } + { + // move event time past the window, but before the grace period + testInput.pipeInput("2", "value1", 1001L) + assertTrue(testOutput.isEmpty) + } + { + // move event time riiiight before grace period ends + testInput.pipeInput("2", "value2", 1999L) + assertTrue(testOutput.isEmpty) + } + { + // publish a late event before grace period terminates => count==3 + testInput.pipeInput("1", "value3", 999L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the grace period of the first window. 
+ testInput.pipeInput("2", "value3", 2001L) + val record = testOutput.readKeyValue + assertEquals("0:1000:1", record.key) + assertEquals(3L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilWindowClosesBySession(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + // Very similar to SuppressScenarioTest.shouldSupportFinalResultsForSessionWindows + val window = SessionWindows.ofInactivityGapAndGrace(Duration.ofMillis(5L), Duration.ofMillis(10L)) + val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) + + val table: KTable[Windowed[String], Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .windowedBy(window) + .count() + .suppress(suppression) + + table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // first window + testInput.pipeInput("k1", "v1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // first window + testInput.pipeInput("k1", "v1", 1L) + assertTrue(testOutput.isEmpty) + } + { + // new window, but grace period hasn't ended for first window + testInput.pipeInput("k1", "v1", 8L) + assertTrue(testOutput.isEmpty) + } + { + // out-of-order event for first window, included since grade period hasn't passed + testInput.pipeInput("k1", "v1", 2L) + assertTrue(testOutput.isEmpty) + } + { + // add to second window + testInput.pipeInput("k1", "v1", 13L) + assertTrue(testOutput.isEmpty) + } + { + // add out-of-order to second window + testInput.pipeInput("k1", "v1", 10L) + assertTrue(testOutput.isEmpty) + } + { + // push stream time forward to flush other events through + testInput.pipeInput("k1", "v1", 30L) + // late event should get dropped from the stream + testInput.pipeInput("k1", "v1", 3L) + // should now have to results + val r1 = testOutput.readRecord + assertEquals("0:2:k1", r1.key) + assertEquals(3L, r1.value) + assertEquals(2L, r1.timestamp) + val r2 = testOutput.readRecord + assertEquals("8:13:k1", r2.key) + assertEquals(3L, r2.value) + assertEquals(13L, r2.timestamp) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testCorrectlySuppressResultsUsingSuppressedUntilTimeLimtByNonWindowed(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + val suppression = JSuppressed.untilTimeLimit[String](Duration.ofSeconds(2L), BufferConfig.unbounded()) + + val table: KTable[String, Long] = builder + .stream[String, String](sourceTopic) + .groupByKey + .count() + .suppress(suppression) + + table.toStream.to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + val testOutput = testDriver.createOutput[String, Long](sinkTopic) + + { + // publish key=1 @ time 0 => count==1 + testInput.pipeInput("1", "value1", 0L) + assertTrue(testOutput.isEmpty) + } + { + // publish key=1 @ time 1 => count==2 + testInput.pipeInput("1", "value2", 1L) + assertTrue(testOutput.isEmpty) + } + { + // move event time past the window, but before the grace period + testInput.pipeInput("2", "value1", 1001L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right before grace period ends + testInput.pipeInput("2", "value2", 1999L) + 
assertTrue(testOutput.isEmpty) + } + { + // publish a late event before grace period terminates => count==3 + testInput.pipeInput("1", "value3", 999L) + assertTrue(testOutput.isEmpty) + } + { + // move event time right past the grace period of the first window. + testInput.pipeInput("2", "value3", 2001L) + val record = testOutput.readKeyValue + assertEquals("1", record.key) + assertEquals(3L, record.value) + } + assertTrue(testOutput.isEmpty) + + testDriver.close() + } + + @Test + def testSettingNameOnFilterProcessor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() + table + .filter((key, value) => key.equals("a") && value == 1, Named.as("my-name")) + .toStream + .to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val filterNode = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(3) + assertEquals("my-name", filterNode.name()) + } + + @Test + def testSettingNameOnCountProcessor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val sinkTopic = "sink" + + val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count(Named.as("my-name")) + table.toStream.to(sinkTopic) + + import scala.jdk.CollectionConverters._ + + val countNode = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(1) + assertEquals("my-name", countNode.name()) + } + + @Test + def testSettingNameOnJoinProcessor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() + val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() + table1 + .join(table2, Named.as("my-name"))((a, b) => a + b) + .toStream + .to(sinkTopic) + + val joinNodeLeft = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(6) + val joinNodeRight = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(7) + assertTrue(joinNodeLeft.name().contains("my-name")) + assertTrue(joinNodeRight.name().contains("my-name")) + } + + @Test + def testMapValuesWithValueMapperWithMaterialized(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val stateStore = "store" + val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) + + val table = builder.stream[String, String](sourceTopic).toTable + table.mapValues(value => value.length.toLong, materialized) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + + testInput.pipeInput("1", "topic1value1") + assertEquals(12, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) + + testDriver.close() + } + + @Test + def testMapValuesWithValueMapperWithKeyAndWithMaterialized(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic = "source" + val stateStore = "store" + val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) + + val table = builder.stream[String, String](sourceTopic).toTable + table.mapValues((key, value) => key.length + value.length.toLong, materialized) + + val testDriver = createTestDriver(builder) + val testInput = testDriver.createInput[String, String](sourceTopic) + + testInput.pipeInput("1", "topic1value1") + 
assertEquals(13, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) + + testDriver.close() + } + + @Test + def testJoinWithBiFunctionKeyExtractor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).toTable + val table2 = builder.stream[String, String](sourceTopic2).toTable + + table1 + .join[String, String, String]( + table2, + (key: String, value: String) => s"$key-$value", + joiner = (v1: String, v2: String) => s"$v1+$v2", + materialized = Materialized.`with`[String, String, ByteArrayKeyValueStore] + ) + .toStream + .to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + testInput1.pipeInput("k1", "v1") + testInput2.pipeInput("k1-v1", "v2") + + val record = testOutput.readKeyValue + assertEquals("k1", record.key) + assertEquals("v1+v2", record.value) + + testDriver.close() + } + + @Test + def testLeftJoinWithBiFunctionKeyExtractor(): Unit = { + val builder = new StreamsBuilder() + val sourceTopic1 = "source1" + val sourceTopic2 = "source2" + val sinkTopic = "sink" + + val table1 = builder.stream[String, String](sourceTopic1).toTable + val table2 = builder.stream[String, String](sourceTopic2).toTable + + table1 + .leftJoin[String, String, String]( + table2, + (key: String, value: String) => s"$key-$value", + joiner = (v1: String, v2: String) => s"${v1}+${Option(v2).getOrElse("null")}", + materialized = Materialized.`with`[String, String, ByteArrayKeyValueStore] + ) + .toStream + .to(sinkTopic) + + val testDriver = createTestDriver(builder) + val testInput1 = testDriver.createInput[String, String](sourceTopic1) + val testInput2 = testDriver.createInput[String, String](sourceTopic2) + val testOutput = testDriver.createOutput[String, String](sinkTopic) + + // First insert into the foreign key table (table2) + testInput2.pipeInput("k1-v1", "v2") + + // Then insert into the primary table (table1) + testInput1.pipeInput("k1", "v1") + + val record1 = testOutput.readKeyValue + assertEquals("k1", record1.key) + assertEquals("v1+v2", record1.value) + + // Test with non-matching foreign key (should still output due to left join) + testInput1.pipeInput("k2", "v3") + + val record2 = testOutput.readKeyValue + assertEquals("k2", record2.key) + assertEquals("v3+null", record2.value) + + testDriver.close() + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala new file mode 100644 index 0000000000..9e0c466c26 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.MaterializedInternal +import org.apache.kafka.streams.scala._ +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.state.Stores +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +import java.time.Duration + +class MaterializedTest { + + @Test + def testCreateMaterializedWithSerdes(): Unit = { + val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = + Materialized.`with`[String, Long, ByteArrayKeyValueStore] + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + } + + @Test + def testCreateMaterializedWithSerdesAndStoreName(): Unit = { + val storeName = "store" + val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = + Materialized.as[String, Long, ByteArrayKeyValueStore](storeName) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeName, internalMaterialized.storeName) + } + + @Test + def testCreateMaterializedWithSerdesAndWindowStoreSupplier(): Unit = { + val storeSupplier = Stores.persistentWindowStore("store", Duration.ofMillis(1), Duration.ofMillis(1), true) + val materialized: Materialized[String, Long, ByteArrayWindowStore] = + Materialized.as[String, Long](storeSupplier) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeSupplier, internalMaterialized.storeSupplier) + } + + @Test + def testCreateMaterializedWithSerdesAndKeyValueStoreSupplier(): Unit = { + val storeSupplier = Stores.persistentKeyValueStore("store") + val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = + Materialized.as[String, Long](storeSupplier) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) + assertEquals(storeSupplier, internalMaterialized.storeSupplier) + } + + @Test + def testCreateMaterializedWithSerdesAndSessionStoreSupplier(): Unit = { + val storeSupplier = Stores.persistentSessionStore("store", Duration.ofMillis(1)) + val materialized: Materialized[String, Long, ByteArraySessionStore] = + Materialized.as[String, Long](storeSupplier) + + val internalMaterialized = new MaterializedInternal(materialized) + assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, 
internalMaterialized.valueSerde.getClass) + assertEquals(storeSupplier, internalMaterialized.storeSupplier) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala new file mode 100644 index 0000000000..6fca794bfb --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.ProducedInternal +import org.apache.kafka.streams.processor.StreamPartitioner +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +import java.util +import java.util.Optional + +class ProducedTest { + + @Test + def testCreateProducedWithSerdes(): Unit = { + val produced: Produced[String, Long] = Produced.`with`[String, Long] + + val internalProduced = new ProducedInternal(produced) + assertEquals(Serdes.stringSerde.getClass, internalProduced.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalProduced.valueSerde.getClass) + } + + @Test + def testCreateProducedWithSerdesAndStreamPartitioner(): Unit = { + val partitioner = new StreamPartitioner[String, Long] { + override def partitions( + topic: String, + key: String, + value: Long, + numPartitions: Int + ): Optional[util.Set[Integer]] = { + val partitions = new util.HashSet[Integer]() + partitions.add(Int.box(0)) + Optional.of(partitions) + } + } + val produced: Produced[String, Long] = Produced.`with`(partitioner) + + val internalProduced = new ProducedInternal(produced) + assertEquals(Serdes.stringSerde.getClass, internalProduced.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalProduced.valueSerde.getClass) + assertEquals(partitioner, internalProduced.streamPartitioner) + } +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala new file mode 100644 index 0000000000..ee3515ac61 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.RepartitionedInternal +import org.apache.kafka.streams.processor.StreamPartitioner +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +import java.util +import java.util.Optional + +class RepartitionedTest { + + @Test + def testCreateRepartitionedWithSerdes(): Unit = { + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long] + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + } + + @Test + def testCreateRepartitionedWithSerdesAndNumPartitions(): Unit = { + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long](5) + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals(5, internalRepartitioned.numberOfPartitions) + + } + + @Test + def testCreateRepartitionedWithSerdesAndTopicName(): Unit = { + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long]("repartitionTopic") + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals("repartitionTopic", internalRepartitioned.name) + } + + @Test + def testCreateRepartitionedWithSerdesAndTopicNameAndNumPartitionsAndStreamPartitioner(): Unit = { + val partitioner = new StreamPartitioner[String, Long] { + override def partitions( + topic: String, + key: String, + value: Long, + numPartitions: Int + ): Optional[util.Set[Integer]] = { + val partitions = new util.HashSet[Integer]() + partitions.add(Int.box(0)) + Optional.of(partitions) + } + } + val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long](partitioner) + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals(partitioner, internalRepartitioned.streamPartitioner) + } + + @Test + def testCreateRepartitionedWithTopicNameAndNumPartitionsAndStreamPartitioner(): Unit = { + val partitioner = new StreamPartitioner[String, Long] { + override def partitions( + topic: String, + key: String, + value: Long, + numPartitions: Int + ): Optional[util.Set[Integer]] = { + val 
partitions = new util.HashSet[Integer]() + partitions.add(Int.box(0)) + Optional.of(partitions) + } + } + val repartitioned: Repartitioned[String, Long] = + Repartitioned + .`with`[String, Long](5) + .withName("repartitionTopic") + .withStreamPartitioner(partitioner) + + val internalRepartitioned = new RepartitionedInternal(repartitioned) + assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) + assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) + assertEquals(5, internalRepartitioned.numberOfPartitions) + assertEquals("repartitionTopic", internalRepartitioned.name) + assertEquals(partitioner, internalRepartitioned.streamPartitioner) + } + +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala new file mode 100644 index 0000000000..7a5a48ccb9 --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.streams.scala.kstream + +import org.apache.kafka.streams.kstream.internals.{InternalStreamsBuilder, StreamJoinedInternal} +import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder +import org.apache.kafka.streams.scala.serialization.Serdes +import org.apache.kafka.streams.scala.serialization.Serdes._ +import org.apache.kafka.streams.state.Stores +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.extension.ExtendWith +import org.junit.jupiter.api.{BeforeEach, Test} +import org.mockito.Mockito.{mock, when} +import org.mockito.junit.jupiter.{MockitoExtension, MockitoSettings} +import org.mockito.quality.Strictness + +import java.time.Duration + +@ExtendWith(Array(classOf[MockitoExtension])) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +class StreamJoinedTest { + + val builder: InternalStreamsBuilder = mock(classOf[InternalStreamsBuilder]) + val topoBuilder: InternalTopologyBuilder = mock(classOf[InternalTopologyBuilder]) + + @BeforeEach + def before(): Unit = { + when(builder.internalTopologyBuilder()).thenReturn(topoBuilder) + when(topoBuilder.topologyConfigs()).thenReturn(null) + } + + @Test + def testCreateStreamJoinedWithSerdes(): Unit = { + val streamJoined: StreamJoined[String, String, Long] = StreamJoined.`with`[String, String, Long] + + val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined, builder) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) + assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) + } + + @Test + def testCreateStreamJoinedWithSerdesAndStoreSuppliers(): Unit = { + val storeSupplier = Stores.inMemoryWindowStore("myStore", Duration.ofMillis(500), Duration.ofMillis(250), false) + + val otherStoreSupplier = + Stores.inMemoryWindowStore("otherStore", Duration.ofMillis(500), Duration.ofMillis(250), false) + + val streamJoined: StreamJoined[String, String, Long] = + StreamJoined.`with`[String, String, Long](storeSupplier, otherStoreSupplier) + + val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined, builder) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) + assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) + assertEquals(otherStoreSupplier, streamJoinedInternal.otherStoreSupplier()) + assertEquals(storeSupplier, streamJoinedInternal.thisStoreSupplier()) + } + + @Test + def testCreateStreamJoinedWithSerdesAndStateStoreName(): Unit = { + val streamJoined: StreamJoined[String, String, Long] = StreamJoined.as[String, String, Long]("myStoreName") + + val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined, builder) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) + assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) + assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) + assertEquals("myStoreName", streamJoinedInternal.storeName()) + } + +} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala new file mode 100644 index 
0000000000..23a24178fb --- /dev/null +++ b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.streams.scala.utils + +import java.time.Instant +import java.util.Properties + +import org.apache.kafka.common.serialization.Serde +import org.apache.kafka.streams.scala.StreamsBuilder +import org.apache.kafka.streams.{StreamsConfig, TestInputTopic, TestOutputTopic, TopologyTestDriver} +import org.apache.kafka.test.TestUtils + +trait TestDriver { + def createTestDriver(builder: StreamsBuilder, initialWallClockTime: Instant = Instant.now()): TopologyTestDriver = { + val config = new Properties() + config.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath) + new TopologyTestDriver(builder.build(), config, initialWallClockTime) + } + + implicit class TopologyTestDriverOps(inner: TopologyTestDriver) { + def createInput[K, V](topic: String)(implicit serdeKey: Serde[K], serdeValue: Serde[V]): TestInputTopic[K, V] = + inner.createInputTopic(topic, serdeKey.serializer, serdeValue.serializer) + + def createOutput[K, V](topic: String)(implicit serdeKey: Serde[K], serdeValue: Serde[V]): TestOutputTopic[K, V] = + inner.createOutputTopic(topic, serdeKey.deserializer, serdeValue.deserializer) + } +} diff --git a/streams/test-utils/bin/test/log4j2.yaml b/streams/test-utils/bin/test/log4j2.yaml new file mode 100644 index 0000000000..be546a18b5 --- /dev/null +++ b/streams/test-utils/bin/test/log4j2.yaml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: org.apache.kafka + level: INFO diff --git a/test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md b/test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md new file mode 100644 index 0000000000..e69a381161 --- /dev/null +++ b/test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md @@ -0,0 +1,192 @@ +This document describes a custom JUnit extension which allows for running the same JUnit tests against multiple Kafka +cluster configurations. + +# Annotations + +Three annotations are provided for defining a template of a Kafka cluster. + +* `@ClusterTest`: declarative style cluster definition +* `@ClusterTests`: wrapper around multiple `@ClusterTest`-s +* `@ClusterTemplate`: points to a function for imperative cluster definition + +Another helper annotation `@ClusterTestDefaults` allows overriding the defaults for +all `@ClusterTest` in a single test class. + +# Usage + +The simplest usage is `@ClusterTest` by itself which will use some reasonable defaults. + +```java +public class SampleTest { + @ClusterTest + void testSomething() { ... } +} +``` + +The defaults can be modified by setting specific parameters on the annotation. + +```java +public class SampleTest { + @ClusterTest(brokers = 3, metadataVersion = MetadataVersion.IBP_4_0_IV3) + void testSomething() { ... } +} +``` + +It is also possible to modify the defaults for a whole class using `@ClusterTestDefaults`. + +```java +@ClusterTestDefaults(brokers = 3, metadataVersion = MetadataVersion.IBP_4_0_IV3) +public class SampleTest { + @ClusterTest + void testSomething() { ... } +} +``` + +To set some specific config, an array of `@ClusterProperty` annotations can be +given. + +```java +public class SampleTest { + @ClusterTest( + types = {Type.KRAFT}, + brokerSecurityProtocol = SecurityProtocol.PLAINTEXT, + properties = { + @ClusterProperty(key = "inter.broker.protocol.version", value = "2.7-IV2"), + @ClusterProperty(key = "socket.send.buffer.bytes", value = "10240"), + }) + void testSomething() { ... } +} +``` + +Using the `@ClusterTests` annotation, multiple declarative cluster templates can +be given. + +```java +public class SampleTest { + @ClusterTests({ + @ClusterTest(brokerSecurityProtocol = SecurityProtocol.PLAINTEXT), + @ClusterTest(brokerSecurityProtocol = SecurityProtocol.SASL_PLAINTEXT) + }) + void testSomething() { ... } +} +``` + +# Dynamic Configuration + +In order to allow for more flexible cluster configuration, a `@ClusterTemplate` +annotation is also introduced. This annotation takes a single string value which +references a static method on the test class. This method is used to produce any +number of test configurations using a fluent builder style API. + +```java +import java.util.List; + +@ClusterTemplate("generateConfigs") +void testSomething() { ...
} + +static List<ClusterConfig> generateConfigs() { + ClusterConfig config1 = ClusterConfig.defaultClusterBuilder() + .name("Generated Test 1") + .serverProperties(props1) + .setMetadataVersion(MetadataVersion.IBP_2_7_IV1) + .build(); + ClusterConfig config2 = ClusterConfig.defaultClusterBuilder() + .name("Generated Test 2") + .serverProperties(props2) + .setMetadataVersion(MetadataVersion.IBP_2_7_IV2) + .build(); + ClusterConfig config3 = ClusterConfig.defaultClusterBuilder() + .name("Generated Test 3") + .serverProperties(props3) + .build(); + return List.of(config1, config2, config3); +} +``` + +This alternate configuration style makes it easy to create any number of complex +configurations. Each ClusterConfig returned by a template method will result in +an additional variation of the test run. + + +# JUnit Extension + +The core logic of our test framework lies in `ClusterTestExtensions` which is a +JUnit extension. It is automatically registered using SPI and will look for test +methods that include one of the three annotations mentioned above. + +This way of dynamically generating tests uses the JUnit concept of test templates. + +# JUnit Lifecycle + +JUnit discovers test template methods that are annotated with `@ClusterTest`, +`@ClusterTests`, or `@ClusterTemplate`. These annotations are processed and some +number of test invocations are created. + +For each generated test invocation we have the following lifecycle: + +* Static `@BeforeAll` methods are called +* Test class is instantiated +* Kafka Cluster is started (if autoStart=true) +* Non-static `@BeforeEach` methods are called +* Test method is invoked +* Kafka Cluster is stopped +* Non-static `@AfterEach` methods are called +* Static `@AfterAll` methods are called + +`@BeforeEach` methods give an opportunity to set up additional test dependencies +after the cluster has started but before the test method is run. + +# Dependency Injection + +A ClusterInstance object can be injected into the test method or the test class constructor. +This object is a shim to the underlying test framework and provides access to things like +SocketServers and has convenience factory methods for getting a client. + +The class is introduced to provide context to the underlying cluster and to provide reusable +functionality that was previously garnered from the test hierarchy. + +Common usage is to inject this class into a test method: + +```java +class SampleTest { + @ClusterTest + public void testOne(ClusterInstance cluster) { + cluster.admin().createTopics(...); + // Test code + } +} +``` + +For cases where there is common setup code that involves the cluster (such as +creating topics), it is possible to access the ClusterInstance from a `@BeforeEach` +method. This requires injecting the object in the constructor. For example, + +```java +class SampleTest { + private final ClusterInstance cluster; + + SampleTest(ClusterInstance cluster) { + this.cluster = cluster; + } + + @BeforeEach + public void setup() { + // Common setup code with started ClusterInstance + this.cluster.admin().createTopics(...); + } + + @ClusterTest + public void testOne() { + // Test code + } +} +``` + +It is okay to inject the ClusterInstance in both ways. The same object will be +provided in either case. + +# Gotchas +* Cluster tests are not compatible with other test templates like `@ParameterizedTest` +* Test methods annotated with JUnit's `@Test` will still be run, but no cluster will be started and no dependency + injection will happen. This is generally not what you want.
+* Even though ClusterConfig is accessible, it is immutable inside the test method. diff --git a/test-common/test-common-internal-api/bin/test/log4j2.yaml b/test-common/test-common-internal-api/bin/test/log4j2.yaml new file mode 100644 index 0000000000..be546a18b5 --- /dev/null +++ b/test-common/test-common-internal-api/bin/test/log4j2.yaml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: org.apache.kafka + level: INFO diff --git a/test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension b/test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension new file mode 100644 index 0000000000..d7f05be7df --- /dev/null +++ b/test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.kafka.common.test.junit.ClusterTestExtensions \ No newline at end of file diff --git a/test-common/test-common-runtime/bin/main/log4j2.yaml b/test-common/test-common-runtime/bin/main/log4j2.yaml new file mode 100644 index 0000000000..be546a18b5 --- /dev/null +++ b/test-common/test-common-runtime/bin/main/log4j2.yaml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Configuration: + Properties: + Property: + - name: "logPattern" + value: "[%d] %p %m (%c:%L)%n" + + Appenders: + Console: + name: STDOUT + PatternLayout: + pattern: "${logPattern}" + + Loggers: + Root: + level: INFO + AppenderRef: + - ref: STDOUT + Logger: + - name: org.apache.kafka + level: INFO diff --git a/transaction-coordinator/bin/main/common/message/TransactionLogKey.json b/transaction-coordinator/bin/main/common/message/TransactionLogKey.json new file mode 100644 index 0000000000..6caa46e4b6 --- /dev/null +++ b/transaction-coordinator/bin/main/common/message/TransactionLogKey.json @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 0, + "type": "coordinator-key", + "name": "TransactionLogKey", + "validVersions": "0", + "flexibleVersions": "none", + "fields": [ + { "name": "TransactionalId", "type": "string", "versions": "0", + "about": "The transactional id of the transaction."} + ] +} diff --git a/transaction-coordinator/bin/main/common/message/TransactionLogValue.json b/transaction-coordinator/bin/main/common/message/TransactionLogValue.json new file mode 100644 index 0000000000..93762afa38 --- /dev/null +++ b/transaction-coordinator/bin/main/common/message/TransactionLogValue.json @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +{ + "apiKey": 0, + "type": "coordinator-value", + "name": "TransactionLogValue", + // Version 1 is the first flexible version. + // KIP-915: bumping the version will no longer make this record backward compatible. + // We suggest to add/remove only tagged fields to maintain backward compatibility. 
+ "validVersions": "0-1", + "flexibleVersions": "1+", + "fields": [ + { "name": "ProducerId", "type": "int64", "versions": "0+", + "about": "Producer id in use by the transactional id."}, + { "name": "PreviousProducerId", "type": "int64", "taggedVersions": "1+", "tag": 0, "default": -1, + "about": "Producer id used by the last committed transaction."}, + { "name": "NextProducerId", "type": "int64", "taggedVersions": "1+", "tag": 1, "default": -1, + "about": "Latest producer ID sent to the producer for the given transactional id."}, + { "name": "ProducerEpoch", "type": "int16", "versions": "0+", + "about": "Epoch associated with the producer id."}, + { "name": "NextProducerEpoch", "type": "int16", "default": -1, "taggedVersions": "1+", "tag": 3, + "about": "Producer epoch associated with the NextProducerId"}, + { "name": "TransactionTimeoutMs", "type": "int32", "versions": "0+", + "about": "Transaction timeout in milliseconds."}, + { "name": "TransactionStatus", "type": "int8", "versions": "0+", + "about": "TransactionState the transaction is in."}, + { "name": "TransactionPartitions", "type": "[]PartitionsSchema", "versions": "0+", "nullableVersions": "0+", + "about": "Partitions involved in the transaction.", "fields": [ + { "name": "Topic", "type": "string", "versions": "0+", + "about": "Topic involved in the transaction."}, + { "name": "PartitionIds", "type": "[]int32", "versions": "0+", + "about": "Partition ids involved in the transaction."}]}, + { "name": "TransactionLastUpdateTimestampMs", "type": "int64", "versions": "0+", + "about": "Time the transaction was last updated."}, + { "name": "TransactionStartTimestampMs", "type": "int64", "versions": "0+", + "about": "Time the transaction was started."}, + { "name": "ClientTransactionVersion", "type": "int16", "default": 0, "taggedVersions": "1+", "tag": 2, + "about": "The transaction version used by the client."} + ] +} From 0030427f2a9687803bcbeaed8d2cdc2c361ab20f Mon Sep 17 00:00:00 2001 From: Giuseppe Lillo Date: Fri, 9 Jan 2026 16:27:05 +0100 Subject: [PATCH 2/7] wip --- core/src/main/scala/kafka/server/ReplicaManager.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index ba25484fac..580e7f1a40 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -1834,16 +1834,16 @@ class ReplicaManager(val config: KafkaConfig, false } } - println("canDisklessFetch: " + canDisklessFetch) - println("needsClassicFetch: " + needsClassicFetch) +// println("canDisklessFetch: " + canDisklessFetch) +// println("needsClassicFetch: " + needsClassicFetch) (canDisklessFetch, initialClassicFetchInfos ++ needsClassicFetch) case _ => (disklessTopicFetchInfosWithTopicId, initialClassicFetchInfos) } - println("disklessFetchInfos: " + disklessFetchInfos) - println("classicFetchInfos: " + classicFetchInfos) +// println("disklessFetchInfos: " + disklessFetchInfos) +// println("classicFetchInfos: " + classicFetchInfos) // if (params.isFromFollower && disklessFetchInfos.nonEmpty) { From 5fa25e4d9e0149d0d01d4b423a766a0fa0a16732 Mon Sep 17 00:00:00 2001 From: Giuseppe Lillo Date: Fri, 9 Jan 2026 16:43:06 +0100 Subject: [PATCH 3/7] a --- .../bin/test/log4j2.yaml | 38 - ...pache.kafka.connect.source.SourceConnector | 18 - connect/mirror/bin/test/log4j2.yaml | 41 - ...policy.ConnectorClientConfigOverridePolicy | 18 - 
...org.apache.kafka.connect.storage.Converter | 22 - ...ache.kafka.connect.storage.HeaderConverter | 22 - ...he.kafka.connect.rest.ConnectRestExtension | 16 - ...rg.apache.kafka.connect.sink.SinkConnector | 22 - ...pache.kafka.connect.source.SourceConnector | 26 - ...org.apache.kafka.connect.storage.Converter | 22 - ...ache.kafka.connect.storage.HeaderConverter | 22 - ...he.kafka.connect.transforms.Transformation | 23 - ...ka.connect.transforms.predicates.Predicate | 17 - connect/runtime/bin/test/log4j2.yaml | 48 - ...org.apache.kafka.connect.storage.Converter | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - ...policy.ConnectorClientConfigOverridePolicy | 16 - ...he.kafka.connect.rest.ConnectRestExtension | 16 - ...rg.apache.kafka.connect.sink.SinkConnector | 22 - ...org.apache.kafka.connect.storage.Converter | 20 - ...ache.kafka.connect.storage.HeaderConverter | 17 - ...org.apache.kafka.connect.storage.Converter | 17 - ...org.apache.kafka.connect.storage.Converter | 17 - ...policy.ConnectorClientConfigOverridePolicy | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - .../read-version-from-resource-v1/version | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - .../read-version-from-resource-v2/version | 16 - ...afka.common.config.provider.ConfigProvider | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - ...rg.apache.kafka.connect.sink.SinkConnector | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - ...ache.kafka.connect.storage.HeaderConverter | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - .../services/test.plugins.ServiceLoadedClass | 16 - ...policy.ConnectorClientConfigOverridePolicy | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - ...org.apache.kafka.connect.storage.Converter | 16 - ...ache.kafka.connect.storage.HeaderConverter | 16 - ...ka.connect.transforms.predicates.Predicate | 16 - ...rg.apache.kafka.connect.sink.SinkConnector | 16 - ...pache.kafka.connect.source.SourceConnector | 16 - ...he.kafka.connect.transforms.Transformation | 16 - ...rg.apache.kafka.connect.sink.SinkConnector | 17 - ...pache.kafka.connect.source.SourceConnector | 18 - .../bin/main/message/ProducerSnapshot.json | 85 -- .../RemoteLogSegmentMetadataRecord.json | 142 --- ...emoteLogSegmentMetadataSnapshotRecord.json | 108 -- .../RemoteLogSegmentMetadataUpdateRecord.json | 90 -- .../RemotePartitionDeleteMetadataRecord.json | 68 -- storage/bin/test/log4j2.yaml | 57 - .../org/apache/kafka/tiered/storage/README.md | 11 - .../V10__Covering_index_on_batches.sql | 9 - .../V11__Add_diskless_offsets_to_logs.sql | 3 - .../main/db/migration/V1__Create_tables.sql | 1035 ----------------- ...eed_up_delete_topic_and_delete_records.sql | 121 -- .../main/db/migration/V3__Add_logs_size.sql | 300 ----- .../migration/V4__Retention_enforcement.sql | 133 --- .../V5__Fix_deadlock_in_delete_records_v1.sql | 90 -- ...e_understandability_of_delete_topic_v1.sql | 143 --- .../migration/V7__Find_batches_function.sql | 114 -- .../V8__Find_batches_function_with_limit.sql | 88 -- .../V9__Retention_enforcement_with_limits.sql | 132 --- .../inkless/bin/main/message/CacheKey.json | 52 - .../inkless/bin/main/message/FileExtent.json | 58 - .../net.jqwik.api.providers.ArbitraryProvider | 6 - storage/inkless/bin/test/log4j.properties | 7 - .../bin/test/test_gcs_credentials.json | 6 - .../integration-tests/bin/test/log4j2.yaml | 65 -- ...inScalaIntegrationTestImplicitSerdes.scala | 175 --- .../streams/integration/WordCountTest.scala | 250 ---- 
...mToTableJoinScalaIntegrationTestBase.scala | 144 --- .../utils/StreamToTableJoinTestData.scala | 60 - .../apache/kafka/common/test/api/README.md | 192 --- .../bin/test/log4j2.yaml | 35 - .../org.junit.jupiter.api.extension.Extension | 16 - .../test-common-runtime/bin/main/log4j2.yaml | 35 - .../common/message/TransactionLogKey.json | 26 - .../common/message/TransactionLogValue.json | 53 - 79 files changed, 4776 deletions(-) delete mode 100644 clients/clients-integration-tests/bin/test/log4j2.yaml delete mode 100644 connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector delete mode 100644 connect/mirror/bin/test/log4j2.yaml delete mode 100644 connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy delete mode 100644 connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation delete mode 100644 connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate delete mode 100644 connect/runtime/bin/test/log4j2.yaml delete mode 100644 connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy delete mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension delete mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector delete mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter delete mode 100644 connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy delete mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version delete mode 
100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version delete mode 100644 connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider delete mode 100644 connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector delete mode 100644 connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter delete mode 100644 connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass delete mode 100644 connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy delete mode 100644 connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter delete mode 100644 connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter delete mode 100644 connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate delete mode 100644 connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector delete mode 100644 connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector delete mode 100644 connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation delete mode 100644 connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector delete mode 100644 connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector delete mode 100644 storage/bin/main/message/ProducerSnapshot.json delete mode 100644 storage/bin/main/message/RemoteLogSegmentMetadataRecord.json delete mode 100644 storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json delete mode 100644 storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json delete mode 100644 storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json delete mode 100644 storage/bin/test/log4j2.yaml delete mode 100644 storage/bin/test/org/apache/kafka/tiered/storage/README.md delete mode 100644 storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql delete mode 100644 storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql delete mode 100644 storage/inkless/bin/main/db/migration/V1__Create_tables.sql delete mode 100644 storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql delete mode 100644 
storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql delete mode 100644 storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql delete mode 100644 storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql delete mode 100644 storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql delete mode 100644 storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql delete mode 100644 storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql delete mode 100644 storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql delete mode 100644 storage/inkless/bin/main/message/CacheKey.json delete mode 100644 storage/inkless/bin/main/message/FileExtent.json delete mode 100644 storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider delete mode 100644 storage/inkless/bin/test/log4j.properties delete mode 100644 storage/inkless/bin/test/test_gcs_credentials.json delete mode 100644 streams/integration-tests/bin/test/log4j2.yaml delete mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala delete mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala delete mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala delete mode 100644 streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala delete mode 100644 test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md delete mode 100644 test-common/test-common-internal-api/bin/test/log4j2.yaml delete mode 100644 test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension delete mode 100644 test-common/test-common-runtime/bin/main/log4j2.yaml delete mode 100644 transaction-coordinator/bin/main/common/message/TransactionLogKey.json delete mode 100644 transaction-coordinator/bin/main/common/message/TransactionLogValue.json diff --git a/clients/clients-integration-tests/bin/test/log4j2.yaml b/clients/clients-integration-tests/bin/test/log4j2.yaml deleted file mode 100644 index 9825ef7c7d..0000000000 --- a/clients/clients-integration-tests/bin/test/log4j2.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: OFF - AppenderRef: - - ref: STDOUT - Logger: - - name: kafka - level: WARN - - - name: org.apache.kafka - level: WARN \ No newline at end of file diff --git a/connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector deleted file mode 100644 index 4836e08f3e..0000000000 --- a/connect/mirror/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector +++ /dev/null @@ -1,18 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.mirror.MirrorCheckpointConnector -org.apache.kafka.connect.mirror.MirrorHeartbeatConnector -org.apache.kafka.connect.mirror.MirrorSourceConnector \ No newline at end of file diff --git a/connect/mirror/bin/test/log4j2.yaml b/connect/mirror/bin/test/log4j2.yaml deleted file mode 100644 index b63606d0ba..0000000000 --- a/connect/mirror/bin/test/log4j2.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %X{connector.context}%m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: kafka - level: WARN - - - name: state.change.logger - level: "OFF" - - - name: org.apache.kafka.connect - level: DEBUG diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy deleted file mode 100644 index 8b76ce452b..0000000000 --- a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy +++ /dev/null @@ -1,18 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.connector.policy.AllConnectorClientConfigOverridePolicy -org.apache.kafka.connect.connector.policy.PrincipalConnectorClientConfigOverridePolicy -org.apache.kafka.connect.connector.policy.NoneConnectorClientConfigOverridePolicy \ No newline at end of file diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 364a2276f1..0000000000 --- a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,22 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -org.apache.kafka.connect.converters.ByteArrayConverter -org.apache.kafka.connect.converters.DoubleConverter -org.apache.kafka.connect.converters.FloatConverter -org.apache.kafka.connect.converters.IntegerConverter -org.apache.kafka.connect.converters.LongConverter -org.apache.kafka.connect.converters.ShortConverter -org.apache.kafka.connect.converters.BooleanConverter diff --git a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter deleted file mode 100644 index 364a2276f1..0000000000 --- a/connect/runtime/bin/main/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter +++ /dev/null @@ -1,22 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.converters.ByteArrayConverter -org.apache.kafka.connect.converters.DoubleConverter -org.apache.kafka.connect.converters.FloatConverter -org.apache.kafka.connect.converters.IntegerConverter -org.apache.kafka.connect.converters.LongConverter -org.apache.kafka.connect.converters.ShortConverter -org.apache.kafka.connect.converters.BooleanConverter diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension deleted file mode 100644 index 0a1ef88924..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -org.apache.kafka.connect.runtime.isolation.PluginsTest$TestConnectRestExtension \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector deleted file mode 100644 index 818d09e618..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.sink.SinkConnector +++ /dev/null @@ -1,22 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.integration.BlockingConnectorTest$BlockingSinkConnector -org.apache.kafka.connect.integration.BlockingConnectorTest$TaskInitializeBlockingSinkConnector -org.apache.kafka.connect.integration.ErrantRecordSinkConnector -org.apache.kafka.connect.integration.MonitorableSinkConnector -org.apache.kafka.connect.integration.TestableSinkConnector -org.apache.kafka.connect.runtime.SampleSinkConnector -org.apache.kafka.connect.integration.ConnectWorkerIntegrationTest$EmptyTaskConfigsConnector \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector deleted file mode 100644 index 8ff259f887..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.source.SourceConnector +++ /dev/null @@ -1,26 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -org.apache.kafka.connect.integration.BlockingConnectorTest$BlockingConnector -org.apache.kafka.connect.integration.BlockingConnectorTest$InitializeBlockingConnector -org.apache.kafka.connect.integration.BlockingConnectorTest$ConfigBlockingConnector -org.apache.kafka.connect.integration.BlockingConnectorTest$ValidateBlockingConnector -org.apache.kafka.connect.integration.BlockingConnectorTest$BlockingSourceConnector -org.apache.kafka.connect.integration.BlockingConnectorTest$TaskInitializeBlockingSourceConnector -org.apache.kafka.connect.integration.ExactlyOnceSourceIntegrationTest$NaughtyConnector -org.apache.kafka.connect.integration.MonitorableSourceConnector -org.apache.kafka.connect.integration.TestableSourceConnector -org.apache.kafka.connect.runtime.SampleSourceConnector -org.apache.kafka.connect.runtime.rest.resources.ConnectorPluginsResourceTest$ConnectorPluginsResourceTestConnector diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index c58e40f243..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,22 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.runtime.SampleConverterWithHeaders -org.apache.kafka.connect.runtime.ErrorHandlingTaskTest$FaultyConverter -org.apache.kafka.connect.runtime.isolation.PluginsTest$TestConverter -org.apache.kafka.connect.runtime.isolation.PluginsTest$TestInternalConverter -org.apache.kafka.connect.runtime.isolation.PluginUtilsTest$CollidingConverter -org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithSinglePropertyConfigDef -org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithNoConfigDef \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter deleted file mode 100644 index b14690acaf..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter +++ /dev/null @@ -1,22 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. 
You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.runtime.SampleHeaderConverter -org.apache.kafka.connect.runtime.ErrorHandlingTaskTest$FaultyConverter -org.apache.kafka.connect.runtime.isolation.PluginsTest$TestHeaderConverter -org.apache.kafka.connect.runtime.isolation.PluginsTest$TestInternalConverter -org.apache.kafka.connect.runtime.isolation.PluginUtilsTest$CollidingHeaderConverter -org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithSinglePropertyConfigDef -org.apache.kafka.connect.integration.ConnectorValidationIntegrationTest$TestConverterWithNoConfigDef \ No newline at end of file diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation deleted file mode 100644 index 6d36ee9088..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.Transformation +++ /dev/null @@ -1,23 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.integration.ErrorHandlingIntegrationTest$FaultyPassthrough -org.apache.kafka.connect.runtime.ErrorHandlingTaskTest$FaultyPassthrough -org.apache.kafka.connect.runtime.ConnectorConfigTest$SimpleTransformation -org.apache.kafka.connect.runtime.ConnectorConfigTest$HasDuplicateConfigTransformation -org.apache.kafka.connect.runtime.ConnectorConfigTest$AbstractKeyValueTransformation$Key -org.apache.kafka.connect.runtime.ConnectorConfigTest$AbstractKeyValueTransformation$Value -org.apache.kafka.connect.runtime.SampleTransformation -org.apache.kafka.connect.runtime.isolation.PluginUtilsTest$Colliding diff --git a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate b/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate deleted file mode 100644 index b235b1fec5..0000000000 --- a/connect/runtime/bin/test/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate +++ /dev/null @@ -1,17 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. 
- # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.runtime.ConnectorConfigTest$TestPredicate -org.apache.kafka.connect.runtime.SamplePredicate \ No newline at end of file diff --git a/connect/runtime/bin/test/log4j2.yaml b/connect/runtime/bin/test/log4j2.yaml deleted file mode 100644 index 45faa63537..0000000000 --- a/connect/runtime/bin/test/log4j2.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %X{connector.context}%m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: kafka - level: WARN - - - name: state.change.logger - level: "OFF" - - - name: org.apache.kafka.connect - level: DEBUG - - # Troubleshooting KAFKA-17493. - - name: org.apache.kafka.consumer - level: DEBUG - - - name: org.apache.kafka.coordinator.group - level: DEBUG diff --git a/connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 1cdad430ed..0000000000 --- a/connect/runtime/bin/test/test-plugins/aliased-static-field/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.AliasedStaticField diff --git a/connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 084c96f96c..0000000000 --- a/connect/runtime/bin/test/test-plugins/always-throw-exception/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.AlwaysThrowException diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy deleted file mode 100644 index 9299056576..0000000000 --- a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.NoDefaultConstructorOverridePolicy diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension deleted file mode 100644 index 8b0f488148..0000000000 --- a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.rest.ConnectRestExtension +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. 
- # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.StaticInitializerThrowsRestExtension \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector deleted file mode 100644 index 8e843327a6..0000000000 --- a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.sink.SinkConnector +++ /dev/null @@ -1,22 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.DefaultConstructorPrivateConnector -test.plugins.DefaultConstructorThrowsConnector -test.plugins.InnocuousSinkConnector -test.plugins.NoDefaultConstructorConnector -test.plugins.StaticInitializerThrowsConnector -test.plugins.OuterClass$InnerClass -test.plugins.VersionMethodThrowsConnector diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index b4696e6999..0000000000 --- a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,20 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -test.plugins.CoLocatedPlugin -test.plugins.DefaultConstructorThrowsConverter -test.plugins.MissingSuperclassConverter -test.plugins.NoDefaultConstructorConverter - diff --git a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter deleted file mode 100644 index dcf9c16d9a..0000000000 --- a/connect/runtime/bin/test/test-plugins/bad-packaging/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter +++ /dev/null @@ -1,17 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.DefaultConstructorThrowsConverter -test.plugins.NoDefaultConstructorConverter diff --git a/connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index ae9c2a5820..0000000000 --- a/connect/runtime/bin/test/test-plugins/classpath-converter/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,17 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.converters.ByteArrayConverter - diff --git a/connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index ff148703cb..0000000000 --- a/connect/runtime/bin/test/test-plugins/multiple-plugins-in-jar/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,17 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. 
- # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.ThingOne -test.plugins.ThingTwo diff --git a/connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy deleted file mode 100644 index 82400f7255..0000000000 --- a/connect/runtime/bin/test/test-plugins/non-migrated/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.NonMigratedMultiPlugin diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 89b0af611e..0000000000 --- a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -test.plugins.ReadVersionFromResource diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version deleted file mode 100644 index 49584c02d0..0000000000 --- a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v1/version +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -1.0.0 \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 89b0af611e..0000000000 --- a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.ReadVersionFromResource diff --git a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version b/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version deleted file mode 100644 index 56d688a444..0000000000 --- a/connect/runtime/bin/test/test-plugins/read-version-from-resource-v2/version +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -2.0.0 \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider b/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider deleted file mode 100644 index 62d8df254b..0000000000 --- a/connect/runtime/bin/test/test-plugins/sampling-config-provider/META-INF/services/org.apache.kafka.common.config.provider.ConfigProvider +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.SamplingConfigProvider diff --git a/connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 232b881a39..0000000000 --- a/connect/runtime/bin/test/test-plugins/sampling-configurable/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.SamplingConfigurable diff --git a/connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector deleted file mode 100644 index e83aba248a..0000000000 --- a/connect/runtime/bin/test/test-plugins/sampling-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. 
- # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.SamplingConnector diff --git a/connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 00ece8187b..0000000000 --- a/connect/runtime/bin/test/test-plugins/sampling-converter/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.SamplingConverter diff --git a/connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter deleted file mode 100644 index 66291d24c6..0000000000 --- a/connect/runtime/bin/test/test-plugins/sampling-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -test.plugins.SamplingHeaderConverter diff --git a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 3dfaea697b..0000000000 --- a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.ServiceLoaderPlugin diff --git a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass b/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass deleted file mode 100644 index b8db865648..0000000000 --- a/connect/runtime/bin/test/test-plugins/service-loader/META-INF/services/test.plugins.ServiceLoadedClass +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.ServiceLoadedSubclass \ No newline at end of file diff --git a/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy b/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy deleted file mode 100644 index e4f915c855..0000000000 --- a/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. 
- # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.SubclassOfClasspathOverridePolicy diff --git a/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index 418027308e..0000000000 --- a/connect/runtime/bin/test/test-plugins/subclass-of-classpath/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.SubclassOfClasspathConverter diff --git a/connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter b/connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter deleted file mode 100644 index d37bb90859..0000000000 --- a/connect/runtime/bin/test/test-plugins/versioned-converter/META-INF/services/org.apache.kafka.connect.storage.Converter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -test.plugins.VersionedConverter diff --git a/connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter b/connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter deleted file mode 100644 index 25e4b7665b..0000000000 --- a/connect/runtime/bin/test/test-plugins/versioned-header-converter/META-INF/services/org.apache.kafka.connect.storage.HeaderConverter +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.VersionedHeaderConverter diff --git a/connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate b/connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate deleted file mode 100644 index af841817ab..0000000000 --- a/connect/runtime/bin/test/test-plugins/versioned-predicate/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.VersionedPredicate diff --git a/connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector deleted file mode 100644 index a5c560853f..0000000000 --- a/connect/runtime/bin/test/test-plugins/versioned-sink-connector/META-INF/services/org.apache.kafka.connect.sink.SinkConnector +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. 
- # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.VersionedSinkConnector diff --git a/connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector deleted file mode 100644 index efee272749..0000000000 --- a/connect/runtime/bin/test/test-plugins/versioned-source-connector/META-INF/services/org.apache.kafka.connect.source.SourceConnector +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -test.plugins.VersionedSourceConnector diff --git a/connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation b/connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation deleted file mode 100644 index 7fed78370f..0000000000 --- a/connect/runtime/bin/test/test-plugins/versioned-transformation/META-INF/services/org.apache.kafka.connect.transforms.Transformation +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -test.plugins.VersionedTransformation diff --git a/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector deleted file mode 100644 index 170043754d..0000000000 --- a/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector +++ /dev/null @@ -1,17 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.tools.MockSinkConnector -org.apache.kafka.connect.tools.VerifiableSinkConnector diff --git a/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector deleted file mode 100644 index acc2ddce71..0000000000 --- a/connect/test-plugins/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector +++ /dev/null @@ -1,18 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.tools.MockSourceConnector -org.apache.kafka.connect.tools.SchemaSourceConnector -org.apache.kafka.connect.tools.VerifiableSourceConnector \ No newline at end of file diff --git a/storage/bin/main/message/ProducerSnapshot.json b/storage/bin/main/message/ProducerSnapshot.json deleted file mode 100644 index cdb9da8330..0000000000 --- a/storage/bin/main/message/ProducerSnapshot.json +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "type": "data", - "name": "ProducerSnapshot", - "validVersions": "1", - "flexibleVersions": "none", - "fields": [ - { - "name": "Crc", - "type": "uint32", - "versions": "1", - "about": "CRC of the snapshot data" - }, - { - "name": "ProducerEntries", - "type": "[]ProducerEntry", - "versions": "1", - "about": "The entries in the producer table", - "fields": [ - { - "name": "ProducerId", - "type": "int64", - "versions": "1", - "about": "The producer ID" - }, - { - "name": "Epoch", - "type": "int16", - "versions": "1", - "about": "Current epoch of the producer" - }, - { - "name": "LastSequence", - "type": "int32", - "versions": "1", - "about": "Last written sequence of the producer" - }, - { - "name": "LastOffset", - "type": "int64", - "versions": "1", - "about": "Last written offset of the producer" - }, - { - "name": "OffsetDelta", - "type": "int32", - "versions": "1", - "about": "The difference of the last sequence and first sequence in the last written batch" - }, - { - "name": "Timestamp", - "type": "int64", - "versions": "1", - "about": "Max timestamp from the last written entry" - }, - { - "name": "CoordinatorEpoch", - "type": "int32", - "versions": "1", - "about": "The epoch of the last transaction coordinator to send an end transaction marker" - }, - { - "name": "CurrentTxnFirstOffset", - "type": "int64", - "versions": "1", - "about": "The first offset of the on-going transaction (-1 if there is none)" - } - ] - } - ] -} diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json deleted file mode 100644 index 9c035f5263..0000000000 --- a/storage/bin/main/message/RemoteLogSegmentMetadataRecord.json +++ /dev/null @@ -1,142 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 0, - "type": "metadata", - "name": "RemoteLogSegmentMetadataRecord", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { - "name": "RemoteLogSegmentId", - "type": "RemoteLogSegmentIdEntry", - "versions": "0+", - "about": "Unique representation of the remote log segment.", - "fields": [ - { - "name": "TopicIdPartition", - "type": "TopicIdPartitionEntry", - "versions": "0+", - "about": "Represents unique topic partition.", - "fields": [ - { - "name": "Name", - "type": "string", - "versions": "0+", - "about": "Topic name." 
- }, - { - "name": "Id", - "type": "uuid", - "versions": "0+", - "about": "Unique identifier of the topic." - }, - { - "name": "Partition", - "type": "int32", - "versions": "0+", - "about": "Partition number." - } - ] - }, - { - "name": "Id", - "type": "uuid", - "versions": "0+", - "about": "Unique identifier of the remote log segment." - } - ] - }, - { - "name": "StartOffset", - "type": "int64", - "versions": "0+", - "about": "Start offset of the segment." - }, - { - "name": "EndOffset", - "type": "int64", - "versions": "0+", - "about": "End offset of the segment." - }, - { - "name": "BrokerId", - "type": "int32", - "versions": "0+", - "about": "Broker id from which this event is generated." - }, - { - "name": "MaxTimestampMs", - "type": "int64", - "versions": "0+", - "about": "Maximum timestamp in milli seconds with in this segment." - }, - { - "name": "EventTimestampMs", - "type": "int64", - "versions": "0+", - "about": "Epoch time in milli seconds at which this event is generated." - }, - { - "name": "SegmentLeaderEpochs", - "type": "[]SegmentLeaderEpochEntry", - "versions": "0+", - "about": "Leader epoch to start-offset mappings for the records with in this segment.", - "fields": [ - { - "name": "LeaderEpoch", - "type": "int32", - "versions": "0+", - "about": "Leader epoch" - }, - { - "name": "Offset", - "type": "int64", - "versions": "0+", - "about": "Start offset for the leader epoch." - } - ] - }, - { - "name": "SegmentSizeInBytes", - "type": "int32", - "versions": "0+", - "about": "Segment size in bytes." - }, - { - "name": "CustomMetadata", - "type": "bytes", - "default": "null", - "versions": "0+", - "nullableVersions": "0+", - "about": "Custom metadata." - }, - { - "name": "RemoteLogSegmentState", - "type": "int8", - "versions": "0+", - "about": "State identifier of the remote log segment, which is RemoteLogSegmentState.id()." - }, - { - "name": "TxnIndexEmpty", - "type": "bool", - "versions": "0+", - "about": "Flag to indicate if the transaction index is empty.", - "taggedVersions": "0+", - "tag": 0 - } - ] -} \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json deleted file mode 100644 index f4a1f19dca..0000000000 --- a/storage/bin/main/message/RemoteLogSegmentMetadataSnapshotRecord.json +++ /dev/null @@ -1,108 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -{ - "apiKey": 3, - "type": "metadata", - "name": "RemoteLogSegmentMetadataSnapshotRecord", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { - "name": "SegmentId", - "type": "uuid", - "versions": "0+", - "about": "Unique identifier of the log segment" - }, - { - "name": "StartOffset", - "type": "int64", - "versions": "0+", - "about": "Start offset of the segment." - }, - { - "name": "EndOffset", - "type": "int64", - "versions": "0+", - "about": "End offset of the segment." - }, - { - "name": "BrokerId", - "type": "int32", - "versions": "0+", - "about": "Broker (controller or leader) id from which this event is created or updated." - }, - { - "name": "MaxTimestampMs", - "type": "int64", - "versions": "0+", - "about": "Maximum timestamp with in this segment." - }, - { - "name": "EventTimestampMs", - "type": "int64", - "versions": "0+", - "about": "Event timestamp of this segment." - }, - { - "name": "SegmentLeaderEpochs", - "type": "[]SegmentLeaderEpochEntry", - "versions": "0+", - "about": "Leader epochs of this segment.", - "fields": [ - { - "name": "LeaderEpoch", - "type": "int32", - "versions": "0+", - "about": "Leader epoch" - }, - { - "name": "Offset", - "type": "int64", - "versions": "0+", - "about": "Start offset for the leader epoch" - } - ] - }, - { - "name": "SegmentSizeInBytes", - "type": "int32", - "versions": "0+", - "about": "Segment size in bytes" - }, - { - "name": "CustomMetadata", - "type": "bytes", - "default": "null", - "versions": "0+", - "nullableVersions": "0+", - "about": "Custom metadata." - }, - { - "name": "RemoteLogSegmentState", - "type": "int8", - "versions": "0+", - "about": "State of the remote log segment" - }, - { - "name": "TxnIndexEmpty", - "type": "bool", - "versions": "0+", - "about": "Flag to indicate if the transaction index is empty.", - "taggedVersions": "0+", - "tag": 0 - } - ] -} \ No newline at end of file diff --git a/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json b/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json deleted file mode 100644 index 48aa34d4e9..0000000000 --- a/storage/bin/main/message/RemoteLogSegmentMetadataUpdateRecord.json +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -{ - "apiKey": 1, - "type": "metadata", - "name": "RemoteLogSegmentMetadataUpdateRecord", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { - "name": "RemoteLogSegmentId", - "type": "RemoteLogSegmentIdEntry", - "versions": "0+", - "about": "Unique representation of the remote log segment.", - "fields": [ - { - "name": "TopicIdPartition", - "type": "TopicIdPartitionEntry", - "versions": "0+", - "about": "Represents unique topic partition.", - "fields": [ - { - "name": "Name", - "type": "string", - "versions": "0+", - "about": "Topic name." - }, - { - "name": "Id", - "type": "uuid", - "versions": "0+", - "about": "Unique identifier of the topic." - }, - { - "name": "Partition", - "type": "int32", - "versions": "0+", - "about": "Partition number." - } - ] - }, - { - "name": "Id", - "type": "uuid", - "versions": "0+", - "about": "Unique identifier of the remote log segment." - } - ] - }, - { - "name": "BrokerId", - "type": "int32", - "versions": "0+", - "about": "Broker id from which this event is generated." - }, - { - "name": "EventTimestampMs", - "type": "int64", - "versions": "0+", - "about": "Epoch time in milli seconds at which this event is generated." - }, - { - "name": "CustomMetadata", - "type": "bytes", - "default": "null", - "versions": "0+", - "nullableVersions": "0+", - "about": "Custom metadata." - }, - { - "name": "RemoteLogSegmentState", - "type": "int8", - "versions": "0+", - "about": "State identifier of the remote log segment, which is RemoteLogSegmentState.id()." - } - ] -} \ No newline at end of file diff --git a/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json b/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json deleted file mode 100644 index f5e955bcea..0000000000 --- a/storage/bin/main/message/RemotePartitionDeleteMetadataRecord.json +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 2, - "type": "metadata", - "name": "RemotePartitionDeleteMetadataRecord", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { - "name": "TopicIdPartition", - "type": "TopicIdPartitionEntry", - "versions": "0+", - "about": "Represents unique topic partition.", - "fields": [ - { - "name": "Name", - "type": "string", - "versions": "0+", - "about": "Topic name." - }, - { - "name": "Id", - "type": "uuid", - "versions": "0+", - "about": "Unique identifier of the topic." - }, - { - "name": "Partition", - "type": "int32", - "versions": "0+", - "about": "Partition number." - } - ] - }, - { - "name": "BrokerId", - "type": "int32", - "versions": "0+", - "about": "Broker (controller or leader) id from which this event is created. DELETE_PARTITION_MARKED is sent by the controller. 
DELETE_PARTITION_STARTED and DELETE_PARTITION_FINISHED are sent by remote log metadata topic partition leader." - }, - { - "name": "EventTimestampMs", - "type": "int64", - "versions": "0+", - "about": "Epoch time in milli seconds at which this event is generated." - }, - { - "name": "RemotePartitionDeleteState", - "type": "int8", - "versions": "0+", - "about": "Deletion state identifier of the remote partition, which is RemotePartitionDeleteState.id()." - } - ] -} \ No newline at end of file diff --git a/storage/bin/test/log4j2.yaml b/storage/bin/test/log4j2.yaml deleted file mode 100644 index 2d017d3b58..0000000000 --- a/storage/bin/test/log4j2.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - name: "fileLogPattern" - value: "%d [%t] %-5p %c %x - %m%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - RollingFile: - - name: FileAppender - fileName: build/kafka-storage-test/storage.log - filePattern: "build/kafka-storage-test/storage-%d{yyyy-MM-dd}.log" - PatternLayout: - pattern: "${fileLogPattern}" - TimeBasedTriggeringPolicy: - interval: 1 - - Loggers: - Root: - level: OFF - AppenderRef: - - ref: STDOUT - Logger: - - name: org.apache.kafka.server.log.remote.storage - level: INFO - AppenderRef: - - ref: FileAppender - - - name: org.apache.kafka.server.log.remote.metadata.storage - level: INFO - AppenderRef: - - ref: FileAppender - - - name: kafka.log.remote - level: INFO - AppenderRef: - - ref: FileAppender diff --git a/storage/bin/test/org/apache/kafka/tiered/storage/README.md b/storage/bin/test/org/apache/kafka/tiered/storage/README.md deleted file mode 100644 index ddfc102167..0000000000 --- a/storage/bin/test/org/apache/kafka/tiered/storage/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# The Test Flow - -Step 1: For every test, setup is done via TieredStorageTestHarness which extends IntegrationTestHarness and sets up a cluster with TS enabled on it. - -Step 2: The test is written as a specification consisting of sequential actions and assertions. The spec for the complete test is written down first which creates "actions" to be executed. - -Step 3: Once we have the test spec in-place (which includes assertion actions), we execute the test which will execute each action sequentially. - -Step 4: The test execution stops when any of the action throws an exception (or an assertion error). 
- -Step 5: Clean-up for the test is performed on test exit \ No newline at end of file diff --git a/storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql b/storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql deleted file mode 100644 index 7ddd08aa3c..0000000000 --- a/storage/inkless/bin/main/db/migration/V10__Covering_index_on_batches.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ - --- Create index on the same columns as batches_by_last_offset_idx, but including also other --- columns that are useful to speed up the scans done by RetentionEnforcer. -CREATE INDEX batches_by_last_offset_covering_idx ON batches (topic_id, partition, last_offset) - INCLUDE (base_offset, byte_size, timestamp_type, batch_max_timestamp, log_append_timestamp); - --- This index now is a duplicate and it's not needed anymore. -DROP INDEX batches_by_last_offset_idx; diff --git a/storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql b/storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql deleted file mode 100644 index 1d6cc731c9..0000000000 --- a/storage/inkless/bin/main/db/migration/V11__Add_diskless_offsets_to_logs.sql +++ /dev/null @@ -1,3 +0,0 @@ --- Copyright (c) 2024-2025 Aiven, Helsinki, Finland. https://aiven.io/ -ALTER TABLE logs ADD COLUMN diskless_start_offset offset_nullable_t DEFAULT NULL; -ALTER TABLE logs ADD COLUMN diskless_end_offset offset_nullable_t DEFAULT NULL; \ No newline at end of file diff --git a/storage/inkless/bin/main/db/migration/V1__Create_tables.sql b/storage/inkless/bin/main/db/migration/V1__Create_tables.sql deleted file mode 100644 index 3eec1ae8db..0000000000 --- a/storage/inkless/bin/main/db/migration/V1__Create_tables.sql +++ /dev/null @@ -1,1035 +0,0 @@ --- Copyright (c) 2024-2025 Aiven, Helsinki, Finland. https://aiven.io/ -CREATE DOMAIN broker_id_t AS INT NOT NULL; - -CREATE DOMAIN topic_id_t AS UUID NOT NULL; - -CREATE DOMAIN partition_t AS INT NOT NULL -CHECK (VALUE >= 0); - -CREATE DOMAIN topic_name_t VARCHAR(255) NOT NULL; - -CREATE DOMAIN magic_t AS SMALLINT NOT NULL -CHECK (VALUE >= 0 AND VALUE <= 2); - -CREATE DOMAIN format_t AS SMALLINT NOT NULL -CHECK (value >= 1 AND VALUE <= 3); - -CREATE DOMAIN offset_nullable_t BIGINT -CHECK (VALUE IS NULL OR VALUE >= 0); -CREATE DOMAIN offset_t AS offset_nullable_t -CHECK (VALUE IS NOT NULL); -CREATE DOMAIN offset_with_minus_one_t BIGINT -CHECK (VALUE IS NOT NULL AND VALUE >= -1); - -CREATE DOMAIN byte_offset_t BIGINT NOT NULL -CHECK (VALUE >= 0); - -CREATE DOMAIN byte_size_t BIGINT NOT NULL -CHECK (VALUE >= 0); - -CREATE DOMAIN object_key_t AS VARCHAR(1024) NOT NULL; - -CREATE DOMAIN timestamp_type_t AS SMALLINT NOT NULL -CHECK (VALUE >= -1 AND VALUE <= 1); - -CREATE DOMAIN timestamp_t AS BIGINT NOT NULL -CHECK (VALUE >= -5); - -CREATE DOMAIN producer_id_t AS BIGINT NOT NULL -CHECK (VALUE >= -1); - -CREATE DOMAIN producer_epoch_t AS SMALLINT NOT NULL -CHECK (VALUE >= -1); - -CREATE DOMAIN sequence_t AS INT NOT NULL -CHECK (VALUE >= -1); - -CREATE TABLE logs ( - topic_id topic_id_t, - partition partition_t, - topic_name topic_name_t, - log_start_offset offset_t, - high_watermark offset_t, - PRIMARY KEY(topic_id, partition) -); - --- The reasons why a file on the remote storage exists. -CREATE TYPE file_reason_t AS ENUM ( - -- Uploaded by a broker as the result of producing. - 'produce', - -- Uploaded by a broker as the result of merging. 
- 'merge' -); - -CREATE TYPE file_state_t AS ENUM ( - -- Uploaded by a broker, in use, etc. - 'uploaded', - -- Marked for deletion. - 'deleting' -); - -CREATE TABLE files ( - file_id BIGSERIAL PRIMARY KEY, - object_key object_key_t UNIQUE NOT NULL, - format format_t, - reason file_reason_t NOT NULL, - state file_state_t NOT NULL, - uploader_broker_id broker_id_t, - committed_at TIMESTAMP WITH TIME ZONE, - marked_for_deletion_at TIMESTAMP WITH TIME ZONE, - size byte_size_t -); - -CREATE INDEX files_by_state_only_deleting_idx ON files (state) WHERE state = 'deleting'; - -CREATE TABLE batches ( - batch_id BIGSERIAL PRIMARY KEY, - magic magic_t, - topic_id topic_id_t, - partition partition_t, - base_offset offset_t, - last_offset offset_t, - file_id BIGINT NOT NULL, - byte_offset byte_offset_t, - byte_size byte_size_t, - timestamp_type timestamp_type_t, - log_append_timestamp timestamp_t, - batch_max_timestamp timestamp_t, - CONSTRAINT fk_batches_logs FOREIGN KEY (topic_id, partition) REFERENCES logs(topic_id, partition) - ON DELETE NO ACTION ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED, -- allow deleting logs before batches - CONSTRAINT fk_batches_files FOREIGN KEY (file_id) REFERENCES files(file_id) ON DELETE RESTRICT ON UPDATE CASCADE -); --- This index should also cover fk_batches_logs. -CREATE INDEX batches_by_last_offset_idx ON batches (topic_id, partition, last_offset); --- This index covers fk_batches_files. -CREATE INDEX batches_by_file ON batches (file_id); - -CREATE TABLE producer_state ( - topic_id topic_id_t, - partition partition_t, - producer_id producer_id_t, - row_id BIGSERIAL, - producer_epoch producer_epoch_t, - base_sequence sequence_t, - last_sequence sequence_t, - assigned_offset offset_t, - batch_max_timestamp timestamp_t, - PRIMARY KEY (topic_id, partition, producer_id, row_id) -); - -CREATE TYPE commit_batch_request_v1 AS ( - magic magic_t, - topic_id topic_id_t, - partition partition_t, - byte_offset byte_offset_t, - byte_size byte_size_t, - base_offset offset_t, - last_offset offset_t, - timestamp_type timestamp_type_t, - batch_max_timestamp timestamp_t, - producer_id producer_id_t, - producer_epoch producer_epoch_t, - base_sequence sequence_t, - last_sequence sequence_t -); - -CREATE TYPE commit_batch_response_error_v1 AS ENUM ( - 'none', - -- errors - 'nonexistent_log', - 'invalid_producer_epoch', - 'sequence_out_of_order', - 'duplicate_batch' -); - -CREATE TYPE commit_batch_response_v1 AS ( - topic_id topic_id_t, - partition partition_t, - log_start_offset offset_nullable_t, - assigned_base_offset offset_nullable_t, - batch_timestamp timestamp_t, - error commit_batch_response_error_v1 -); - -CREATE FUNCTION commit_file_v1( - arg_object_key object_key_t, - arg_format format_t, - arg_uploader_broker_id broker_id_t, - arg_file_size byte_size_t, - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests commit_batch_request_v1[] -) -RETURNS SETOF commit_batch_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_new_file_id BIGINT; - l_request RECORD; - l_log logs%ROWTYPE; - l_duplicate RECORD; - l_assigned_offset offset_nullable_t; - l_new_high_watermark offset_nullable_t; - l_last_sequence_in_producer_epoch BIGINT; -BEGIN - INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) - VALUES (arg_object_key, arg_format, 'produce', 'uploaded', arg_uploader_broker_id, arg_now, arg_file_size) - RETURNING file_id - INTO l_new_file_id; - - -- We use this temporary table to perform the write operations in loop on it first - -- and only then 
dump the result on the real table. This reduces the WAL pressure and latency of the function. - CREATE TEMPORARY TABLE logs_tmp - ON COMMIT DROP - AS - -- Extract the relevant logs into the temporary table and simultaneously lock them. - -- topic_name and log_start_offset aren't technically needed, but having them allows declaring `l_log logs%ROWTYPE`. - SELECT * - FROM logs - WHERE (topic_id, partition) IN (SELECT DISTINCT topic_id, partition FROM unnest(arg_requests)) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE; - - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - LOOP - -- A small optimization: select the log into a variable only if it's a different topic-partition. - -- Batches are sorted by topic-partitions, so this makes sense. - IF l_log.topic_id IS DISTINCT FROM l_request.topic_id - OR l_log.partition IS DISTINCT FROM l_request.partition THEN - - SELECT * - FROM logs_tmp - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'nonexistent_log')::commit_batch_response_v1; - CONTINUE; - END IF; - END IF; - - l_assigned_offset = l_log.high_watermark; - - -- Validate that the new request base sequence is not larger than the previous batch last sequence - IF l_request.producer_id > -1 AND l_request.producer_epoch > -1 - THEN - -- If there are previous batches for the producer, check that the producer epoch is not smaller than the last batch - IF EXISTS ( - SELECT 1 - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND producer_epoch > l_request.producer_epoch - ) THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'invalid_producer_epoch')::commit_batch_response_v1; - CONTINUE; - END IF; - - SELECT MAX(last_sequence) - INTO l_last_sequence_in_producer_epoch - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND producer_epoch = l_request.producer_epoch; - - -- If there are previous batches for the producer - IF l_last_sequence_in_producer_epoch IS NULL THEN - -- If there are no previous batches for the producer, the base sequence must be 0 - IF l_request.base_sequence <> 0 - THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; - CONTINUE; - END IF; - ELSE - -- Check for duplicates - SELECT * - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND producer_epoch = l_request.producer_epoch - AND base_sequence = l_request.base_sequence - AND last_sequence = l_request.last_sequence - INTO l_duplicate; - IF FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_duplicate.assigned_offset, l_duplicate.batch_max_timestamp, 'duplicate_batch')::commit_batch_response_v1; - CONTINUE; - END IF; - - -- Check that the sequence is not out of order. - -- A sequence is out of order if the base sequence is not a continuation of the last sequence - -- or, in case of wraparound, the base sequence must be 0 and the last sequence must be 2147483647 (Integer.MAX_VALUE). 
- IF (l_request.base_sequence - 1) <> l_last_sequence_in_producer_epoch OR (l_last_sequence_in_producer_epoch = 2147483647 AND l_request.base_sequence <> 0) THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; - CONTINUE; - END IF; - END IF; - - INSERT INTO producer_state ( - topic_id, partition, producer_id, - producer_epoch, base_sequence, last_sequence, assigned_offset, batch_max_timestamp - ) - VALUES ( - l_request.topic_id, l_request.partition, l_request.producer_id, - l_request.producer_epoch, l_request.base_sequence, l_request.last_sequence, l_assigned_offset, l_request.batch_max_timestamp - ); - -- Keep only the last 5 records. - -- 5 == org.apache.kafka.storage.internals.log.ProducerStateEntry.NUM_BATCHES_TO_RETAIN - DELETE FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND row_id <= ( - SELECT row_id - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - ORDER BY row_id DESC - LIMIT 1 - OFFSET 5 - ); - END IF; - - UPDATE logs_tmp - SET high_watermark = high_watermark + (l_request.last_offset - l_request.base_offset + 1) - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - RETURNING high_watermark - INTO l_new_high_watermark; - - l_log.high_watermark = l_new_high_watermark; - - INSERT INTO batches ( - magic, - topic_id, partition, - base_offset, - last_offset, - file_id, - byte_offset, byte_size, - timestamp_type, log_append_timestamp, batch_max_timestamp - ) - VALUES ( - l_request.magic, - l_request.topic_id, l_request.partition, - l_assigned_offset, - l_new_high_watermark - 1, - l_new_file_id, - l_request.byte_offset, l_request.byte_size, - l_request.timestamp_type, - (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT, - l_request.batch_max_timestamp - ); - - RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_assigned_offset, l_request.batch_max_timestamp, 'none')::commit_batch_response_v1; - END LOOP; - - -- Transfer from the temporary to real table. - UPDATE logs - SET high_watermark = logs_tmp.high_watermark - FROM logs_tmp - WHERE logs.topic_id = logs_tmp.topic_id - AND logs.partition = logs_tmp.partition; - - IF NOT EXISTS (SELECT 1 FROM batches WHERE file_id = l_new_file_id LIMIT 1) THEN - PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); - END IF; -END; -$$ -; - -CREATE FUNCTION delete_topic_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_topic_ids UUID[] -) -RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_log RECORD; -BEGIN - FOR l_log IN - DELETE FROM logs - WHERE topic_id = ANY(arg_topic_ids) - RETURNING logs.* - LOOP - PERFORM delete_batch_v1(arg_now, batch_id) - FROM batches - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition; - END LOOP; -END; -$$ -; - -CREATE DOMAIN bigint_not_nullable_t BIGINT -CHECK (VALUE IS NOT NULL); -CREATE TYPE delete_records_request_v1 AS ( - topic_id topic_id_t, - partition partition_t, - -- We need to accept values lower than -1 so we can return the correct offset_out_of_range error for them. 
- "offset" bigint_not_nullable_t -); - -CREATE TYPE delete_records_response_error_v1 AS ENUM ( - 'unknown_topic_or_partition', 'offset_out_of_range' -); - -CREATE TYPE delete_records_response_v1 AS ( - topic_id topic_id_t, - partition partition_t, - error delete_records_response_error_v1, - log_start_offset offset_nullable_t -); - -CREATE FUNCTION delete_records_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests delete_records_request_v1[] -) -RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log RECORD; - l_converted_offset BIGINT = -1; -BEGIN - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = CASE - -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK - WHEN l_request.offset = -1 THEN l_log.high_watermark - ELSE l_request.offset - END; - - IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - IF l_converted_offset > l_log.log_start_offset THEN - UPDATE logs - SET log_start_offset = l_converted_offset - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition; - l_log.log_start_offset = l_converted_offset; - END IF; - - PERFORM delete_batch_v1(arg_now, batches.batch_id) - FROM batches - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition - AND last_offset < l_log.log_start_offset; - - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_log.log_start_offset)::delete_records_response_v1; - END LOOP; -END; -$$ -; - -CREATE FUNCTION delete_batch_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_batch_id BIGINT -) -RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_file_id BIGINT; -BEGIN - DELETE FROM batches - WHERE batch_id = arg_batch_id - RETURNING file_id - INTO l_file_id; - - IF NOT EXISTS (SELECT 1 FROM batches WHERE file_id = l_file_id LIMIT 1) THEN - PERFORM mark_file_to_delete_v1(arg_now, l_file_id); - END IF; -END; -$$ -; - -CREATE FUNCTION mark_file_to_delete_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_file_id BIGINT -) -RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ -BEGIN - UPDATE files - SET state = 'deleting', - marked_for_deletion_at = arg_now - WHERE file_id = arg_file_id; -END; -$$ -; - -CREATE FUNCTION delete_files_v1( - arg_paths object_key_t[] -) -RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ -BEGIN - WITH file_ids_to_delete AS ( - SELECT file_id - FROM files - WHERE object_key = ANY(arg_paths) - AND state = 'deleting' - ), - deleted_work_items AS ( - DELETE FROM file_merge_work_item_files - WHERE file_id IN (SELECT file_id FROM file_ids_to_delete) - ) - DELETE FROM files - WHERE file_id IN (SELECT file_id FROM file_ids_to_delete); -END; -$$ -; - -CREATE TYPE list_offsets_request_v1 AS ( - topic_id topic_id_t, - partition partition_t, - timestamp timestamp_t -); - -CREATE TYPE list_offsets_response_error_v1 AS ENUM ( - 'none', - -- errors - 'unknown_topic_or_partition', - 'unsupported_special_timestamp' -); - -CREATE TYPE list_offsets_response_v1 AS ( - topic_id topic_id_t, - partition 
partition_t, - timestamp timestamp_t, - "offset" offset_with_minus_one_t, - error list_offsets_response_error_v1 -); - -CREATE FUNCTION list_offsets_v1( - arg_requests list_offsets_request_v1[] -) -RETURNS SETOF list_offsets_response_v1 LANGUAGE plpgsql STABLE AS $$ -DECLARE - l_request RECORD; - l_log RECORD; - l_max_timestamp BIGINT = NULL; - l_found_timestamp BIGINT = NULL; - l_found_timestamp_offset BIGINT = NULL; -BEGIN - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - LOOP - -- Note that we're not doing locking ("FOR UPDATE") here, as it's not really needed for this read-only function. - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - INTO l_log; - - IF NOT FOUND THEN - -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'unknown_topic_or_partition')::list_offsets_response_v1; - CONTINUE; - END IF; - - -- -2 = org.apache.kafka.common.requests.ListOffsetsRequest.EARLIEST_TIMESTAMP - -- -4 = org.apache.kafka.common.requests.ListOffsetsRequest.EARLIEST_LOCAL_TIMESTAMP - IF l_request.timestamp = -2 OR l_request.timestamp = -4 THEN - -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, l_log.log_start_offset, 'none')::list_offsets_response_v1; - CONTINUE; - END IF; - - -- -1 = org.apache.kafka.common.requests.ListOffsetsRequest.LATEST_TIMESTAMP - IF l_request.timestamp = -1 THEN - -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, l_log.high_watermark, 'none')::list_offsets_response_v1; - CONTINUE; - END IF; - - -- -3 = org.apache.kafka.common.requests.ListOffsetsRequest.MAX_TIMESTAMP - IF l_request.timestamp = -3 THEN - SELECT MAX(batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp)) - INTO l_max_timestamp - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition; - - SELECT last_offset - INTO l_found_timestamp_offset - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) = l_max_timestamp - ORDER BY batch_id - LIMIT 1; - - IF l_found_timestamp_offset IS NULL THEN - -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'none')::list_offsets_response_v1; - ELSE - RETURN NEXT (l_request.topic_id, l_request.partition, l_max_timestamp, l_found_timestamp_offset, 'none')::list_offsets_response_v1; - END IF; - CONTINUE; - END IF; - - -- -5 = org.apache.kafka.common.requests.ListOffsetsRequest.LATEST_TIERED_TIMESTAMP - IF l_request.timestamp = -5 THEN - -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'none')::list_offsets_response_v1; - CONTINUE; - END IF; - - IF l_request.timestamp < 0 THEN - -- Unsupported special timestamp. 
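Both delete_records_v1 and list_offsets_v1 accept the same negative sentinels the Kafka protocol uses (-1 for the high watermark / latest offset, -2 for earliest, and so on). A hedged usage sketch, assuming topic_id_t is a UUID domain as in this migration; the UUID, partition, and offsets are placeholders:

    -- Truncate partition 0 up to its current high watermark (-1 sentinel).
    SELECT *
    FROM delete_records_v1(
        now(),
        ARRAY[ROW('00000000-0000-0000-0000-000000000001'::uuid, 0, -1)::delete_records_request_v1]
    );

    -- Resolve the earliest (-2) and latest (-1) offsets for the same partition.
    SELECT *
    FROM list_offsets_v1(
        ARRAY[
            ROW('00000000-0000-0000-0000-000000000001'::uuid, 0, -2)::list_offsets_request_v1,
            ROW('00000000-0000-0000-0000-000000000001'::uuid, 0, -1)::list_offsets_request_v1
        ]
    );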
- -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'unsupported_special_timestamp')::list_offsets_response_v1; - CONTINUE; - END IF; - - SELECT batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp), base_offset - INTO l_found_timestamp, l_found_timestamp_offset - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) >= l_request.timestamp - ORDER BY batch_id - LIMIT 1; - - IF l_found_timestamp_offset IS NULL THEN - -- -1 = org.apache.kafka.common.record.RecordBatch.NO_TIMESTAMP - RETURN NEXT (l_request.topic_id, l_request.partition, -1, -1, 'none')::list_offsets_response_v1; - ELSE - RETURN NEXT ( - l_request.topic_id, l_request.partition, l_found_timestamp, - GREATEST(l_found_timestamp_offset, l_log.log_start_offset), - 'none' - )::list_offsets_response_v1; - END IF; - CONTINUE; - END LOOP; -END; -$$ -; - -CREATE TABLE file_merge_work_items ( - work_item_id BIGSERIAL PRIMARY KEY, - created_at TIMESTAMP WITH TIME ZONE -); - -CREATE TABLE file_merge_work_item_files ( - work_item_id BIGINT REFERENCES file_merge_work_items(work_item_id), - file_id BIGINT REFERENCES files(file_id), - PRIMARY KEY (work_item_id, file_id) -); -CREATE INDEX file_merge_work_item_files_by_file ON file_merge_work_item_files (file_id); - -CREATE TYPE batch_metadata_v1 AS ( - magic magic_t, - topic_id topic_id_t, - topic_name topic_name_t, - partition partition_t, - byte_offset byte_offset_t, - byte_size byte_size_t, - base_offset offset_t, - last_offset offset_t, - log_append_timestamp timestamp_t, - batch_max_timestamp timestamp_t, - timestamp_type timestamp_type_t -); - -CREATE TYPE file_merge_work_item_response_batch_v1 AS ( - batch_id BIGINT, - object_key object_key_t, - metadata batch_metadata_v1 -); - -CREATE TYPE file_merge_work_item_response_file_v1 AS ( - file_id BIGINT, - object_key object_key_t, - format format_t, - size byte_size_t, - batches file_merge_work_item_response_batch_v1[] -); - -CREATE TYPE file_merge_work_item_response_v1 AS ( - work_item_id BIGINT, - created_at TIMESTAMP WITH TIME ZONE, - file_ids file_merge_work_item_response_file_v1[] -); - -CREATE FUNCTION get_file_merge_work_item_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_expiration_interval INTERVAL, - arg_merge_file_size_threshold byte_size_t -) -RETURNS SETOF file_merge_work_item_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_expired_work_item RECORD; - l_file_ids BIGINT[]; - l_new_work_item_id BIGINT; - l_existing_file_id BIGINT; -BEGIN - -- Delete any expired work items - FOR l_expired_work_item IN - SELECT * - FROM file_merge_work_items - WHERE created_at <= arg_now - arg_expiration_interval - LOOP - DELETE FROM file_merge_work_item_files - WHERE work_item_id = l_expired_work_item.work_item_id; - - DELETE FROM file_merge_work_items - WHERE work_item_id = l_expired_work_item.work_item_id; - END LOOP; - - -- Identify files to merge based on threshold size - WITH file_candidates AS ( - SELECT - file_id, - committed_at, - size - FROM files - WHERE state = 'uploaded' - AND reason != 'merge' - AND NOT EXISTS ( - SELECT 1 - FROM file_merge_work_item_files - WHERE file_id = files.file_id - ) - ), - running_sums AS ( - SELECT - file_id, - size, - SUM(size) OVER ( - ORDER BY committed_at, file_id - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW - ) as cumulative_size, - SUM(size) OVER ( - ORDER BY committed_at, 
file_id - ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING - ) as previous_sum - FROM file_candidates - ), - threshold_point AS ( - SELECT MIN(file_id) as last_file_id - FROM running_sums - WHERE cumulative_size >= arg_merge_file_size_threshold - ) - SELECT array_agg(rs.file_id ORDER BY rs.file_id) - INTO l_file_ids - FROM running_sums rs - WHERE rs.file_id <= (SELECT last_file_id FROM threshold_point); - - -- Return if no files to merge - IF l_file_ids IS NULL OR array_length(l_file_ids, 1) = 0 THEN - RETURN; - END IF; - - -- Create new work item - INSERT INTO file_merge_work_items(created_at) - VALUES (arg_now) - RETURNING work_item_id - INTO l_new_work_item_id; - - -- Add files to work item - FOREACH l_existing_file_id IN ARRAY l_file_ids - LOOP - INSERT INTO file_merge_work_item_files(work_item_id, file_id) - VALUES (l_new_work_item_id, l_existing_file_id); - END LOOP; - - -- Return work item - RETURN NEXT ( - l_new_work_item_id, - arg_now, - ARRAY( - SELECT ( - f.file_id, - files.object_key, - files.format, - files.size, - ARRAY( - SELECT ( - batches.batch_id, - files.object_key, - ( - batches.magic, - logs.topic_id, - logs.topic_name, - batches.partition, - batches.byte_offset, - batches.byte_size, - batches.base_offset, - batches.last_offset, - batches.log_append_timestamp, - batches.batch_max_timestamp, - batches.timestamp_type - )::batch_metadata_v1 - )::file_merge_work_item_response_batch_v1 - FROM batches - JOIN files ON batches.file_id = files.file_id - JOIN logs ON batches.topic_id = logs.topic_id AND batches.partition = logs.partition - WHERE batches.file_id = f.file_id - ) - )::file_merge_work_item_response_file_v1 - FROM unnest(l_file_ids) AS f(file_id) - JOIN files ON f.file_id = files.file_id - ) - )::file_merge_work_item_response_v1; -END; -$$ -; - -CREATE TYPE commit_file_merge_work_item_batch_v1 AS ( - metadata batch_metadata_v1, - parent_batch_ids BIGINT[] -); - -CREATE TYPE commit_file_merge_work_item_error_v1 AS ENUM ( - 'none', - 'file_merge_work_item_not_found', - 'invalid_parent_batch_count', - 'batch_not_part_of_work_item' -); - -CREATE TYPE commit_file_merge_work_item_response_v1 AS ( - error commit_file_merge_work_item_error_v1, - error_batch commit_file_merge_work_item_batch_v1 -); - -CREATE FUNCTION commit_file_merge_work_item_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_existing_work_item_id BIGINT, - arg_object_key object_key_t, - arg_format format_t, - arg_uploader_broker_id broker_id_t, - arg_file_size byte_size_t, - arg_merge_file_batches commit_file_merge_work_item_batch_v1[] -) -RETURNS commit_file_merge_work_item_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_work_item RECORD; - l_new_file_id BIGINT; - l_found_batches_size BIGINT; - l_work_item_file RECORD; - l_merge_file_batch commit_file_merge_work_item_batch_v1; -BEGIN - -- check that the work item exists - SELECT * FROM file_merge_work_items - WHERE work_item_id = arg_existing_work_item_id - FOR UPDATE - INTO l_work_item; - - IF NOT FOUND THEN - -- do not remove the file if this condition is hit because it may be a retry from a valid work item - -- only delete the object key when a failure condition is found - - RETURN ROW('file_merge_work_item_not_found'::commit_file_merge_work_item_error_v1, NULL)::commit_file_merge_work_item_response_v1; - END IF; - - -- check that the number of parent batches is 1 (limitation of the current implementation) - FOR l_merge_file_batch IN - SELECT * - FROM unnest(arg_merge_file_batches) b - LOOP - IF array_length(l_merge_file_batch.parent_batch_ids, 1) 
IS NULL OR array_length(l_merge_file_batch.parent_batch_ids, 1) != 1 THEN - -- insert new empty file to be deleted - INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) - VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, 0) - RETURNING file_id - INTO l_new_file_id; - PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); - - -- Do not remove the work item, because another non-buggy worker may eventually succeed. - - RETURN ROW('invalid_parent_batch_count'::commit_file_merge_work_item_error_v1, l_merge_file_batch)::commit_file_merge_work_item_response_v1; - END IF; - END LOOP; - - -- Lock logs to prevent concurrent modifications. - PERFORM - FROM logs - WHERE (topic_id, partition) IN ( - SELECT logs.topic_id, logs.partition - FROM unnest(arg_merge_file_batches) AS mfb - INNER JOIN batches ON mfb.parent_batch_ids[1] = batches.batch_id - INNER JOIN logs ON batches.topic_id = logs.topic_id AND batches.partition = logs.partition - ) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE; - - -- filter arg_merge_file_batches to only include the ones where logs exist - arg_merge_file_batches := ARRAY( - SELECT b - FROM unnest(arg_merge_file_batches) b - JOIN batches ON b.parent_batch_ids[1] = batches.batch_id - JOIN logs ON batches.topic_id = logs.topic_id AND batches.partition = logs.partition - ); - - -- check if the found batch file id is part of the work item - SELECT SUM(batches.byte_size) - FROM batches - WHERE EXISTS ( - SELECT 1 - FROM unnest(arg_merge_file_batches) b - WHERE batch_id = ANY(b.parent_batch_ids) - ) - INTO l_found_batches_size; - - IF l_found_batches_size IS NULL THEN - -- insert new empty file - INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) - VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, 0) - RETURNING file_id - INTO l_new_file_id; - PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); - - -- delete work item - PERFORM release_file_merge_work_item_v1(arg_existing_work_item_id); - - RETURN ROW('none'::commit_file_merge_work_item_error_v1, NULL)::commit_file_merge_work_item_response_v1; - END IF; - - -- check that all parent batch files are part of work item files - FOR l_merge_file_batch IN - SELECT * - FROM unnest(arg_merge_file_batches) b - WHERE NOT EXISTS ( - SELECT 1 - FROM file_merge_work_item_files - JOIN batches ON file_merge_work_item_files.file_id = batches.file_id - WHERE work_item_id = arg_existing_work_item_id - AND batch_id = ANY(b.parent_batch_ids) - ) - LOOP - -- insert new empty file to be deleted - INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) - VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, 0) - RETURNING file_id - INTO l_new_file_id; - PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); - - -- Do not remove the work item, because another non-buggy worker may eventually succeed. 
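The "ordering is important to prevent deadlocks" comments above rely on a standard rule: if every transaction acquires its row locks in the same global order, lock waits form a chain rather than a cycle, so PostgreSQL never has to abort one of the transactions. A small illustration of the locking statement in isolation (the UUIDs are placeholders):

    -- Two sessions running this both lock the topic_id/partition rows in the same
    -- order, so neither can hold the second lock while waiting for the first,
    -- and no deadlock cycle can form.
    SELECT 1
    FROM logs
    WHERE (topic_id, partition) IN (
        VALUES ('00000000-0000-0000-0000-000000000001'::uuid, 0),
               ('00000000-0000-0000-0000-000000000002'::uuid, 0)
    )
    ORDER BY topic_id, partition
    FOR UPDATE;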
- - RETURN ROW('batch_not_part_of_work_item'::commit_file_merge_work_item_error_v1, l_merge_file_batch)::commit_file_merge_work_item_response_v1; - END LOOP; - - -- delete old files - PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM file_merge_work_item_files - WHERE work_item_id = arg_existing_work_item_id; - - -- insert new file - INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) - VALUES (arg_object_key, arg_format, 'merge', 'uploaded', arg_uploader_broker_id, arg_now, arg_file_size) - RETURNING file_id - INTO l_new_file_id; - - -- delete old batches - DELETE FROM batches - WHERE EXISTS ( - SELECT 1 - FROM unnest(arg_merge_file_batches) b - WHERE batch_id = ANY(b.parent_batch_ids) - ); - - -- insert new batches - INSERT INTO batches ( - magic, - topic_id, partition, - base_offset, - last_offset, - file_id, - byte_offset, byte_size, - log_append_timestamp, - batch_max_timestamp, - timestamp_type - ) - SELECT DISTINCT - (unnest(arg_merge_file_batches)).metadata.magic, - (unnest(arg_merge_file_batches)).metadata.topic_id, - (unnest(arg_merge_file_batches)).metadata.partition, - (unnest(arg_merge_file_batches)).metadata.base_offset, - (unnest(arg_merge_file_batches)).metadata.last_offset, - l_new_file_id, - (unnest(arg_merge_file_batches)).metadata.byte_offset, - (unnest(arg_merge_file_batches)).metadata.byte_size, - (unnest(arg_merge_file_batches)).metadata.log_append_timestamp, - (unnest(arg_merge_file_batches)).metadata.batch_max_timestamp, - (unnest(arg_merge_file_batches)).metadata.timestamp_type - FROM unnest(arg_merge_file_batches) - ORDER BY (unnest(arg_merge_file_batches)).metadata.topic_id, - (unnest(arg_merge_file_batches)).metadata.partition, - (unnest(arg_merge_file_batches)).metadata.base_offset; - - -- delete work item - PERFORM release_file_merge_work_item_v1(arg_existing_work_item_id); - - RETURN ROW('none'::commit_file_merge_work_item_error_v1, NULL)::commit_file_merge_work_item_response_v1; -END; -$$ -; - -CREATE TYPE release_file_merge_work_item_error_v1 AS ENUM ( - 'none', - 'file_merge_work_item_not_found' -); - -CREATE TYPE release_file_merge_work_item_response_v1 AS ( - error release_file_merge_work_item_error_v1 -); - -CREATE FUNCTION release_file_merge_work_item_v1( - arg_existing_work_item_id BIGINT -) -RETURNS release_file_merge_work_item_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -BEGIN - PERFORM * FROM file_merge_work_items - WHERE work_item_id = arg_existing_work_item_id - FOR UPDATE; - - IF NOT FOUND THEN - RETURN ROW('file_merge_work_item_not_found'::release_file_merge_work_item_error_v1)::release_file_merge_work_item_response_v1; - END IF; - - DELETE FROM file_merge_work_item_files - WHERE work_item_id = arg_existing_work_item_id; - - DELETE FROM file_merge_work_items - WHERE work_item_id = arg_existing_work_item_id; - - RETURN ROW('none'::release_file_merge_work_item_error_v1)::release_file_merge_work_item_response_v1; -END; -$$ -; - -CREATE FUNCTION batch_timestamp( - arg_timestamp_type timestamp_type_t, - arg_batch_max_timestamp timestamp_t, - arg_log_append_timestamp timestamp_t -) -RETURNS timestamp_t LANGUAGE plpgsql IMMUTABLE AS $$ -BEGIN - -- See how timestamps are assigned in - -- https://github.com/aiven/inkless/blob/e124d3975bdb3a9ec85eee2fba7a1b0a6967d3a6/storage/src/main/java/org/apache/kafka/storage/internals/log/LogValidator.java#L271-L276 - RETURN CASE arg_timestamp_type - WHEN 1 THEN arg_log_append_timestamp -- org.apache.kafka.common.record.TimestampType.LOG_APPEND_TIME - ELSE 
arg_batch_max_timestamp - END; -END -$$ -; diff --git a/storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql b/storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql deleted file mode 100644 index f4db7f1171..0000000000 --- a/storage/inkless/bin/main/db/migration/V2__Speed_up_delete_topic_and_delete_records.sql +++ /dev/null @@ -1,121 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ - -CREATE OR REPLACE FUNCTION delete_topic_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_topic_ids UUID[] -) -RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ -BEGIN - -- First delete the logs of the deleted topics. - DELETE FROM logs - WHERE topic_id = ANY(arg_topic_ids); - - DROP TABLE IF EXISTS affected_files; - -- Delete the affected batches and remember what files are affected. - -- We need to separate deleting batches and finding empty files because if they are in the same requests as CTE, - -- the query below will see the MVCC snapshot from before deleting batches. - CREATE TEMPORARY TABLE affected_files - ON COMMIT DROP - AS - WITH deleted_batches AS ( - DELETE FROM batches - WHERE topic_id = ANY(arg_topic_ids) - RETURNING file_id - ) - SELECT file_id - FROM deleted_batches; - - -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) - -- and mark them for deletion. - PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM ( - SELECT DISTINCT af.file_id - FROM affected_files AS af - LEFT JOIN batches AS b ON af.file_id = b.file_id - WHERE b.batch_id IS NULL - ); -END; -$$ -; - -CREATE OR REPLACE FUNCTION delete_records_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests delete_records_request_v1[] -) -RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log RECORD; - l_converted_offset BIGINT = -1; -BEGIN - - DROP TABLE IF EXISTS affected_files; - CREATE TEMPORARY TABLE affected_files ( - file_id BIGINT PRIMARY KEY - ) - ON COMMIT DROP; - - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = CASE - -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK - WHEN l_request.offset = -1 THEN l_log.high_watermark - ELSE l_request.offset - END; - - IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - IF l_converted_offset > l_log.log_start_offset THEN - UPDATE logs - SET log_start_offset = l_converted_offset - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition; - l_log.log_start_offset = l_converted_offset; - END IF; - - -- Delete the affected batches and remember what files are affected. 
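The comment in delete_topic_v1 above touches on a PostgreSQL subtlety: every part of a single statement, including queries that follow a data-modifying CTE, runs against the same snapshot, so a check for "files with no remaining batches" in the same statement would still see the batches just deleted. Hence the temporary table plus a second statement. A compact sketch of both shapes, outside the migration (the UUID is a placeholder):

    -- Would NOT work as intended in one statement: the outer query still sees the
    -- rows deleted by the CTE, because both share the statement's snapshot.
    --   WITH deleted AS (DELETE FROM batches WHERE topic_id = ... RETURNING file_id)
    --   SELECT f.file_id FROM files f
    --   WHERE NOT EXISTS (SELECT 1 FROM batches b WHERE b.file_id = f.file_id);

    -- Works: materialize the affected file ids first ...
    CREATE TEMPORARY TABLE affected_files ON COMMIT DROP AS
    WITH deleted_batches AS (
        DELETE FROM batches
        WHERE topic_id = '00000000-0000-0000-0000-000000000001'::uuid
        RETURNING file_id
    )
    SELECT DISTINCT file_id FROM deleted_batches;

    -- ... then check emptiness in a second statement, which sees the post-delete state.
    SELECT af.file_id
    FROM affected_files af
    WHERE NOT EXISTS (SELECT 1 FROM batches b WHERE b.file_id = af.file_id);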
- WITH affected_files_local AS ( - DELETE FROM batches - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition - AND last_offset < l_log.log_start_offset - RETURNING file_id - ) - INSERT INTO affected_files (file_id) - SELECT DISTINCT file_id - FROM affected_files_local - ON CONFLICT DO NOTHING; -- ignore duplicates - - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_log.log_start_offset)::delete_records_response_v1; - END LOOP; - - -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) - -- and mark them for deletion. - PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM ( - SELECT DISTINCT af.file_id - FROM affected_files AS af - LEFT JOIN batches AS b ON af.file_id = b.file_id - WHERE b.batch_id IS NULL - ); -END; -$$ -; diff --git a/storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql b/storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql deleted file mode 100644 index f32e9539e7..0000000000 --- a/storage/inkless/bin/main/db/migration/V3__Add_logs_size.sql +++ /dev/null @@ -1,300 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ - --- 1. Introduce the `byte_size` column. - -ALTER TABLE logs -ADD COLUMN byte_size byte_size_t DEFAULT 0; - -UPDATE logs -SET byte_size = aggregated.total_byte_size -FROM ( - SELECT topic_id, partition, SUM(byte_size) AS total_byte_size - FROM batches - GROUP BY topic_id, partition -) AS aggregated -WHERE logs.topic_id = aggregated.topic_id - AND logs.partition = aggregated.partition; - --- 2. Update functions to support `byte_size`. - -CREATE OR REPLACE FUNCTION commit_file_v1( - arg_object_key object_key_t, - arg_format format_t, - arg_uploader_broker_id broker_id_t, - arg_file_size byte_size_t, - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests commit_batch_request_v1[] -) -RETURNS SETOF commit_batch_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_new_file_id BIGINT; - l_request RECORD; - l_log logs%ROWTYPE; - l_duplicate RECORD; - l_assigned_offset offset_nullable_t; - l_new_high_watermark offset_nullable_t; - l_last_sequence_in_producer_epoch BIGINT; -BEGIN - INSERT INTO files (object_key, format, reason, state, uploader_broker_id, committed_at, size) - VALUES (arg_object_key, arg_format, 'produce', 'uploaded', arg_uploader_broker_id, arg_now, arg_file_size) - RETURNING file_id - INTO l_new_file_id; - - -- We use this temporary table to perform the write operations in loop on it first - -- and only then dump the result on the real table. This reduces the WAL pressure and latency of the function. - DROP TABLE IF EXISTS logs_tmp; - CREATE TEMPORARY TABLE logs_tmp - ON COMMIT DROP - AS - -- Extract the relevant logs into the temporary table and simultaneously lock them. - -- topic_name and log_start_offset aren't technically needed, but having them allows declaring `l_log logs%ROWTYPE`. - SELECT * - FROM logs - WHERE (topic_id, partition) IN (SELECT DISTINCT topic_id, partition FROM unnest(arg_requests)) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE; - - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - LOOP - -- A small optimization: select the log into a variable only if it's a different topic-partition. - -- Batches are sorted by topic-partitions, so this makes sense. 
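After the byte_size backfill earlier in this migration, the new column should agree with the per-partition sum over batches. A hedged consistency check that could be run manually after migrating (not part of the migration itself):

    -- Report partitions whose cached byte_size no longer matches the batches table.
    SELECT l.topic_id, l.partition, l.byte_size,
           COALESCE(b.total, 0) AS recomputed
    FROM logs l
    LEFT JOIN (
        SELECT topic_id, partition, SUM(byte_size) AS total
        FROM batches
        GROUP BY topic_id, partition
    ) b ON b.topic_id = l.topic_id AND b.partition = l.partition
    WHERE l.byte_size IS DISTINCT FROM COALESCE(b.total, 0);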
- IF l_log.topic_id IS DISTINCT FROM l_request.topic_id - OR l_log.partition IS DISTINCT FROM l_request.partition THEN - - SELECT * - FROM logs_tmp - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'nonexistent_log')::commit_batch_response_v1; - CONTINUE; - END IF; - END IF; - - l_assigned_offset = l_log.high_watermark; - - -- Validate that the new request base sequence is not larger than the previous batch last sequence - IF l_request.producer_id > -1 AND l_request.producer_epoch > -1 - THEN - -- If there are previous batches for the producer, check that the producer epoch is not smaller than the last batch - IF EXISTS ( - SELECT 1 - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND producer_epoch > l_request.producer_epoch - ) THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'invalid_producer_epoch')::commit_batch_response_v1; - CONTINUE; - END IF; - - SELECT MAX(last_sequence) - INTO l_last_sequence_in_producer_epoch - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND producer_epoch = l_request.producer_epoch; - - -- If there are previous batches for the producer - IF l_last_sequence_in_producer_epoch IS NULL THEN - -- If there are no previous batches for the producer, the base sequence must be 0 - IF l_request.base_sequence <> 0 - THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; - CONTINUE; - END IF; - ELSE - -- Check for duplicates - SELECT * - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND producer_epoch = l_request.producer_epoch - AND base_sequence = l_request.base_sequence - AND last_sequence = l_request.last_sequence - INTO l_duplicate; - IF FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_duplicate.assigned_offset, l_duplicate.batch_max_timestamp, 'duplicate_batch')::commit_batch_response_v1; - CONTINUE; - END IF; - - -- Check that the sequence is not out of order. - -- A sequence is out of order if the base sequence is not a continuation of the last sequence - -- or, in case of wraparound, the base sequence must be 0 and the last sequence must be 2147483647 (Integer.MAX_VALUE). - IF (l_request.base_sequence - 1) <> l_last_sequence_in_producer_epoch OR (l_last_sequence_in_producer_epoch = 2147483647 AND l_request.base_sequence <> 0) THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, NULL, -1, 'sequence_out_of_order')::commit_batch_response_v1; - CONTINUE; - END IF; - END IF; - - INSERT INTO producer_state ( - topic_id, partition, producer_id, - producer_epoch, base_sequence, last_sequence, assigned_offset, batch_max_timestamp - ) - VALUES ( - l_request.topic_id, l_request.partition, l_request.producer_id, - l_request.producer_epoch, l_request.base_sequence, l_request.last_sequence, l_assigned_offset, l_request.batch_max_timestamp - ); - -- Keep only the last 5 records. 
- -- 5 == org.apache.kafka.storage.internals.log.ProducerStateEntry.NUM_BATCHES_TO_RETAIN - DELETE FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - AND row_id <= ( - SELECT row_id - FROM producer_state - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND producer_id = l_request.producer_id - ORDER BY row_id DESC - LIMIT 1 - OFFSET 5 - ); - END IF; - - UPDATE logs_tmp - SET high_watermark = high_watermark + (l_request.last_offset - l_request.base_offset + 1), - byte_size = byte_size + l_request.byte_size - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - RETURNING high_watermark - INTO l_new_high_watermark; - - l_log.high_watermark = l_new_high_watermark; - - INSERT INTO batches ( - magic, - topic_id, partition, - base_offset, - last_offset, - file_id, - byte_offset, byte_size, - timestamp_type, log_append_timestamp, batch_max_timestamp - ) - VALUES ( - l_request.magic, - l_request.topic_id, l_request.partition, - l_assigned_offset, - l_new_high_watermark - 1, - l_new_file_id, - l_request.byte_offset, l_request.byte_size, - l_request.timestamp_type, - (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT, - l_request.batch_max_timestamp - ); - - RETURN NEXT (l_request.topic_id, l_request.partition, l_log.log_start_offset, l_assigned_offset, l_request.batch_max_timestamp, 'none')::commit_batch_response_v1; - END LOOP; - - -- Transfer from the temporary to real table. - UPDATE logs - SET high_watermark = logs_tmp.high_watermark, - byte_size = logs_tmp.byte_size - FROM logs_tmp - WHERE logs.topic_id = logs_tmp.topic_id - AND logs.partition = logs_tmp.partition; - - IF NOT EXISTS (SELECT 1 FROM batches WHERE file_id = l_new_file_id LIMIT 1) THEN - PERFORM mark_file_to_delete_v1(arg_now, l_new_file_id); - END IF; -END; -$$ -; - - -CREATE OR REPLACE FUNCTION delete_records_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests delete_records_request_v1[] -) -RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log RECORD; - l_converted_offset BIGINT = -1; - l_deleted_bytes BIGINT; -BEGIN - - DROP TABLE IF EXISTS affected_files; - CREATE TEMPORARY TABLE affected_files ( - file_id BIGINT PRIMARY KEY - ) - ON COMMIT DROP; - - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = CASE - -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK - WHEN l_request.offset = -1 THEN l_log.high_watermark - ELSE l_request.offset - END; - - IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = GREATEST(l_converted_offset, l_log.log_start_offset); - - -- Delete the affected batches. - WITH deleted_batches AS ( - DELETE FROM batches - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition - AND last_offset < l_converted_offset - RETURNING file_id, byte_size - ), - -- Remember what files were affected. 
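The byte_size-aware delete_records_v1 above funnels one data-modifying CTE into two consumers: the deleted rows' file ids are remembered and their sizes are summed, all in a single statement (continued just below). A generic sketch of that multi-consumer RETURNING pattern on hypothetical tables:

    -- Hypothetical tables: items(group_id BIGINT, bytes BIGINT),
    --                      touched_groups(group_id BIGINT PRIMARY KEY).
    WITH removed AS (
        DELETE FROM items
        WHERE bytes < 10
        RETURNING group_id, bytes
    ),
    remember AS (
        INSERT INTO touched_groups (group_id)
        SELECT DISTINCT group_id FROM removed
        ON CONFLICT DO NOTHING
    )
    SELECT COALESCE(SUM(bytes), 0) AS removed_bytes
    FROM removed;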
- _1 AS ( - INSERT INTO affected_files (file_id) - SELECT DISTINCT file_id - FROM deleted_batches - ON CONFLICT DO NOTHING -- ignore duplicates - ) - SELECT COALESCE(SUM(byte_size), 0) - FROM deleted_batches - INTO l_deleted_bytes; - - UPDATE logs - SET log_start_offset = l_converted_offset, - byte_size = byte_size - l_deleted_bytes - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition; - - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_converted_offset)::delete_records_response_v1; - END LOOP; - - -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) - -- and mark them for deletion. - PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM ( - SELECT DISTINCT af.file_id - FROM affected_files AS af - LEFT JOIN batches AS b ON af.file_id = b.file_id - WHERE b.batch_id IS NULL - ); -END; -$$ -; diff --git a/storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql b/storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql deleted file mode 100644 index a5db67cce8..0000000000 --- a/storage/inkless/bin/main/db/migration/V4__Retention_enforcement.sql +++ /dev/null @@ -1,133 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ - -CREATE DOMAIN retention_t AS BIGINT NOT NULL -CHECK (VALUE >= -1); - -CREATE TYPE enforce_retention_request_v1 AS ( - topic_id topic_id_t, - partition partition_t, - retention_bytes retention_t, - retention_ms retention_t -); - -CREATE TYPE enforce_retention_response_error_v1 AS ENUM ( - 'unknown_topic_or_partition' -); - -CREATE TYPE enforce_retention_response_v1 AS ( - topic_id topic_id_t, - partition partition_t, - error enforce_retention_response_error_v1, - batches_deleted INT, - bytes_deleted BIGINT, - log_start_offset offset_nullable_t -); - -CREATE FUNCTION enforce_retention_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests enforce_retention_request_v1[] -) -RETURNS SETOF enforce_retention_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log logs%ROWTYPE; - l_base_offset_of_first_batch_to_keep offset_nullable_t; - l_batches_deleted INT; - l_bytes_deleted BIGINT; - l_delete_records_response delete_records_response_v1; -BEGIN - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - INTO l_log - FOR UPDATE; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL, NULL, NULL)::enforce_retention_response_v1; - CONTINUE; - END IF; - - l_base_offset_of_first_batch_to_keep = NULL; - - IF l_request.retention_bytes >= 0 OR l_request.retention_ms >= 0 THEN - WITH augmented_batches AS ( - -- For retention by size: - -- Associate with each batch the number of bytes that the log would have if this batch and later batches are retained. - -- In other words, this is the reverse aggregated size (counted from the end to the beginning). - -- An example: - -- Batch size | Aggregated | Reverse aggregated | - -- (in order) | size | size | - -- 1 | 1 | 10 - 1 + 1 = 10 | - -- 2 | 1 + 2 = 3 | 10 - 3 + 2 = 9 | - -- 3 | 3 + 3 = 6 | 10 - 6 + 3 = 7 | - -- 4 | 6 + 4 = 10 | 10 - 10 + 4 = 4 | - -- The reverse aggregated size is equal to what the aggregated size would be if the sorting order is reverse, - -- but doing so explicitly might be costly, hence the formula. 
- -- For retention by time: - -- Associate with each batch its effective timestamp. - SELECT topic_id, partition, last_offset, - base_offset, - l_log.byte_size - SUM(byte_size) OVER (ORDER BY topic_id, partition, last_offset) + byte_size AS reverse_agg_byte_size, - batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) AS effective_timestamp - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - ORDER BY topic_id, partition, last_offset - ) - -- Look for the first batch that complies with both retention policies (if they are enabled): - -- For size: - -- The first batch which being retained with the subsequent batches would make the total log size <= retention_bytes. - -- For time: - -- The first batch which effective timestamp is greater or equal to the last timestamp to retain. - SELECT base_offset - FROM augmented_batches - WHERE (l_request.retention_bytes < 0 OR reverse_agg_byte_size <= l_request.retention_bytes) - AND (l_request.retention_ms < 0 OR effective_timestamp >= (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT - l_request.retention_ms) - ORDER BY topic_id, partition, last_offset - LIMIT 1 - INTO l_base_offset_of_first_batch_to_keep; - - -- No batch satisfy the retention policy == delete everything, i.e. up to HWM. - l_base_offset_of_first_batch_to_keep = COALESCE(l_base_offset_of_first_batch_to_keep, l_log.high_watermark); - END IF; - - -- Nothing to delete. - IF l_base_offset_of_first_batch_to_keep IS NULL THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, 0, 0::BIGINT, l_log.log_start_offset)::enforce_retention_response_v1; - CONTINUE; - END IF; - - SELECT COUNT(*), SUM(byte_size) - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND last_offset < l_base_offset_of_first_batch_to_keep - INTO l_batches_deleted, l_bytes_deleted; - - SELECT * - FROM delete_records_v1(arg_now, array[ROW(l_request.topic_id, l_request.partition, l_base_offset_of_first_batch_to_keep)::delete_records_request_v1]) - INTO l_delete_records_response; - - -- This should never happen, just fail. - IF l_delete_records_response.error IS DISTINCT FROM NULL THEN - RAISE 'delete_records_v1 returned unexpected error: %', l_delete_records_response; - END IF; - - RETURN NEXT ( - l_request.topic_id, - l_request.partition, - NULL::enforce_retention_response_error_v1, - COALESCE(l_batches_deleted, 0), - COALESCE(l_bytes_deleted, 0), - l_delete_records_response.log_start_offset - )::enforce_retention_response_v1; - END LOOP; -END; -$$ -; diff --git a/storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql b/storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql deleted file mode 100644 index 675d463677..0000000000 --- a/storage/inkless/bin/main/db/migration/V5__Fix_deadlock_in_delete_records_v1.sql +++ /dev/null @@ -1,90 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. 
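A usage sketch for enforce_retention_v1 as defined above, assuming the domains from V1; the UUID is a placeholder, and -1 would disable the corresponding policy:

    -- Keep at most ~1 GiB and at most 7 days of data in partition 0.
    SELECT *
    FROM enforce_retention_v1(
        now(),
        ARRAY[
            ROW('00000000-0000-0000-0000-000000000001'::uuid, 0,
                1073741824,      -- retention.bytes
                604800000        -- retention.ms (7 days)
            )::enforce_retention_request_v1
        ]
    );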
https://aiven.io/ - -CREATE OR REPLACE FUNCTION delete_records_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests delete_records_request_v1[] -) -RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log RECORD; - l_converted_offset BIGINT = -1; - l_deleted_bytes BIGINT; -BEGIN - - DROP TABLE IF EXISTS affected_files; - CREATE TEMPORARY TABLE affected_files ( - file_id BIGINT PRIMARY KEY - ) - ON COMMIT DROP; - - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - FOR UPDATE - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = CASE - -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK - WHEN l_request.offset = -1 THEN l_log.high_watermark - ELSE l_request.offset - END; - - IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = GREATEST(l_converted_offset, l_log.log_start_offset); - - -- Delete the affected batches. - WITH deleted_batches AS ( - DELETE FROM batches - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition - AND last_offset < l_converted_offset - RETURNING file_id, byte_size - ), - -- Remember what files were affected. - _1 AS ( - INSERT INTO affected_files (file_id) - SELECT DISTINCT file_id - FROM deleted_batches - ON CONFLICT DO NOTHING -- ignore duplicates - ) - SELECT COALESCE(SUM(byte_size), 0) - FROM deleted_batches - INTO l_deleted_bytes; - - UPDATE logs - SET log_start_offset = l_converted_offset, - byte_size = byte_size - l_deleted_bytes - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition; - - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_converted_offset)::delete_records_response_v1; - END LOOP; - - -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) - -- and mark them for deletion. - PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM ( - SELECT DISTINCT af.file_id - FROM affected_files AS af - LEFT JOIN batches AS b ON af.file_id = b.file_id - WHERE b.batch_id IS NULL - ); -END; -$$ -; diff --git a/storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql b/storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql deleted file mode 100644 index 85f8cc914f..0000000000 --- a/storage/inkless/bin/main/db/migration/V6__Improve_understandability_of_delete_topic_v1.sql +++ /dev/null @@ -1,143 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ - -ALTER TABLE batches -ALTER CONSTRAINT fk_batches_logs NOT DEFERRABLE; - -CREATE OR REPLACE FUNCTION delete_topic_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_topic_ids UUID[] -) -RETURNS VOID LANGUAGE plpgsql VOLATILE AS $$ -BEGIN - -- Ensure no other transaction commits or does anything else to the affected partitions while this transaction is in progress. 
- PERFORM - FROM logs - WHERE topic_id = ANY(arg_topic_ids) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - FOR UPDATE; - - DROP TABLE IF EXISTS affected_files; - -- Delete the affected batches and remember what files are affected. - -- We need to separate deleting batches and finding empty files because if they are in the same requests as CTE, - -- the query below will see the MVCC snapshot from before deleting batches. - CREATE TEMPORARY TABLE affected_files - ON COMMIT DROP - AS - WITH deleted_batches AS ( - DELETE FROM batches - WHERE topic_id = ANY(arg_topic_ids) - RETURNING file_id - ) - SELECT file_id - FROM deleted_batches; - - DELETE FROM logs - WHERE topic_id = ANY(arg_topic_ids); - - -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) - -- and mark them for deletion. - PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM ( - SELECT DISTINCT af.file_id - FROM affected_files AS af - WHERE NOT EXISTS ( - SELECT 1 - FROM batches AS b - WHERE b.file_id = af.file_id - ) - ); -END; -$$ -; - -CREATE OR REPLACE FUNCTION delete_records_v1( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests delete_records_request_v1[] -) -RETURNS SETOF delete_records_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log RECORD; - l_converted_offset BIGINT = -1; - l_deleted_bytes BIGINT; -BEGIN - - DROP TABLE IF EXISTS affected_files; - CREATE TEMPORARY TABLE affected_files ( - file_id BIGINT PRIMARY KEY - ) - ON COMMIT DROP; - - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - FOR UPDATE - INTO l_log; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = CASE - -- -1 = org.apache.kafka.common.requests.DeleteRecordsRequest.HIGH_WATERMARK - WHEN l_request.offset = -1 THEN l_log.high_watermark - ELSE l_request.offset - END; - - IF l_converted_offset < 0 OR l_converted_offset > l_log.high_watermark THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'offset_out_of_range', NULL)::delete_records_response_v1; - CONTINUE; - END IF; - - l_converted_offset = GREATEST(l_converted_offset, l_log.log_start_offset); - - -- Delete the affected batches. - WITH deleted_batches AS ( - DELETE FROM batches - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition - AND last_offset < l_converted_offset - RETURNING file_id, byte_size - ), - -- Remember what files were affected. - _1 AS ( - INSERT INTO affected_files (file_id) - SELECT DISTINCT file_id - FROM deleted_batches - ON CONFLICT DO NOTHING -- ignore duplicates - ) - SELECT COALESCE(SUM(byte_size), 0) - FROM deleted_batches - INTO l_deleted_bytes; - - UPDATE logs - SET log_start_offset = l_converted_offset, - byte_size = byte_size - l_deleted_bytes - WHERE topic_id = l_log.topic_id - AND partition = l_log.partition; - - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, l_converted_offset)::delete_records_response_v1; - END LOOP; - - -- Out of the affected files, select those that are now empty (i.e. no batch refers to them) - -- and mark them for deletion. 
- PERFORM mark_file_to_delete_v1(arg_now, file_id) - FROM ( - SELECT DISTINCT af.file_id - FROM affected_files AS af - WHERE NOT EXISTS ( - SELECT 1 - FROM batches AS b - WHERE b.file_id = af.file_id - ) - ); -END; -$$ -; diff --git a/storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql b/storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql deleted file mode 100644 index 4b26f377ed..0000000000 --- a/storage/inkless/bin/main/db/migration/V7__Find_batches_function.sql +++ /dev/null @@ -1,114 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ - -CREATE TYPE find_batches_request_v1 AS ( - topic_id topic_id_t, - partition partition_t, - starting_offset BIGINT, - max_partition_fetch_bytes INT -); - -CREATE TYPE batch_info_v1 AS ( - batch_id BIGINT, - object_key object_key_t, - batch_metadata batch_metadata_v1 -); - -CREATE TYPE find_batches_response_error_v1 AS ENUM ( - 'offset_out_of_range', - 'unknown_topic_or_partition' -); - -CREATE TYPE find_batches_response_v1 AS ( - topic_id topic_id_t, - partition partition_t, - log_start_offset offset_with_minus_one_t, - high_watermark offset_with_minus_one_t, - batches batch_info_v1[], - error find_batches_response_error_v1 -); - -CREATE OR REPLACE FUNCTION find_batches_v1( - arg_requests find_batches_request_v1[], - fetch_max_bytes INT -) -RETURNS SETOF find_batches_response_v1 LANGUAGE sql STABLE AS $$ - WITH - requests AS ( - SELECT - r.topic_id, - r.partition, - r.starting_offset, - r.max_partition_fetch_bytes, - r.ordinality AS idx -- for preserving original order - FROM unnest(arg_requests) WITH ORDINALITY AS r(topic_id, partition, starting_offset, max_partition_fetch_bytes, ordinality) - ), - requests_with_log_info AS ( - SELECT - r.idx, r.topic_id, r.partition, r.starting_offset, r.max_partition_fetch_bytes, - l.log_start_offset, l.high_watermark, l.topic_name, - CASE - WHEN l.topic_id IS NULL THEN 'unknown_topic_or_partition'::find_batches_response_error_v1 - WHEN r.starting_offset < 0 OR r.starting_offset > l.high_watermark THEN 'offset_out_of_range'::find_batches_response_error_v1 - ELSE NULL - END AS error - FROM requests r - LEFT JOIN logs l ON r.topic_id = l.topic_id AND r.partition = l.partition - ), - all_batches_with_metadata AS ( - SELECT - r.idx, - ( - b.batch_id, - f.object_key, - ( - b.magic, b.topic_id, r.topic_name, b.partition, b.byte_offset, b.byte_size, - b.base_offset, b.last_offset, b.log_append_timestamp, b.batch_max_timestamp, - b.timestamp_type - )::batch_metadata_v1 - )::batch_info_v1 AS batch_data, - b.byte_size, b.base_offset, r.max_partition_fetch_bytes, - ROW_NUMBER() OVER (PARTITION BY r.idx ORDER BY b.base_offset) as rn, - SUM(b.byte_size) OVER (PARTITION BY r.idx ORDER BY b.base_offset) as partition_cumulative_bytes - FROM requests_with_log_info r - JOIN batches b ON r.topic_id = b.topic_id AND r.partition = b.partition - JOIN files f ON b.file_id = f.file_id - WHERE r.error IS NULL - AND b.last_offset >= r.starting_offset - AND b.base_offset < r.high_watermark - ), - per_partition_limited_batches AS ( - SELECT idx, batch_data, byte_size, base_offset, rn - FROM all_batches_with_metadata - WHERE rn = 1 -- each partition gets always at least one batch - -- include also last batch, even if it overflows max.partition.fetch.bytes - OR (partition_cumulative_bytes - byte_size) < max_partition_fetch_bytes - ), - final_batch_set AS ( - SELECT idx, batch_data, base_offset, rn - FROM ( - SELECT *, SUM(byte_size) OVER (ORDER BY idx, base_offset) as global_cumulative_bytes 
- FROM per_partition_limited_batches - ) AS sized_batches - WHERE rn = 1 OR -- each partition gets always at least one batch - -- include also last batch, even if it overflows fetch.max.bytes - (global_cumulative_bytes - byte_size) < fetch_max_bytes - ), - aggregated_batches AS ( - SELECT - idx, - array_agg(batch_data ORDER BY base_offset) AS batches - FROM final_batch_set - GROUP BY idx - ) - SELECT - r.topic_id, - r.partition, - COALESCE(r.log_start_offset, -1), - COALESCE(r.high_watermark, -1), - CASE WHEN r.error IS NULL THEN COALESCE(ab.batches, '{}'::batch_info_v1[]) ELSE NULL END, - r.error - FROM requests_with_log_info r - LEFT JOIN aggregated_batches ab ON r.idx = ab.idx - ORDER BY r.idx; -$$; - diff --git a/storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql b/storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql deleted file mode 100644 index 62a4ea57b6..0000000000 --- a/storage/inkless/bin/main/db/migration/V8__Find_batches_function_with_limit.sql +++ /dev/null @@ -1,88 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ -CREATE OR REPLACE FUNCTION find_batches_v2( - arg_requests find_batches_request_v1[], - fetch_max_bytes INT, - max_batches_per_partition INT DEFAULT 0 -) -RETURNS SETOF find_batches_response_v1 LANGUAGE sql STABLE AS $$ - WITH - requests AS ( - SELECT - r.topic_id, - r.partition, - r.starting_offset, - r.max_partition_fetch_bytes, - r.ordinality AS idx -- for preserving original order - FROM unnest(arg_requests) WITH ORDINALITY AS r(topic_id, partition, starting_offset, max_partition_fetch_bytes, ordinality) - ), - requests_with_log_info AS ( - SELECT - r.idx, r.topic_id, r.partition, r.starting_offset, r.max_partition_fetch_bytes, - l.log_start_offset, l.high_watermark, l.topic_name, - CASE - WHEN l.topic_id IS NULL THEN 'unknown_topic_or_partition'::find_batches_response_error_v1 - WHEN r.starting_offset < 0 OR r.starting_offset > l.high_watermark THEN 'offset_out_of_range'::find_batches_response_error_v1 - ELSE NULL - END AS error - FROM requests r - LEFT JOIN logs l ON r.topic_id = l.topic_id AND r.partition = l.partition - ), - all_batches_with_metadata AS ( - SELECT - r.idx, - ( - b.batch_id, - f.object_key, - ( - b.magic, b.topic_id, r.topic_name, b.partition, b.byte_offset, b.byte_size, - b.base_offset, b.last_offset, b.log_append_timestamp, b.batch_max_timestamp, - b.timestamp_type - )::batch_metadata_v1 - )::batch_info_v1 AS batch_data, - b.byte_size, b.base_offset, r.max_partition_fetch_bytes, - ROW_NUMBER() OVER (PARTITION BY r.idx ORDER BY b.base_offset) as rn, - SUM(b.byte_size) OVER (PARTITION BY r.idx ORDER BY b.base_offset) as partition_cumulative_bytes - FROM requests_with_log_info r - JOIN batches b ON r.topic_id = b.topic_id AND r.partition = b.partition - JOIN files f ON b.file_id = f.file_id - WHERE r.error IS NULL - AND b.last_offset >= r.starting_offset - AND b.base_offset < r.high_watermark - ), - per_partition_limited_batches AS ( - SELECT idx, batch_data, byte_size, base_offset, rn - FROM all_batches_with_metadata - WHERE (rn = 1 -- each partition gets always at least one batch - -- include also last batch, even if it overflows max.partition.fetch.bytes - OR (partition_cumulative_bytes - byte_size) < max_partition_fetch_bytes - ) AND (max_batches_per_partition = 0 OR rn <= max_batches_per_partition) - ), - final_batch_set AS ( - SELECT idx, batch_data, base_offset, rn - FROM ( - SELECT *, SUM(byte_size) OVER (ORDER BY idx, base_offset) as 
global_cumulative_bytes - FROM per_partition_limited_batches - ) AS sized_batches - WHERE rn = 1 OR -- each partition gets always at least one batch - -- include also last batch, even if it overflows fetch.max.bytes - (global_cumulative_bytes - byte_size) < fetch_max_bytes - ), - aggregated_batches AS ( - SELECT - idx, - array_agg(batch_data ORDER BY base_offset) AS batches - FROM final_batch_set - GROUP BY idx - ) - SELECT - r.topic_id, - r.partition, - COALESCE(r.log_start_offset, -1), - COALESCE(r.high_watermark, -1), - CASE WHEN r.error IS NULL THEN COALESCE(ab.batches, '{}'::batch_info_v1[]) ELSE NULL END, - r.error - FROM requests_with_log_info r - LEFT JOIN aggregated_batches ab ON r.idx = ab.idx - ORDER BY r.idx; -$$; - diff --git a/storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql b/storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql deleted file mode 100644 index 83c80baba2..0000000000 --- a/storage/inkless/bin/main/db/migration/V9__Retention_enforcement_with_limits.sql +++ /dev/null @@ -1,132 +0,0 @@ --- Copyright (c) 2025 Aiven, Helsinki, Finland. https://aiven.io/ --- Similar to v1, with the addition of max_batches_per_request to limit the number of batches to be deleted per partition --- as a way to cope with the performance impact of executing the updates and further deletion of batches. -CREATE FUNCTION enforce_retention_v2( - arg_now TIMESTAMP WITH TIME ZONE, - arg_requests enforce_retention_request_v1[], - max_batches_per_request INT DEFAULT 0 -) -RETURNS SETOF enforce_retention_response_v1 LANGUAGE plpgsql VOLATILE AS $$ -DECLARE - l_request RECORD; - l_log logs%ROWTYPE; - l_base_offset_of_first_batch_to_keep offset_nullable_t; - l_batches_deleted INT; - l_bytes_deleted BIGINT; - l_delete_records_response delete_records_response_v1; -BEGIN - FOR l_request IN - SELECT * - FROM unnest(arg_requests) - ORDER BY topic_id, partition -- ordering is important to prevent deadlocks - LOOP - SELECT * - FROM logs - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - INTO l_log - FOR UPDATE; - - IF NOT FOUND THEN - RETURN NEXT (l_request.topic_id, l_request.partition, 'unknown_topic_or_partition', NULL, NULL, NULL)::enforce_retention_response_v1; - CONTINUE; - END IF; - - l_base_offset_of_first_batch_to_keep = NULL; - - IF l_request.retention_bytes >= 0 OR l_request.retention_ms >= 0 THEN - WITH augmented_batches AS ( - -- For retention by size: - -- Associate with each batch the number of bytes that the log would have if this batch and later batches are retained. - -- In other words, this is the reverse aggregated size (counted from the end to the beginning). - -- An example: - -- Batch size | Aggregated | Reverse aggregated | - -- (in order) | size | size | - -- 1 | 1 | 10 - 1 + 1 = 10 | - -- 2 | 1 + 2 = 3 | 10 - 3 + 2 = 9 | - -- 3 | 3 + 3 = 6 | 10 - 6 + 3 = 7 | - -- 4 | 6 + 4 = 10 | 10 - 10 + 4 = 4 | - -- The reverse aggregated size is equal to what the aggregated size would be if the sorting order is reverse, - -- but doing so explicitly might be costly, hence the formula. - -- For retention by time: - -- Associate with each batch its effective timestamp. 
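For the two fetch-planning functions above, find_batches_v2 only adds the max_batches_per_partition cap on top of find_batches_v1. A hedged usage sketch with placeholder values:

    -- Fetch from offset 0 of partition 0, capped at 1 MiB for the partition,
    -- 50 MiB for the whole response, and at most 100 batches per partition.
    SELECT *
    FROM find_batches_v2(
        ARRAY[ROW('00000000-0000-0000-0000-000000000001'::uuid, 0, 0, 1048576)::find_batches_request_v1],
        52428800,   -- fetch_max_bytes
        100         -- max_batches_per_partition (0 = unlimited)
    );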
- SELECT topic_id, partition, last_offset, - base_offset, - l_log.byte_size - SUM(byte_size) OVER (ORDER BY topic_id, partition, last_offset) + byte_size AS reverse_agg_byte_size, - batch_timestamp(timestamp_type, batch_max_timestamp, log_append_timestamp) AS effective_timestamp - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - ORDER BY topic_id, partition, last_offset - ) - -- Look for the first batch that complies with both retention policies (if they are enabled): - -- For size: - -- The first batch which being retained with the subsequent batches would make the total log size <= retention_bytes. - -- For time: - -- The first batch which effective timestamp is greater or equal to the last timestamp to retain. - SELECT base_offset - FROM augmented_batches - WHERE (l_request.retention_bytes < 0 OR reverse_agg_byte_size <= l_request.retention_bytes) - AND (l_request.retention_ms < 0 OR effective_timestamp >= (EXTRACT(EPOCH FROM arg_now AT TIME ZONE 'UTC') * 1000)::BIGINT - l_request.retention_ms) - ORDER BY topic_id, partition, last_offset - LIMIT 1 - INTO l_base_offset_of_first_batch_to_keep; - - -- No batch satisfy the retention policy == delete everything, i.e. up to HWM. - l_base_offset_of_first_batch_to_keep = COALESCE(l_base_offset_of_first_batch_to_keep, l_log.high_watermark); - - -- Enforce the limit of batches per request if needed - IF max_batches_per_request > 0 THEN - -- Find the base offset of the batch that would be the Nth one to delete - -- This effectively limits how many batches we'll delete in one call - WITH batches_to_delete AS ( - SELECT base_offset - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND last_offset < l_base_offset_of_first_batch_to_keep - ORDER BY topic_id, partition, last_offset - ) - SELECT - CASE - WHEN COUNT(*) <= max_batches_per_request THEN l_base_offset_of_first_batch_to_keep - ELSE (SELECT base_offset FROM batches_to_delete ORDER BY base_offset LIMIT 1 OFFSET max_batches_per_request) - END INTO l_base_offset_of_first_batch_to_keep - FROM batches_to_delete; - END IF; - END IF; - - -- Nothing to delete. - IF l_base_offset_of_first_batch_to_keep IS NULL THEN - RETURN NEXT (l_request.topic_id, l_request.partition, NULL, 0, 0::BIGINT, l_log.log_start_offset)::enforce_retention_response_v1; - CONTINUE; - END IF; - - SELECT COUNT(*), SUM(byte_size) - FROM batches - WHERE topic_id = l_request.topic_id - AND partition = l_request.partition - AND last_offset < l_base_offset_of_first_batch_to_keep - INTO l_batches_deleted, l_bytes_deleted; - - SELECT * - FROM delete_records_v1(arg_now, array[ROW(l_request.topic_id, l_request.partition, l_base_offset_of_first_batch_to_keep)::delete_records_request_v1]) - INTO l_delete_records_response; - - -- This should never happen, just fail. 
- IF l_delete_records_response.error IS DISTINCT FROM NULL THEN - RAISE 'delete_records_v1 returned unexpected error: %', l_delete_records_response; - END IF; - - RETURN NEXT ( - l_request.topic_id, - l_request.partition, - NULL::enforce_retention_response_error_v1, - COALESCE(l_batches_deleted, 0), - COALESCE(l_bytes_deleted, 0), - l_delete_records_response.log_start_offset - )::enforce_retention_response_v1; - END LOOP; -END; -$$ -; diff --git a/storage/inkless/bin/main/message/CacheKey.json b/storage/inkless/bin/main/message/CacheKey.json deleted file mode 100644 index 37f45f2459..0000000000 --- a/storage/inkless/bin/main/message/CacheKey.json +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Inkless - * Copyright (C) 2024 - 2025 Aiven OY - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -{ - "apiKey": 1, - "name": "CacheKey", - "type": "data", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { - "name": "Object", - "type": "string", - "versions": "0", - "about": "Identifier for object in backing storage" - }, - { - "name": "Range", - "type": "ByteRange", - "versions": "0", - "about": "A range of bytes within the specified file", - "fields": [ - { - "name": "Offset", - "type": "int64", - "versions": "0", - "about": "The index of the first byte in the object" - }, - { - "name": "Length", - "type": "int64", - "versions": "0", - "about": "Number of bytes" - } - ] - } - ] -} diff --git a/storage/inkless/bin/main/message/FileExtent.json b/storage/inkless/bin/main/message/FileExtent.json deleted file mode 100644 index 1af9d01afe..0000000000 --- a/storage/inkless/bin/main/message/FileExtent.json +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Inkless - * Copyright (C) 2024 - 2025 Aiven OY - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ -{ - "apiKey": 2, - "name": "FileExtent", - "type": "data", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { - "name": "Object", - "type": "string", - "versions": "0", - "about": "Object identifier" - }, - { - "name": "Range", - "type": "ByteRange", - "versions": "0", - "about": "Range of bytes stored in the specified buffer", - "fields": [ - { - "name": "Offset", - "type": "int64", - "versions": "0", - "about": "The index of the first byte in the object" - }, - { - "name": "Length", - "type": "int64", - "versions": "0", - "about": "Number of bytes" - } - ] - }, - { - "name": "Data", - "type": "bytes", - "versions": "0+", - "about": "Start offset of the segment." - } - ] -} \ No newline at end of file diff --git a/storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider b/storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider deleted file mode 100644 index 1742cba093..0000000000 --- a/storage/inkless/bin/test/META-INF/services/net.jqwik.api.providers.ArbitraryProvider +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2024 Aiven, Helsinki, Finland. https://aiven.io/ -io.aiven.inkless.test_utils.HeaderProvider -io.aiven.inkless.test_utils.SimpleRecordProvider -io.aiven.inkless.test_utils.RecordsProvider -io.aiven.inkless.test_utils.TopicIdPartitionProvider -io.aiven.inkless.test_utils.DataLayout$DataLayoutArbitraryProvider diff --git a/storage/inkless/bin/test/log4j.properties b/storage/inkless/bin/test/log4j.properties deleted file mode 100644 index 0d46d239df..0000000000 --- a/storage/inkless/bin/test/log4j.properties +++ /dev/null @@ -1,7 +0,0 @@ -log4j.rootLogger=INFO, stdout - -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n - -log4j.logger.io.aiven.inkless.control_plane.InMemoryControlPlane=WARN diff --git a/storage/inkless/bin/test/test_gcs_credentials.json b/storage/inkless/bin/test/test_gcs_credentials.json deleted file mode 100644 index f73e506cb8..0000000000 --- a/storage/inkless/bin/test/test_gcs_credentials.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "client_id": "test-client-id", - "client_secret": "test-client-secret", - "refresh_token": "x", - "type": "authorized_user" -} diff --git a/streams/integration-tests/bin/test/log4j2.yaml b/streams/integration-tests/bin/test/log4j2.yaml deleted file mode 100644 index 0942036a33..0000000000 --- a/streams/integration-tests/bin/test/log4j2.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
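For context on the two control-plane routines deleted above: find_batches_v2 (the V8 migration) and enforce_retention_v2 (the V9 migration) are plain PostgreSQL set-returning functions, so they can be exercised directly from psql. The sketch below is illustrative only: the field order of the find_batches_request_v1 and enforce_retention_request_v1 composite types, the uuid type of topic_id, and all literal values are assumptions read off the function bodies above, not anything stated in this patch.

-- Fetch for one partition starting at offset 100, capped at 1 MiB for the
-- partition, 8 MiB for the whole request and 50 batches per partition.
SELECT *
FROM find_batches_v2(
  ARRAY[
    ROW('11111111-2222-3333-4444-555555555555'::uuid,  -- topic_id (type assumed)
        0,                                              -- partition
        100,                                            -- starting_offset
        1048576                                         -- max_partition_fetch_bytes
    )::find_batches_request_v1
  ],
  8388608,  -- fetch_max_bytes
  50        -- max_batches_per_partition (0, the default, leaves it uncapped)
);

-- Enforce 1 GiB / 7 days retention on the same partition, deleting at most
-- 1000 batches in this call (the cap added on top of enforce_retention_v1).
SELECT *
FROM enforce_retention_v2(
  now(),
  ARRAY[
    ROW('11111111-2222-3333-4444-555555555555'::uuid,  -- topic_id
        0,                                              -- partition
        1073741824,                                     -- retention_bytes (-1 disables)
        604800000                                       -- retention_ms (-1 disables)
    )::enforce_retention_request_v1
  ],
  1000      -- max_batches_per_request (0, the default, means no cap)
);

Both functions default their limit argument to 0, so existing callers keep the previous unlimited behaviour; the caps only change results when a positive value is passed.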
- -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: kafka - level: ERROR - - - name: state.change.logger - level: ERROR - - - name: org.apache.kafka - level: ERROR - - - name: org.apache.kafka.clients - level: ERROR - - - name: org.apache.kafka.clients.consumer - level: INFO - - - name: org.apache.kafka.clients.producer - level: INFO - - - name: org.apache.kafka.streams - level: INFO - - - name: org.apache.kafka.clients.producer.ProducerConfig - level: ERROR - - - name: org.apache.kafka.clients.consumer.ConsumerConfig - level: ERROR - - - name: org.apache.kafka.clients.admin.AdminClientConfig - level: ERROR - - - name: org.apache.kafka.streams.StreamsConfig - level: ERROR diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala deleted file mode 100644 index 4cfc811728..0000000000 --- a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/StreamToTableJoinScalaIntegrationTestImplicitSerdes.scala +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.integration - -import org.apache.kafka.streams.integration.utils.StreamToTableJoinScalaIntegrationTestBase -import org.apache.kafka.streams.scala.ImplicitConversions._ -import org.apache.kafka.streams.scala.StreamsBuilder -import org.apache.kafka.streams.scala.kstream._ -import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} -import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} -import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api._ - -import java.util.Properties - -/** - * Test suite that does an example to demonstrate stream-table joins in Kafka Streams - *
- * The suite contains the test case using Scala APIs `testShouldCountClicksPerRegion` and the same test case using the - * Java APIs `testShouldCountClicksPerRegionJava`. The idea is to demonstrate that both generate the same result. - */ -@Tag("integration") -class StreamToTableJoinScalaIntegrationTestImplicitSerdes extends StreamToTableJoinScalaIntegrationTestBase { - - @Test def testShouldCountClicksPerRegion(): Unit = { - - // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Grouped, Produced, - // Consumed and Joined instances. So all APIs below that accept Grouped, Produced, Consumed or Joined will - // get these instances automatically - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamsConfiguration: Properties = getStreamsConfiguration() - - val builder = new StreamsBuilder() - - val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) - - val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) - - // Compute the total per region by summing the individual click counts per region. - val clicksPerRegion: KTable[String, Long] = - userClicksStream - - // Join the stream against the table. - .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) - - // Change the stream from -> to -> - .map((_, regionWithClicks) => regionWithClicks) - - // Compute the total per region by summing the individual click counts per region. - .groupByKey - .reduce(_ + _) - - // Write the (continuously updating) results to the output topic. - clicksPerRegion.toStream.to(outputTopic) - - val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) - streams.start() - - val actualClicksPerRegion: java.util.List[KeyValue[String, Long]] = - produceNConsume(userClicksTopic, userRegionsTopic, outputTopic) - - assertTrue(!actualClicksPerRegion.isEmpty, "Expected to process some data") - - streams.close() - } - - @Test - def testShouldCountClicksPerRegionWithNamedRepartitionTopic(): Unit = { - - // DefaultSerdes brings into scope implicit serdes (mostly for primitives) that will set up all Grouped, Produced, - // Consumed and Joined instances. So all APIs below that accept Grouped, Produced, Consumed or Joined will - // get these instances automatically - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamsConfiguration: Properties = getStreamsConfiguration() - - val builder = new StreamsBuilder() - - val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) - - val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) - - // Compute the total per region by summing the individual click counts per region. - val clicksPerRegion: KTable[String, Long] = - userClicksStream - - // Join the stream against the table. - .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) - - // Change the stream from -> to -> - .map((_, regionWithClicks) => regionWithClicks) - - // Compute the total per region by summing the individual click counts per region. - .groupByKey - .reduce(_ + _) - - // Write the (continuously updating) results to the output topic. 
- clicksPerRegion.toStream.to(outputTopic) - - val streams: KafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration) - streams.start() - - val actualClicksPerRegion: java.util.List[KeyValue[String, Long]] = - produceNConsume(userClicksTopic, userRegionsTopic, outputTopic) - - assertTrue(!actualClicksPerRegion.isEmpty, "Expected to process some data") - - streams.close() - } - - @Test - def testShouldCountClicksPerRegionJava(): Unit = { - - import org.apache.kafka.streams.kstream.{KStream => KStreamJ, KTable => KTableJ, _} - import org.apache.kafka.streams.{KafkaStreams => KafkaStreamsJ, StreamsBuilder => StreamsBuilderJ} - - import java.lang.{Long => JLong} - - val streamsConfiguration: Properties = getStreamsConfiguration() - - streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) - streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) - - val builder: StreamsBuilderJ = new StreamsBuilderJ() - - val userClicksStream: KStreamJ[String, JLong] = - builder.stream[String, JLong](userClicksTopicJ, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) - - val userRegionsTable: KTableJ[String, String] = - builder.table[String, String](userRegionsTopicJ, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - - // Join the stream against the table. - val valueJoinerJ: ValueJoiner[JLong, String, (String, JLong)] = - (clicks: JLong, region: String) => (if (region == null) "UNKNOWN" else region, clicks) - val userClicksJoinRegion: KStreamJ[String, (String, JLong)] = userClicksStream.leftJoin( - userRegionsTable, - valueJoinerJ, - Joined.`with`[String, JLong, String](NewSerdes.stringSerde, NewSerdes.javaLongSerde, NewSerdes.stringSerde) - ) - - // Change the stream from -> to -> - val clicksByRegion: KStreamJ[String, JLong] = userClicksJoinRegion.map { (_, regionWithClicks) => - new KeyValue(regionWithClicks._1, regionWithClicks._2) - } - - // Compute the total per region by summing the individual click counts per region. - val clicksPerRegion: KTableJ[String, JLong] = clicksByRegion - .groupByKey(Grouped.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) - .reduce((v1, v2) => v1 + v2) - - // Write the (continuously updating) results to the output topic. - clicksPerRegion.toStream.to(outputTopicJ, Produced.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) - - val streams = new KafkaStreamsJ(builder.build(), streamsConfiguration) - - streams.start() - produceNConsume(userClicksTopicJ, userRegionsTopicJ, outputTopicJ) - streams.close() - } -} diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala deleted file mode 100644 index 3e9813dda2..0000000000 --- a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/WordCountTest.scala +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.integration - -import java.util.Properties -import java.util.regex.Pattern -import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api._ -import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} -import org.apache.kafka.streams.scala.ImplicitConversions._ -import org.apache.kafka.streams.scala.StreamsBuilder -import org.apache.kafka.streams.{KafkaStreams, KeyValue, StreamsConfig} -import org.apache.kafka.streams.scala.kstream._ -import org.apache.kafka.streams.integration.utils.{EmbeddedKafkaCluster, IntegrationTestUtils} -import org.apache.kafka.clients.consumer.ConsumerConfig -import org.apache.kafka.clients.producer.ProducerConfig -import org.apache.kafka.common.utils.{MockTime, Utils} -import org.apache.kafka.common.serialization.{LongDeserializer, StringDeserializer, StringSerializer} -import org.apache.kafka.test.TestUtils -import org.junit.jupiter.api.Tag - -import java.io.File - -/** - * Test suite that does a classic word count example. - *
- * The suite contains the test case using Scala APIs `testShouldCountWords` and the same test case using the - * Java APIs `testShouldCountWordsJava`. The idea is to demonstrate that both generate the same result. - */ -@Tag("integration") -class WordCountTest extends WordCountTestData { - - private val cluster: EmbeddedKafkaCluster = new EmbeddedKafkaCluster(1) - - final private val alignedTime = (System.currentTimeMillis() / 1000 + 1) * 1000 - private val mockTime: MockTime = cluster.time - mockTime.setCurrentTimeMs(alignedTime) - - private val testFolder: File = TestUtils.tempDirectory() - - @BeforeEach - def startKafkaCluster(): Unit = { - cluster.start() - cluster.createTopic(inputTopic) - cluster.createTopic(outputTopic) - cluster.createTopic(inputTopicJ) - cluster.createTopic(outputTopicJ) - } - - @AfterEach - def stopKafkaCluster(): Unit = { - cluster.stop() - Utils.delete(testFolder) - } - - @Test - def testShouldCountWords(): Unit = { - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamsConfiguration = getStreamsConfiguration() - - val streamBuilder = new StreamsBuilder - val textLines = streamBuilder.stream[String, String](inputTopic) - - val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) - - // generate word counts - val wordCounts: KTable[String, Long] = - textLines - .flatMapValues(v => pattern.split(v.toLowerCase)) - .groupBy((_, v) => v) - .count() - - // write to output topic - wordCounts.toStream.to(outputTopic) - - val streams = new KafkaStreams(streamBuilder.build(), streamsConfiguration) - streams.start() - - // produce and consume synchronously - val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopic, outputTopic) - - streams.close() - - import scala.jdk.CollectionConverters._ - assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) - } - - @Test - def testShouldCountWordsMaterialized(): Unit = { - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamsConfiguration = getStreamsConfiguration() - - val streamBuilder = new StreamsBuilder - val textLines = streamBuilder.stream[String, String](inputTopic) - - val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) - - // generate word counts - val wordCounts: KTable[String, Long] = - textLines - .flatMapValues(v => pattern.split(v.toLowerCase)) - .groupBy((k, v) => v) - .count()(Materialized.as("word-count")) - - // write to output topic - wordCounts.toStream.to(outputTopic) - - val streams = new KafkaStreams(streamBuilder.build(), streamsConfiguration) - streams.start() - - // produce and consume synchronously - val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopic, outputTopic) - - streams.close() - - import scala.jdk.CollectionConverters._ - assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) - } - - @Test - def testShouldCountWordsJava(): Unit = { - - import org.apache.kafka.streams.{KafkaStreams => KafkaStreamsJ, StreamsBuilder => StreamsBuilderJ} - import org.apache.kafka.streams.kstream.{ - KTable => KTableJ, - KStream => KStreamJ, - KGroupedStream => KGroupedStreamJ, - _ - } - import scala.jdk.CollectionConverters._ - - val streamsConfiguration = getStreamsConfiguration() - streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) - 
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, NewSerdes.stringSerde.getClass.getName) - - val streamBuilder = new StreamsBuilderJ - val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopicJ) - - val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) - - val splits: KStreamJ[String, String] = textLines.flatMapValues { line => - pattern.split(line.toLowerCase).toBuffer.asJava - } - - val grouped: KGroupedStreamJ[String, String] = splits.groupBy { (_, v) => - v - } - - val wordCounts: KTableJ[String, java.lang.Long] = grouped.count() - - wordCounts.toStream.to(outputTopicJ, Produced.`with`(NewSerdes.stringSerde, NewSerdes.javaLongSerde)) - - val streams: KafkaStreamsJ = new KafkaStreamsJ(streamBuilder.build(), streamsConfiguration) - streams.start() - - val actualWordCounts: java.util.List[KeyValue[String, Long]] = produceNConsume(inputTopicJ, outputTopicJ) - - streams.close() - - assertEquals(actualWordCounts.asScala.take(expectedWordCounts.size).sortBy(_.key), expectedWordCounts.sortBy(_.key)) - } - - private def getStreamsConfiguration(): Properties = { - val streamsConfiguration: Properties = new Properties() - - streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-test") - streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "10000") - streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") - streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getPath) - streamsConfiguration - } - - private def getProducerConfig(): Properties = { - val p = new Properties() - p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - p.put(ProducerConfig.ACKS_CONFIG, "all") - p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) - p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) - p - } - - private def getConsumerConfig(): Properties = { - val p = new Properties() - p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - p.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-scala-integration-test-standard-consumer") - p.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") - p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) - p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[LongDeserializer]) - p - } - - private def produceNConsume(inputTopic: String, outputTopic: String): java.util.List[KeyValue[String, Long]] = { - - val linesProducerConfig: Properties = getProducerConfig() - - import scala.jdk.CollectionConverters._ - IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues.asJava, linesProducerConfig, mockTime) - - val consumerConfig = getConsumerConfig() - - IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedWordCounts.size) - } -} - -trait WordCountTestData { - val inputTopic = s"inputTopic" - val outputTopic = s"outputTopic" - val inputTopicJ = s"inputTopicJ" - val outputTopicJ = s"outputTopicJ" - - val inputValues = List( - "Hello Kafka Streams", - "All streams lead to Kafka", - "Join Kafka Summit", - "И теперь пошли русские слова" - ) - - val expectedWordCounts: List[KeyValue[String, Long]] = List( - new KeyValue("hello", 1L), - new KeyValue("all", 1L), - new KeyValue("streams", 2L), - new KeyValue("lead", 1L), - new KeyValue("to", 1L), - new KeyValue("join", 1L), - new 
KeyValue("kafka", 3L), - new KeyValue("summit", 1L), - new KeyValue("и", 1L), - new KeyValue("теперь", 1L), - new KeyValue("пошли", 1L), - new KeyValue("русские", 1L), - new KeyValue("слова", 1L) - ) -} diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala deleted file mode 100644 index f3aec5784c..0000000000 --- a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinScalaIntegrationTestBase.scala +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.integration.utils - -import org.apache.kafka.clients.consumer.ConsumerConfig -import org.apache.kafka.clients.producer.ProducerConfig -import org.apache.kafka.common.serialization._ -import org.apache.kafka.common.utils.{MockTime, Utils} -import org.apache.kafka.streams._ -import org.apache.kafka.test.TestUtils -import org.junit.jupiter.api._ - -import java.io.File -import java.util.Properties - -/** - * Test suite base that prepares Kafka cluster for stream-table joins in Kafka Streams - *
- */ -@Tag("integration") -class StreamToTableJoinScalaIntegrationTestBase extends StreamToTableJoinTestData { - - private val cluster: EmbeddedKafkaCluster = new EmbeddedKafkaCluster(1) - - final private val alignedTime = (System.currentTimeMillis() / 1000 + 1) * 1000 - private val mockTime: MockTime = cluster.time - mockTime.setCurrentTimeMs(alignedTime) - - private val testFolder: File = TestUtils.tempDirectory() - - @BeforeEach - def startKafkaCluster(): Unit = { - cluster.start() - cluster.createTopic(userClicksTopic) - cluster.createTopic(userRegionsTopic) - cluster.createTopic(outputTopic) - cluster.createTopic(userClicksTopicJ) - cluster.createTopic(userRegionsTopicJ) - cluster.createTopic(outputTopicJ) - } - - @AfterEach - def stopKafkaCluster(): Unit = { - cluster.stop() - Utils.delete(testFolder) - } - - def getStreamsConfiguration(): Properties = { - val streamsConfiguration: Properties = new Properties() - - streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-table-join-scala-integration-test") - streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, "1000") - streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") - streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getPath) - - streamsConfiguration - } - - private def getUserRegionsProducerConfig(): Properties = { - val p = new Properties() - p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - p.put(ProducerConfig.ACKS_CONFIG, "all") - p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) - p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) - p - } - - private def getUserClicksProducerConfig(): Properties = { - val p = new Properties() - p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - p.put(ProducerConfig.ACKS_CONFIG, "all") - p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer]) - p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[LongSerializer]) - p - } - - private def getConsumerConfig(): Properties = { - val p = new Properties() - p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers()) - p.put(ConsumerConfig.GROUP_ID_CONFIG, "join-scala-integration-test-standard-consumer") - p.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") - p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer]) - p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[LongDeserializer]) - p - } - - def produceNConsume( - userClicksTopic: String, - userRegionsTopic: String, - outputTopic: String, - waitTillRecordsReceived: Boolean = true - ): java.util.List[KeyValue[String, Long]] = { - - import _root_.scala.jdk.CollectionConverters._ - - // Publish user-region information. - val userRegionsProducerConfig: Properties = getUserRegionsProducerConfig() - IntegrationTestUtils.produceKeyValuesSynchronously( - userRegionsTopic, - userRegions.asJava, - userRegionsProducerConfig, - mockTime, - false - ) - - // Publish user-click information. 
- val userClicksProducerConfig: Properties = getUserClicksProducerConfig() - IntegrationTestUtils.produceKeyValuesSynchronously( - userClicksTopic, - userClicks.asJava, - userClicksProducerConfig, - mockTime, - false - ) - - if (waitTillRecordsReceived) { - // consume and verify result - val consumerConfig = getConsumerConfig() - - IntegrationTestUtils.waitUntilFinalKeyValueRecordsReceived( - consumerConfig, - outputTopic, - expectedClicksPerRegion.asJava - ) - } else { - java.util.Collections.emptyList() - } - } -} diff --git a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala b/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala deleted file mode 100644 index 4e8a2f024a..0000000000 --- a/streams/integration-tests/bin/test/org/apache/kafka/streams/integration/utils/StreamToTableJoinTestData.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.integration.utils - -import org.apache.kafka.streams.KeyValue - -trait StreamToTableJoinTestData { - val brokers = "localhost:9092" - - val userClicksTopic = s"user-clicks" - val userRegionsTopic = s"user-regions" - val outputTopic = s"output-topic" - - val userClicksTopicJ = s"user-clicks-j" - val userRegionsTopicJ = s"user-regions-j" - val outputTopicJ = s"output-topic-j" - - // Input 1: Clicks per user (multiple records allowed per user). - val userClicks: Seq[KeyValue[String, Long]] = Seq( - new KeyValue("alice", 13L), - new KeyValue("bob", 4L), - new KeyValue("chao", 25L), - new KeyValue("bob", 19L), - new KeyValue("dave", 56L), - new KeyValue("eve", 78L), - new KeyValue("alice", 40L), - new KeyValue("fang", 99L) - ) - - // Input 2: Region per user (multiple records allowed per user). - val userRegions: Seq[KeyValue[String, String]] = Seq( - new KeyValue("alice", "asia"), /* Alice lived in Asia originally... */ - new KeyValue("bob", "americas"), - new KeyValue("chao", "asia"), - new KeyValue("dave", "europe"), - new KeyValue("alice", "europe"), /* ...but moved to Europe some time later. 
*/ - new KeyValue("eve", "americas"), - new KeyValue("fang", "asia") - ) - - val expectedClicksPerRegion: Seq[KeyValue[String, Long]] = Seq( - new KeyValue("americas", 101L), - new KeyValue("europe", 109L), - new KeyValue("asia", 124L) - ) -} diff --git a/test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md b/test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md deleted file mode 100644 index e69a381161..0000000000 --- a/test-common/test-common-internal-api/bin/main/org/apache/kafka/common/test/api/README.md +++ /dev/null @@ -1,192 +0,0 @@ -This document describes a custom JUnit extension which allows for running the same JUnit tests against multiple Kafka -cluster configurations. - -# Annotations - -Three annotations are provided for defining a template of a Kafka cluster. - -* `@ClusterTest`: declarative style cluster definition -* `@ClusterTests`: wrapper around multiple `@ClusterTest`-s -* `@ClusterTemplate`: points to a function for imperative cluster definition - -Another helper annotation `@ClusterTestDefaults` allows overriding the defaults for -all `@ClusterTest` in a single test class. - -# Usage - -The simplest usage is `@ClusterTest` by itself which will use some reasonable defaults. - -```java -public class SampleTest { - @ClusterTest - void testSomething() { ... } -} -``` - -The defaults can be modified by setting specific paramters on the annotation. - -```java -public class SampleTest { - @ClusterTest(brokers = 3, metadataVersion = MetadataVersion.IBP_4_0_IV3) - void testSomething() { ... } -} -``` - -It is also possible to modify the defaults for a whole class using `@ClusterTestDefaults`. - -```java -@ClusterTestDefaults(brokers = 3, metadataVersion = MetadataVersion.IBP_4_0_IV3) -public class SampleTest { - @ClusterTest - void testSomething() { ... } -} -``` - -To set some specific config, an array of `@ClusterProperty` annotations can be -given. - -```java -public class SampleTest { - @ClusterTest( - types = {Type.KRAFT}, - brokerSecurityProtocol = SecurityProtocol.PLAINTEXT, - properties = { - @ClusterProperty(key = "inter.broker.protocol.version", value = "2.7-IV2"), - @ClusterProperty(key = "socket.send.buffer.bytes", value = "10240"), - }) - void testSomething() { ... } -} -``` - -Using the `@ClusterTests` annotation, multiple declarative cluster templates can -be given. - -```java -public class SampleTest { - @ClusterTests({ - @ClusterTest(brokerSecurityProtocol = SecurityProtocol.PLAINTEXT), - @ClusterTest(brokerSecurityProtocol = SecurityProtocol.SASL_PLAINTEXT) - }) - void testSomething() { ... } -} -``` - -# Dynamic Configuration - -In order to allow for more flexible cluster configuration, a `@ClusterTemplate` -annotation is also introduced. This annotation takes a single string value which -references a static method on the test class. This method is used to produce any -number of test configurations using a fluent builder style API. - -```java -import java.util.List; - -@ClusterTemplate("generateConfigs") -void testSomething() { ... 
} - -static List generateConfigs() { - ClusterConfig config1 = ClusterConfig.defaultClusterBuilder() - .name("Generated Test 1") - .serverProperties(props1) - .setMetadataVersion(MetadataVersion.IBP_2_7_IV1) - .build(); - ClusterConfig config2 = ClusterConfig.defaultClusterBuilder() - .name("Generated Test 2") - .serverProperties(props2) - .setMetadataVersion(MetadataVersion.IBP_2_7_IV2) - .build(); - ClusterConfig config3 = ClusterConfig.defaultClusterBuilder() - .name("Generated Test 3") - .serverProperties(props3) - .build(); - return List.of(config1, config2, config3); -} -``` - -This alternate configuration style makes it easy to create any number of complex -configurations. Each returned ClusterConfig by a template method will result in -an additional variation of the run. - - -# JUnit Extension - -The core logic of our test framework lies in `ClusterTestExtensions` which is a -JUnit extension. It is automatically registered using SPI and will look for test -methods that include one of the three annotations mentioned above. - -This way of dynamically generating tests uses the JUnit concept of test templates. - -# JUnit Lifecycle - -JUnit discovers test template methods that are annotated with `@ClusterTest`, -`@ClusterTests`, or `@ClusterTemplate`. These annotations are processed and some -number of test invocations are created. - -For each generated test invocation we have the following lifecycle: - -* Static `@BeforeAll` methods are called -* Test class is instantiated -* Kafka Cluster is started (if autoStart=true) -* Non-static `@BeforeEach` methods are called -* Test method is invoked -* Kafka Cluster is stopped -* Non-static `@AfterEach` methods are called -* Static `@AfterAll` methods are called - -`@BeforeEach` methods give an opportunity to set up additional test dependencies -after the cluster has started but before the test method is run. - -# Dependency Injection - -A ClusterInstance object can be injected into the test method or the test class constructor. -This object is a shim to the underlying test framework and provides access to things like -SocketServers and has convenience factory methods for getting a client. - -The class is introduced to provide context to the underlying cluster and to provide reusable -functionality that was previously garnered from the test hierarchy. - -Common usage is to inject this class into a test method - -```java -class SampleTest { - @ClusterTest - public void testOne(ClusterInstance cluster) { - this.cluster.admin().createTopics(...); - // Test code - } -} -``` - -For cases where there is common setup code that involves the cluster (such as -creating topics), it is possible to access the ClusterInstance from a `@BeforeEach` -method. This requires injecting the object in the constructor. For example, - -```java -class SampleTest { - private final ClusterInstance cluster; - - SampleTest(ClusterInstance cluster) { - this.cluster = cluster; - } - - @BeforeEach - public void setup() { - // Common setup code with started ClusterInstance - this.cluster.admin().createTopics(...); - } - - @ClusterTest - public void testOne() { - // Test code - } -} -``` - -It is okay to inject the ClusterInstance in both ways. The same object will be -provided in either case. - -# Gotchas -* Cluster tests are not compatible with other test templates like `@ParameterizedTest` -* Test methods annotated with JUnit's `@Test` will still be run, but no cluster will be started and no dependency - injection will happen. This is generally not what you want. 
-* Even though ClusterConfig is accessible, it is immutable inside the test method. diff --git a/test-common/test-common-internal-api/bin/test/log4j2.yaml b/test-common/test-common-internal-api/bin/test/log4j2.yaml deleted file mode 100644 index be546a18b5..0000000000 --- a/test-common/test-common-internal-api/bin/test/log4j2.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: org.apache.kafka - level: INFO diff --git a/test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension b/test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension deleted file mode 100644 index d7f05be7df..0000000000 --- a/test-common/test-common-runtime/bin/main/META-INF/services/org.junit.jupiter.api.extension.Extension +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -org.apache.kafka.common.test.junit.ClusterTestExtensions \ No newline at end of file diff --git a/test-common/test-common-runtime/bin/main/log4j2.yaml b/test-common/test-common-runtime/bin/main/log4j2.yaml deleted file mode 100644 index be546a18b5..0000000000 --- a/test-common/test-common-runtime/bin/main/log4j2.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: org.apache.kafka - level: INFO diff --git a/transaction-coordinator/bin/main/common/message/TransactionLogKey.json b/transaction-coordinator/bin/main/common/message/TransactionLogKey.json deleted file mode 100644 index 6caa46e4b6..0000000000 --- a/transaction-coordinator/bin/main/common/message/TransactionLogKey.json +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 0, - "type": "coordinator-key", - "name": "TransactionLogKey", - "validVersions": "0", - "flexibleVersions": "none", - "fields": [ - { "name": "TransactionalId", "type": "string", "versions": "0", - "about": "The transactional id of the transaction."} - ] -} diff --git a/transaction-coordinator/bin/main/common/message/TransactionLogValue.json b/transaction-coordinator/bin/main/common/message/TransactionLogValue.json deleted file mode 100644 index 93762afa38..0000000000 --- a/transaction-coordinator/bin/main/common/message/TransactionLogValue.json +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 0, - "type": "coordinator-value", - "name": "TransactionLogValue", - // Version 1 is the first flexible version. - // KIP-915: bumping the version will no longer make this record backward compatible. - // We suggest to add/remove only tagged fields to maintain backward compatibility. 
- "validVersions": "0-1", - "flexibleVersions": "1+", - "fields": [ - { "name": "ProducerId", "type": "int64", "versions": "0+", - "about": "Producer id in use by the transactional id."}, - { "name": "PreviousProducerId", "type": "int64", "taggedVersions": "1+", "tag": 0, "default": -1, - "about": "Producer id used by the last committed transaction."}, - { "name": "NextProducerId", "type": "int64", "taggedVersions": "1+", "tag": 1, "default": -1, - "about": "Latest producer ID sent to the producer for the given transactional id."}, - { "name": "ProducerEpoch", "type": "int16", "versions": "0+", - "about": "Epoch associated with the producer id."}, - { "name": "NextProducerEpoch", "type": "int16", "default": -1, "taggedVersions": "1+", "tag": 3, - "about": "Producer epoch associated with the NextProducerId"}, - { "name": "TransactionTimeoutMs", "type": "int32", "versions": "0+", - "about": "Transaction timeout in milliseconds."}, - { "name": "TransactionStatus", "type": "int8", "versions": "0+", - "about": "TransactionState the transaction is in."}, - { "name": "TransactionPartitions", "type": "[]PartitionsSchema", "versions": "0+", "nullableVersions": "0+", - "about": "Partitions involved in the transaction.", "fields": [ - { "name": "Topic", "type": "string", "versions": "0+", - "about": "Topic involved in the transaction."}, - { "name": "PartitionIds", "type": "[]int32", "versions": "0+", - "about": "Partition ids involved in the transaction."}]}, - { "name": "TransactionLastUpdateTimestampMs", "type": "int64", "versions": "0+", - "about": "Time the transaction was last updated."}, - { "name": "TransactionStartTimestampMs", "type": "int64", "versions": "0+", - "about": "Time the transaction was started."}, - { "name": "ClientTransactionVersion", "type": "int16", "default": 0, "taggedVersions": "1+", "tag": 2, - "about": "The transaction version used by the client."} - ] -} From adc34ac411d7f4f63d45b7193103beebf9cd989f Mon Sep 17 00:00:00 2001 From: Giuseppe Lillo Date: Fri, 9 Jan 2026 16:47:52 +0100 Subject: [PATCH 4/7] a --- ...rg.apache.kafka.connect.sink.SinkConnector | 16 - ...pache.kafka.connect.source.SourceConnector | 16 - connect/file/bin/test/log4j2.yaml | 35 - server-common/bin/test/log4j2.yaml | 35 - .../main/common/message/ShareSnapshotKey.json | 31 - .../common/message/ShareSnapshotValue.json | 48 - .../main/common/message/ShareUpdateKey.json | 31 - .../main/common/message/ShareUpdateValue.json | 42 - shell/bin/test/log4j2.yaml | 32 - .../scala/FunctionsCompatConversions.scala | 141 --- .../streams/scala/ImplicitConversions.scala | 107 --- .../kafka/streams/scala/StreamsBuilder.scala | 195 ---- .../streams/scala/kstream/Branched.scala | 67 -- .../scala/kstream/BranchedKStream.scala | 118 --- .../scala/kstream/CogroupedKStream.scala | 113 --- .../streams/scala/kstream/Consumed.scala | 115 --- .../kafka/streams/scala/kstream/Grouped.scala | 51 - .../kafka/streams/scala/kstream/Joined.scala | 66 -- .../scala/kstream/KGroupedStream.scala | 190 ---- .../streams/scala/kstream/KGroupedTable.scala | 145 --- .../kafka/streams/scala/kstream/KStream.scala | 877 ------------------ .../kafka/streams/scala/kstream/KTable.scala | 806 ---------------- .../streams/scala/kstream/Materialized.scala | 114 --- .../streams/scala/kstream/Produced.scala | 60 -- .../streams/scala/kstream/Repartitioned.scala | 87 -- .../SessionWindowedCogroupedKStream.scala | 64 -- .../kstream/SessionWindowedKStream.scala | 148 --- .../streams/scala/kstream/StreamJoined.scala | 91 -- 
.../TimeWindowedCogroupedKStream.scala | 62 -- .../scala/kstream/TimeWindowedKStream.scala | 142 --- .../kafka/streams/scala/kstream/package.scala | 31 - .../apache/kafka/streams/scala/package.scala | 26 - .../streams/scala/serialization/Serdes.scala | 89 -- streams/streams-scala/bin/test/log4j2.yaml | 32 - .../kafka/streams/scala/TopologyTest.scala | 470 ---------- .../streams/scala/kstream/ConsumedTest.scala | 74 -- .../streams/scala/kstream/GroupedTest.scala | 46 - .../streams/scala/kstream/JoinedTest.scala | 44 - .../scala/kstream/KStreamSplitTest.scala | 125 --- .../streams/scala/kstream/KStreamTest.scala | 419 --------- .../streams/scala/kstream/KTableTest.scala | 617 ------------ .../scala/kstream/MaterializedTest.scala | 88 -- .../streams/scala/kstream/ProducedTest.scala | 61 -- .../scala/kstream/RepartitionedTest.scala | 111 --- .../scala/kstream/StreamJoinedTest.scala | 85 -- .../streams/scala/utils/TestDriver.scala | 41 - streams/test-utils/bin/test/log4j2.yaml | 35 - 47 files changed, 6439 deletions(-) delete mode 100644 connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector delete mode 100644 connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector delete mode 100644 connect/file/bin/test/log4j2.yaml delete mode 100644 server-common/bin/test/log4j2.yaml delete mode 100644 share-coordinator/bin/main/common/message/ShareSnapshotKey.json delete mode 100644 share-coordinator/bin/main/common/message/ShareSnapshotValue.json delete mode 100644 share-coordinator/bin/main/common/message/ShareUpdateKey.json delete mode 100644 share-coordinator/bin/main/common/message/ShareUpdateValue.json delete mode 100644 shell/bin/test/log4j2.yaml delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala delete mode 100644 
streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala delete mode 100644 streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala delete mode 100644 streams/streams-scala/bin/test/log4j2.yaml delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala delete mode 100644 streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala delete mode 100644 streams/test-utils/bin/test/log4j2.yaml diff --git a/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector b/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector deleted file mode 100644 index 4acecd76b5..0000000000 --- a/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.sink.SinkConnector +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -org.apache.kafka.connect.file.FileStreamSinkConnector \ No newline at end of file diff --git a/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector b/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector deleted file mode 100644 index 66a0c5d858..0000000000 --- a/connect/file/bin/main/META-INF/services/org.apache.kafka.connect.source.SourceConnector +++ /dev/null @@ -1,16 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - -org.apache.kafka.connect.file.FileStreamSourceConnector \ No newline at end of file diff --git a/connect/file/bin/test/log4j2.yaml b/connect/file/bin/test/log4j2.yaml deleted file mode 100644 index 1e9f550fa6..0000000000 --- a/connect/file/bin/test/log4j2.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %X{connector.context}%m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: kafka - level: WARN diff --git a/server-common/bin/test/log4j2.yaml b/server-common/bin/test/log4j2.yaml deleted file mode 100644 index be546a18b5..0000000000 --- a/server-common/bin/test/log4j2.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: org.apache.kafka - level: INFO diff --git a/share-coordinator/bin/main/common/message/ShareSnapshotKey.json b/share-coordinator/bin/main/common/message/ShareSnapshotKey.json deleted file mode 100644 index feeb6d4ee1..0000000000 --- a/share-coordinator/bin/main/common/message/ShareSnapshotKey.json +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 0, - "type": "coordinator-key", - "name": "ShareSnapshotKey", - "validVersions": "0", - "flexibleVersions": "none", - "fields": [ - { "name": "GroupId", "type": "string", "versions": "0", - "about": "The group id." }, - { "name": "TopicId", "type": "uuid", "versions": "0", - "about": "The topic id." }, - { "name": "Partition", "type": "int32", "versions": "0", - "about": "The partition index." } - ] -} - diff --git a/share-coordinator/bin/main/common/message/ShareSnapshotValue.json b/share-coordinator/bin/main/common/message/ShareSnapshotValue.json deleted file mode 100644 index 6126cfd0a2..0000000000 --- a/share-coordinator/bin/main/common/message/ShareSnapshotValue.json +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 0, - "type": "coordinator-value", - "name": "ShareSnapshotValue", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { "name": "SnapshotEpoch", "type": "uint16", "versions": "0+", - "about": "The snapshot epoch." }, - { "name": "StateEpoch", "type": "int32", "versions": "0+", - "about": "The state epoch for this share-partition." }, - { "name": "LeaderEpoch", "type": "int32", "versions": "0+", - "about": "The leader epoch of the share-partition." 
}, - { "name": "StartOffset", "type": "int64", "versions": "0+", - "about": "The share-partition start offset." }, - { "name": "CreateTimestamp", "type": "int64", "versions": "0+", - "about": "The time at which the state was created." }, - { "name": "WriteTimestamp", "type": "int64", "versions": "0+", - "about": "The time at which the state was written or rewritten." }, - { "name": "StateBatches", "type": "[]StateBatch", "versions": "0+", - "about": "The state batches.", "fields": [ - { "name": "FirstOffset", "type": "int64", "versions": "0+", - "about": "The first offset of this state batch." }, - { "name": "LastOffset", "type": "int64", "versions": "0+", - "about": "The last offset of this state batch." }, - { "name": "DeliveryState", "type": "int8", "versions": "0+", - "about": "The delivery state - 0:Available,2:Acked,4:Archived." }, - { "name": "DeliveryCount", "type": "int16", "versions": "0+", - "about": "The delivery count." } - ]} - ] -} - diff --git a/share-coordinator/bin/main/common/message/ShareUpdateKey.json b/share-coordinator/bin/main/common/message/ShareUpdateKey.json deleted file mode 100644 index f026b840bc..0000000000 --- a/share-coordinator/bin/main/common/message/ShareUpdateKey.json +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 1, - "type": "coordinator-key", - "name": "ShareUpdateKey", - "validVersions": "0", - "flexibleVersions": "none", - "fields": [ - { "name": "GroupId", "type": "string", "versions": "0", - "about": "The group id." }, - { "name": "TopicId", "type": "uuid", "versions": "0", - "about": "The topic id." }, - { "name": "Partition", "type": "int32", "versions": "0", - "about": "The partition index." } - ] -} - diff --git a/share-coordinator/bin/main/common/message/ShareUpdateValue.json b/share-coordinator/bin/main/common/message/ShareUpdateValue.json deleted file mode 100644 index 35e31462a9..0000000000 --- a/share-coordinator/bin/main/common/message/ShareUpdateValue.json +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -{ - "apiKey": 1, - "type": "coordinator-value", - "name": "ShareUpdateValue", - "validVersions": "0", - "flexibleVersions": "0+", - "fields": [ - { "name": "SnapshotEpoch", "type": "uint16", "versions": "0+", - "about": "The snapshot epoch." }, - { "name": "LeaderEpoch", "type": "int32", "versions": "0+", - "about": "The leader epoch of the share-partition." }, - { "name": "StartOffset", "type": "int64", "versions": "0+", - "about": "The share-partition start offset, or -1 if the start offset is not being updated." }, - { "name": "StateBatches", "type": "[]StateBatch", "versions": "0+", - "about": "The state batches that have been updated.", "fields": [ - { "name": "FirstOffset", "type": "int64", "versions": "0+", - "about": "The first offset of this state batch." }, - { "name": "LastOffset", "type": "int64", "versions": "0+", - "about": "The last offset of this state batch." }, - { "name": "DeliveryState", "type": "int8", "versions": "0+", - "about": "The delivery state - 0:Available,2:Acked,4:Archived." }, - { "name": "DeliveryCount", "type": "int16", "versions": "0+", - "about": "The delivery count." } - ]} - ] -} - diff --git a/shell/bin/test/log4j2.yaml b/shell/bin/test/log4j2.yaml deleted file mode 100644 index c229cbce31..0000000000 --- a/shell/bin/test/log4j2.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: DEBUG - AppenderRef: - - ref: STDOUT diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala deleted file mode 100644 index 527f0544d0..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/FunctionsCompatConversions.scala +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala - -import org.apache.kafka.streams.KeyValue -import org.apache.kafka.streams.kstream._ -import scala.jdk.CollectionConverters._ -import java.lang.{Iterable => JIterable} - -import org.apache.kafka.streams.processor.ProcessorContext - -/** - * Implicit classes that offer conversions of Scala function literals to SAM (Single Abstract Method) objects in Java. - * These make the Scala APIs much more expressive, with less boilerplate and more succinct. - */ -private[scala] object FunctionsCompatConversions { - - implicit class ForeachActionFromFunction[K, V](val p: (K, V) => Unit) extends AnyVal { - def asForeachAction: ForeachAction[K, V] = (key: K, value: V) => p(key, value) - } - - implicit class PredicateFromFunction[K, V](val p: (K, V) => Boolean) extends AnyVal { - def asPredicate: Predicate[K, V] = (key: K, value: V) => p(key, value) - } - - implicit class MapperFromFunction[T, U, VR](val f: (T, U) => VR) extends AnyVal { - def asKeyValueMapper: KeyValueMapper[T, U, VR] = (key: T, value: U) => f(key, value) - def asValueJoiner: ValueJoiner[T, U, VR] = (value1: T, value2: U) => f(value1, value2) - } - - implicit class KeyValueMapperFromFunction[K, V, KR, VR](val f: (K, V) => (KR, VR)) extends AnyVal { - def asKeyValueMapper: KeyValueMapper[K, V, KeyValue[KR, VR]] = (key: K, value: V) => { - val (kr, vr) = f(key, value) - KeyValue.pair(kr, vr) - } - } - - implicit class FunctionFromFunction[V, VR](val f: V => VR) extends AnyVal { - def asJavaFunction: java.util.function.Function[V, VR] = (value: V) => f(value) - } - - implicit class ValueMapperFromFunction[V, VR](val f: V => VR) extends AnyVal { - def asValueMapper: ValueMapper[V, VR] = (value: V) => f(value) - } - - implicit class FlatValueMapperFromFunction[V, VR](val f: V => Iterable[VR]) extends AnyVal { - def asValueMapper: ValueMapper[V, JIterable[VR]] = (value: V) => f(value).asJava - } - - implicit class ValueMapperWithKeyFromFunction[K, V, VR](val f: (K, V) => VR) extends AnyVal { - def asValueMapperWithKey: ValueMapperWithKey[K, V, VR] = (readOnlyKey: K, value: V) => f(readOnlyKey, value) - } - - implicit class FlatValueMapperWithKeyFromFunction[K, V, VR](val f: (K, V) => Iterable[VR]) extends AnyVal { - def asValueMapperWithKey: ValueMapperWithKey[K, V, JIterable[VR]] = - (readOnlyKey: K, value: V) => f(readOnlyKey, value).asJava - } - - implicit class AggregatorFromFunction[K, V, VA](val f: (K, V, VA) => VA) extends AnyVal { - def asAggregator: Aggregator[K, V, VA] = (key: K, value: V, aggregate: VA) => f(key, value, aggregate) - } - - implicit class MergerFromFunction[K, VR](val f: (K, VR, VR) => VR) extends AnyVal { - def asMerger: Merger[K, VR] = (aggKey: K, aggOne: VR, aggTwo: VR) => f(aggKey, aggOne, aggTwo) - } - - implicit class ReducerFromFunction[V](val f: (V, V) => V) extends AnyVal { - def asReducer: Reducer[V] = (value1: V, value2: V) => f(value1, value2) - } - - implicit class InitializerFromFunction[VA](val f: () => VA) extends AnyVal { - def asInitializer: Initializer[VA] = () => f() - } - - @deprecated( - since = "4.0.0" - ) - implicit class 
TransformerSupplierFromFunction[K, V, VO](val f: () => Transformer[K, V, VO]) extends AnyVal { - def asTransformerSupplier: TransformerSupplier[K, V, VO] = () => f() - } - - @deprecated( - since = "4.0.0" - ) - implicit class TransformerSupplierAsJava[K, V, VO](val supplier: TransformerSupplier[K, V, Iterable[VO]]) - extends AnyVal { - def asJava: TransformerSupplier[K, V, JIterable[VO]] = () => { - val innerTransformer = supplier.get() - new Transformer[K, V, JIterable[VO]] { - override def transform(key: K, value: V): JIterable[VO] = innerTransformer.transform(key, value).asJava - override def init(context: ProcessorContext): Unit = innerTransformer.init(context) - override def close(): Unit = innerTransformer.close() - } - } - } - - @deprecated( - since = "4.0.0" - ) - implicit class ValueTransformerSupplierAsJava[V, VO](val supplier: ValueTransformerSupplier[V, Iterable[VO]]) - extends AnyVal { - def asJava: ValueTransformerSupplier[V, JIterable[VO]] = () => { - val innerTransformer = supplier.get() - new ValueTransformer[V, JIterable[VO]] { - override def transform(value: V): JIterable[VO] = innerTransformer.transform(value).asJava - override def init(context: ProcessorContext): Unit = innerTransformer.init(context) - override def close(): Unit = innerTransformer.close() - } - } - } - - @deprecated( - since = "4.0.0" - ) - implicit class ValueTransformerSupplierWithKeyAsJava[K, V, VO]( - val supplier: ValueTransformerWithKeySupplier[K, V, Iterable[VO]] - ) extends AnyVal { - def asJava: ValueTransformerWithKeySupplier[K, V, JIterable[VO]] = () => { - val innerTransformer = supplier.get() - new ValueTransformerWithKey[K, V, JIterable[VO]] { - override def transform(key: K, value: V): JIterable[VO] = innerTransformer.transform(key, value).asJava - override def init(context: ProcessorContext): Unit = innerTransformer.init(context) - override def close(): Unit = innerTransformer.close() - } - } - } -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala deleted file mode 100644 index 626038a315..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/ImplicitConversions.scala +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.KeyValue -import org.apache.kafka.streams.kstream.{ - CogroupedKStream => CogroupedKStreamJ, - KGroupedStream => KGroupedStreamJ, - KGroupedTable => KGroupedTableJ, - KStream => KStreamJ, - KTable => KTableJ, - SessionWindowedCogroupedKStream => SessionWindowedCogroupedKStreamJ, - SessionWindowedKStream => SessionWindowedKStreamJ, - TimeWindowedCogroupedKStream => TimeWindowedCogroupedKStreamJ, - TimeWindowedKStream => TimeWindowedKStreamJ -} -import org.apache.kafka.streams.processor.StateStore -import org.apache.kafka.streams.scala.kstream._ - -/** - * Implicit conversions between the Scala wrapper objects and the underlying Java objects. - */ -object ImplicitConversions { - - implicit def wrapKStream[K, V](inner: KStreamJ[K, V]): KStream[K, V] = - new KStream[K, V](inner) - - implicit def wrapKGroupedStream[K, V](inner: KGroupedStreamJ[K, V]): KGroupedStream[K, V] = - new KGroupedStream[K, V](inner) - - implicit def wrapTimeWindowedKStream[K, V](inner: TimeWindowedKStreamJ[K, V]): TimeWindowedKStream[K, V] = - new TimeWindowedKStream[K, V](inner) - - implicit def wrapSessionWindowedKStream[K, V](inner: SessionWindowedKStreamJ[K, V]): SessionWindowedKStream[K, V] = - new SessionWindowedKStream[K, V](inner) - - implicit def wrapCogroupedKStream[K, V](inner: CogroupedKStreamJ[K, V]): CogroupedKStream[K, V] = - new CogroupedKStream[K, V](inner) - - implicit def wrapTimeWindowedCogroupedKStream[K, V]( - inner: TimeWindowedCogroupedKStreamJ[K, V] - ): TimeWindowedCogroupedKStream[K, V] = - new TimeWindowedCogroupedKStream[K, V](inner) - - implicit def wrapSessionWindowedCogroupedKStream[K, V]( - inner: SessionWindowedCogroupedKStreamJ[K, V] - ): SessionWindowedCogroupedKStream[K, V] = - new SessionWindowedCogroupedKStream[K, V](inner) - - implicit def wrapKTable[K, V](inner: KTableJ[K, V]): KTable[K, V] = - new KTable[K, V](inner) - - implicit def wrapKGroupedTable[K, V](inner: KGroupedTableJ[K, V]): KGroupedTable[K, V] = - new KGroupedTable[K, V](inner) - - implicit def tuple2ToKeyValue[K, V](tuple: (K, V)): KeyValue[K, V] = new KeyValue(tuple._1, tuple._2) - - // we would also like to allow users implicit serdes - // and these implicits will convert them to `Grouped`, `Produced` or `Consumed` - - implicit def consumedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Consumed[K, V] = - Consumed.`with`[K, V] - - implicit def groupedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Grouped[K, V] = - Grouped.`with`[K, V] - - implicit def joinedFromKeyValueOtherSerde[K, V, VO](implicit - keySerde: Serde[K], - valueSerde: Serde[V], - otherValueSerde: Serde[VO] - ): Joined[K, V, VO] = - Joined.`with`[K, V, VO] - - implicit def materializedFromSerde[K, V, S <: StateStore](implicit - keySerde: Serde[K], - valueSerde: Serde[V] - ): Materialized[K, V, S] = - Materialized.`with`[K, V, S] - - implicit def producedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Produced[K, V] = - Produced.`with`[K, V] - - implicit def repartitionedFromSerde[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): Repartitioned[K, V] = - Repartitioned.`with`[K, V] - - implicit def streamJoinFromKeyValueOtherSerde[K, V, VO](implicit - keySerde: Serde[K], - valueSerde: Serde[V], - otherValueSerde: Serde[VO] - ): StreamJoined[K, V, VO] = - StreamJoined.`with`[K, V, VO] -} diff --git 
a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala deleted file mode 100644 index 25f5ce339b..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/StreamsBuilder.scala +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala - -import java.util.Properties -import java.util.regex.Pattern - -import org.apache.kafka.streams.kstream.GlobalKTable -import org.apache.kafka.streams.processor.StateStore -import org.apache.kafka.streams.state.StoreBuilder -import org.apache.kafka.streams.{StreamsBuilder => StreamsBuilderJ, Topology} -import org.apache.kafka.streams.scala.kstream.{Consumed, KStream, KTable, Materialized} - -import scala.jdk.CollectionConverters._ - -/** - * Wraps the Java class StreamsBuilder and delegates method calls to the underlying Java object. - */ -class StreamsBuilder(inner: StreamsBuilderJ = new StreamsBuilderJ) { - - /** - * Create a [[kstream.KStream]] from the specified topic. - *

- * The `implicit Consumed` instance provides the values of `auto.offset.reset` strategy, `TimestampExtractor`, - * key and value deserializers etc. If the implicit is not found in scope, compiler error will result. - *
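// A minimal sketch of satisfying the implicit `Consumed` by passing it explicitly;
// the topic name "user-clicks" and the String/Long types here are illustrative assumptions.
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream.{Consumed, KStream}
import org.apache.kafka.streams.scala.serialization.Serdes

val builder = new StreamsBuilder()
val clicks: KStream[String, Long] =
  builder.stream("user-clicks")(Consumed.`with`(Serdes.stringSerde, Serdes.longSerde))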

- * A convenient alternative is to have the necessary implicit serdes in scope, which will be implicitly - * converted to generate an instance of `Consumed`. @see [[ImplicitConversions]]. - * {{{ - * // Brings all implicit conversions in scope - * import ImplicitConversions._ - * - * // Bring implicit default serdes in scope - * import Serdes._ - * - * val builder = new StreamsBuilder() - * - * // stream function gets the implicit Consumed which is constructed automatically - * // from the serdes through the implicits in ImplicitConversions#consumedFromSerde - * val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) - * }}} - * - * @param topic the topic name - * @return a [[kstream.KStream]] for the specified topic - */ - def stream[K, V](topic: String)(implicit consumed: Consumed[K, V]): KStream[K, V] = - new KStream(inner.stream[K, V](topic, consumed)) - - /** - * Create a [[kstream.KStream]] from the specified topics. - * - * @param topics the topic names - * @return a [[kstream.KStream]] for the specified topics - * @see #stream(String) - * @see `org.apache.kafka.streams.StreamsBuilder#stream` - */ - def stream[K, V](topics: Set[String])(implicit consumed: Consumed[K, V]): KStream[K, V] = - new KStream(inner.stream[K, V](topics.asJava, consumed)) - - /** - * Create a [[kstream.KStream]] from the specified topic pattern. - * - * @param topicPattern the topic name pattern - * @return a [[kstream.KStream]] for the specified topics - * @see #stream(String) - * @see `org.apache.kafka.streams.StreamsBuilder#stream` - */ - def stream[K, V](topicPattern: Pattern)(implicit consumed: Consumed[K, V]): KStream[K, V] = - new KStream(inner.stream[K, V](topicPattern, consumed)) - - /** - * Create a [[kstream.KTable]] from the specified topic. - *

- * The `implicit Consumed` instance provides the values of `auto.offset.reset` strategy, `TimestampExtractor`, - * key and value deserializers etc. If the implicit is not found in scope, compiler error will result. - *

- * A convenient alternative is to have the necessary implicit serdes in scope, which will be implicitly - * converted to generate an instance of `Consumed`. @see [[ImplicitConversions]]. - * {{{ - * // Brings all implicit conversions in scope - * import ImplicitConversions._ - * - * // Bring implicit default serdes in scope - * import Serdes._ - * - * val builder = new StreamsBuilder() - * - * // stream function gets the implicit Consumed which is constructed automatically - * // from the serdes through the implicits in ImplicitConversions#consumedFromSerde - * val userClicksStream: KTable[String, Long] = builder.table(userClicksTopic) - * }}} - * - * @param topic the topic name - * @return a [[kstream.KTable]] for the specified topic - * @see `org.apache.kafka.streams.StreamsBuilder#table` - */ - def table[K, V](topic: String)(implicit consumed: Consumed[K, V]): KTable[K, V] = - new KTable(inner.table[K, V](topic, consumed)) - - /** - * Create a [[kstream.KTable]] from the specified topic. - * - * @param topic the topic name - * @param materialized the instance of `Materialized` used to materialize a state store - * @return a [[kstream.KTable]] for the specified topic - * @see #table(String) - * @see `org.apache.kafka.streams.StreamsBuilder#table` - */ - def table[K, V](topic: String, materialized: Materialized[K, V, ByteArrayKeyValueStore])(implicit - consumed: Consumed[K, V] - ): KTable[K, V] = - new KTable(inner.table[K, V](topic, consumed, materialized)) - - /** - * Create a `GlobalKTable` from the specified topic. The serializers from the implicit `Consumed` - * instance will be used. Input records with `null` key will be dropped. - * - * @param topic the topic name - * @return a `GlobalKTable` for the specified topic - * @see `org.apache.kafka.streams.StreamsBuilder#globalTable` - */ - def globalTable[K, V](topic: String)(implicit consumed: Consumed[K, V]): GlobalKTable[K, V] = - inner.globalTable(topic, consumed) - - /** - * Create a `GlobalKTable` from the specified topic. The resulting `GlobalKTable` will be materialized - * in a local `KeyValueStore` configured with the provided instance of `Materialized`. The serializers - * from the implicit `Consumed` instance will be used. - * - * @param topic the topic name - * @param materialized the instance of `Materialized` used to materialize a state store - * @return a `GlobalKTable` for the specified topic - * @see `org.apache.kafka.streams.StreamsBuilder#globalTable` - */ - def globalTable[K, V](topic: String, materialized: Materialized[K, V, ByteArrayKeyValueStore])(implicit - consumed: Consumed[K, V] - ): GlobalKTable[K, V] = - inner.globalTable(topic, consumed, materialized) - - /** - * Adds a state store to the underlying `Topology`. The store must still be "connected" to a `Processor`, - * `Transformer`, or `ValueTransformer` before it can be used. - *
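// A minimal sketch of registering a state store; the store name "counts-store" is an
// illustrative assumption. The store only becomes usable once it is connected to a
// processor (for example via `KStream#process`, passing the store name).
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes
import org.apache.kafka.streams.state.Stores

val builder = new StreamsBuilder()
builder.addStateStore(
  Stores.keyValueStoreBuilder(
    Stores.persistentKeyValueStore("counts-store"),
    Serdes.stringSerde,
    Serdes.longSerde
  )
)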

- * It is required to connect state stores to `Processor`, `Transformer`, or `ValueTransformer` before they can be used. - * - * @param builder the builder used to obtain this state store `StateStore` instance - * @return the underlying Java abstraction `StreamsBuilder` after adding the `StateStore` - * @throws org.apache.kafka.streams.errors.TopologyException if state store supplier is already added - * @see `org.apache.kafka.streams.StreamsBuilder#addStateStore` - */ - def addStateStore(builder: StoreBuilder[_ <: StateStore]): StreamsBuilderJ = inner.addStateStore(builder) - - /** - * Adds a global `StateStore` to the topology. Global stores should not be added to `Processor`, `Transformer`, - * or `ValueTransformer` (in contrast to regular stores). - *

- * It is not required to connect a global store to `Processor`, `Transformer`, or `ValueTransformer`; - * those have read-only access to all global stores by default. - * - * @see `org.apache.kafka.streams.StreamsBuilder#addGlobalStore` - */ - def addGlobalStore[K, V]( - storeBuilder: StoreBuilder[_ <: StateStore], - topic: String, - consumed: Consumed[K, V], - stateUpdateSupplier: org.apache.kafka.streams.processor.api.ProcessorSupplier[K, V, Void, Void] - ): StreamsBuilderJ = - inner.addGlobalStore(storeBuilder, topic, consumed, stateUpdateSupplier) - - def build(): Topology = inner.build() - - /** - * Returns the `Topology` that represents the specified processing logic and accepts - * a `Properties` instance used to indicate whether to optimize topology or not. - * - * @param props the `Properties` used for building possibly optimized topology - * @return the `Topology` that represents the specified processing logic - * @see `org.apache.kafka.streams.StreamsBuilder#build` - */ - def build(props: Properties): Topology = inner.build(props) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala deleted file mode 100644 index 63bcf323af..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Branched.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.{Branched => BranchedJ, KStream => KStreamJ} - -object Branched { - - /** - * Create an instance of `Branched` with provided branch name suffix. - * - * @param name the branch name suffix to be used (see [[BranchedKStream]] description for details) - * @tparam K key type - * @tparam V value type - * @return a new instance of `Branched` - */ - def as[K, V](name: String): BranchedJ[K, V] = - BranchedJ.as[K, V](name) - - /** - * Create an instance of `Branched` with provided chain function and branch name suffix. - * - * @param chain A function that will be applied to the branch. If the provided function returns - * `null`, its result is ignored, otherwise it is added to the Map returned - * by [[BranchedKStream.defaultBranch()*]] or [[BranchedKStream.noDefaultBranch]] (see - * [[BranchedKStream]] description for details). - * @param name the branch name suffix to be used. 
If `null`, a default branch name suffix will be generated - * (see [[BranchedKStream]] description for details) - * @tparam K key type - * @tparam V value type - * @return a new instance of `Branched` - * @see `org.apache.kafka.streams.kstream.Branched#withFunction(java.util.function.Function, java.lang.String)` - */ - def withFunction[K, V](chain: KStream[K, V] => KStream[K, V], name: String = null): BranchedJ[K, V] = - BranchedJ.withFunction((f: KStreamJ[K, V]) => chain.apply(new KStream[K, V](f)).inner, name) - - /** - * Create an instance of `Branched` with provided chain consumer and branch name suffix. - * - * @param chain A consumer to which the branch will be sent. If a non-null consumer is provided here, - * the respective branch will not be added to the resulting Map returned - * by [[BranchedKStream.defaultBranch()*]] or [[BranchedKStream.noDefaultBranch]] (see - * [[BranchedKStream]] description for details). - * @param name the branch name suffix to be used. If `null`, a default branch name suffix will be generated - * (see [[BranchedKStream]] description for details) - * @tparam K key type - * @tparam V value type - * @return a new instance of `Branched` - * @see `org.apache.kafka.streams.kstream.Branched#withConsumer(java.util.function.Consumer, java.lang.String)` - */ - def withConsumer[K, V](chain: KStream[K, V] => Unit, name: String = null): BranchedJ[K, V] = - BranchedJ.withConsumer((c: KStreamJ[K, V]) => chain.apply(new KStream[K, V](c)), name) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala deleted file mode 100644 index 196198f6e1..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/BranchedKStream.scala +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import java.util - -import org.apache.kafka.streams.kstream -import org.apache.kafka.streams.kstream.{BranchedKStream => BranchedKStreamJ} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.PredicateFromFunction - -import scala.jdk.CollectionConverters._ - -/** - * Branches the records in the original stream based on the predicates supplied for the branch definitions. - *

- * Branches are defined with [[branch]] or [[defaultBranch()*]] - * methods. Each record is evaluated against the predicates - * supplied via [[Branched]] parameters, and is routed to the first branch for which its respective predicate - * evaluates to `true`. If a record does not match any predicates, it will be routed to the default branch, - * or dropped if no default branch is created. - *

- * - * Each branch (which is a [[KStream]] instance) then can be processed either by - * a function or a consumer provided via a [[Branched]] - * parameter. If certain conditions are met, it also can be accessed from the `Map` returned by - * an optional [[defaultBranch()*]] or [[noDefaultBranch]] method call. - *

- * The branching happens on a first match basis: A record in the original stream is assigned to the corresponding result - * stream for the first predicate that evaluates to true, and is assigned to this stream only. If you need - * to route a record to multiple streams, you can apply multiple - * [[KStream.filter]] operators to the same [[KStream]] - * instance, one for each predicate, instead of branching. - *
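// A minimal sketch of the first-match routing described above; the topic "numbers",
// the Int value type and the branch names are illustrative assumptions.
import org.apache.kafka.streams.kstream.Named
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.kstream.{Branched, KStream}
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()
val numbers: KStream[String, Int] = builder.stream[String, Int]("numbers")
// Each record is routed to the first branch whose predicate matches, or to the default branch.
val branches: Map[String, KStream[String, Int]] =
  numbers
    .split(Named.as("num-"))
    .branch((_, v) => v < 0, Branched.as[String, Int]("negative"))
    .defaultBranch(Branched.as[String, Int]("non-negative"))
// Result map keys combine the prefix and suffix, e.g. "num-negative".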

- * The process of routing the records to different branches is a stateless record-by-record operation. - * - * @tparam K Type of keys - * @tparam V Type of values - */ -class BranchedKStream[K, V](val inner: BranchedKStreamJ[K, V]) { - - /** - * Define a branch for records that match the predicate. - * - * @param predicate A predicate against which each record will be evaluated. - * If this predicate returns `true` for a given record, the record will be - * routed to the current branch and will not be evaluated against the predicates - * for the remaining branches. - * @return `this` to facilitate method chaining - */ - def branch(predicate: (K, V) => Boolean): BranchedKStream[K, V] = { - inner.branch(predicate.asPredicate) - this - } - - /** - * Define a branch for records that match the predicate. - * - * @param predicate A predicate against which each record will be evaluated. - * If this predicate returns `true` for a given record, the record will be - * routed to the current branch and will not be evaluated against the predicates - * for the remaining branches. - * @param branched A [[Branched]] parameter, that allows to define a branch name, an in-place - * branch consumer or branch mapper (see code examples - * for [[BranchedKStream]]) - * @return `this` to facilitate method chaining - */ - def branch(predicate: (K, V) => Boolean, branched: Branched[K, V]): BranchedKStream[K, V] = { - inner.branch(predicate.asPredicate, branched) - this - } - - /** - * Finalize the construction of branches and defines the default branch for the messages not intercepted - * by other branches. Calling [[defaultBranch()*]] or [[noDefaultBranch]] is optional. - * - * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] - * description. - */ - def defaultBranch(): Map[String, KStream[K, V]] = toScalaMap(inner.defaultBranch()) - - /** - * Finalize the construction of branches and defines the default branch for the messages not intercepted - * by other branches. Calling [[defaultBranch()*]] or [[noDefaultBranch]] is optional. - * - * @param branched A [[Branched]] parameter, that allows to define a branch name, an in-place - * branch consumer or branch mapper for [[BranchedKStream]]. - * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] - * description. - */ - def defaultBranch(branched: Branched[K, V]): Map[String, KStream[K, V]] = toScalaMap(inner.defaultBranch(branched)) - - /** - * Finalizes the construction of branches without forming a default branch. - * - * @return Map of named branches. For rules of forming the resulting map, see [[BranchedKStream]] - * description. - */ - def noDefaultBranch(): Map[String, KStream[K, V]] = toScalaMap(inner.noDefaultBranch()) - - private def toScalaMap(m: util.Map[String, kstream.KStream[K, V]]): collection.immutable.Map[String, KStream[K, V]] = - m.asScala.map { case (name, kStreamJ) => - (name, new KStream(kStreamJ)) - }.toMap -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala deleted file mode 100644 index f8f33e7b4e..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/CogroupedKStream.scala +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.{ - CogroupedKStream => CogroupedKStreamJ, - SessionWindows, - SlidingWindows, - Window, - Windows -} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{AggregatorFromFunction, InitializerFromFunction} - -/** - * Wraps the Java class CogroupedKStream and delegates method calls to the underlying Java object. - * - * @tparam KIn Type of keys - * @tparam VOut Type of values - * @param inner The underlying Java abstraction for CogroupedKStream - * @see `org.apache.kafka.streams.kstream.CogroupedKStream` - */ -class CogroupedKStream[KIn, VOut](val inner: CogroupedKStreamJ[KIn, VOut]) { - - /** - * Add an already [[KGroupedStream]] to this [[CogroupedKStream]]. - * - * @param groupedStream a group stream - * @param aggregator a function that computes a new aggregate result - * @return a [[CogroupedKStream]] - */ - def cogroup[VIn]( - groupedStream: KGroupedStream[KIn, VIn], - aggregator: (KIn, VIn, VOut) => VOut - ): CogroupedKStream[KIn, VOut] = - new CogroupedKStream(inner.cogroup(groupedStream.inner, aggregator.asAggregator)) - - /** - * Aggregate the values of records in these streams by the grouped key and defined window. - * - * @param initializer an `Initializer` that computes an initial intermediate aggregation result. - * Cannot be { @code null}. - * @param materialized an instance of `Materialized` used to materialize a state store. - * Cannot be { @code null}. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest - * (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.CogroupedKStream#aggregate` - */ - def aggregate(initializer: => VOut)(implicit - materialized: Materialized[KIn, VOut, ByteArrayKeyValueStore] - ): KTable[KIn, VOut] = new KTable(inner.aggregate((() => initializer).asInitializer, materialized)) - - /** - * Aggregate the values of records in these streams by the grouped key and defined window. - * - * @param initializer an `Initializer` that computes an initial intermediate aggregation result. - * Cannot be { @code null}. - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * Cannot be { @code null}. 
- * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest - * (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.CogroupedKStream#aggregate` - */ - def aggregate(initializer: => VOut, named: Named)(implicit - materialized: Materialized[KIn, VOut, ByteArrayKeyValueStore] - ): KTable[KIn, VOut] = new KTable(inner.aggregate((() => initializer).asInitializer, named, materialized)) - - /** - * Create a new [[TimeWindowedCogroupedKStream]] instance that can be used to perform windowed aggregations. - * - * @param windows the specification of the aggregation `Windows` - * @return an instance of [[TimeWindowedCogroupedKStream]] - * @see `org.apache.kafka.streams.kstream.CogroupedKStream#windowedBy` - */ - def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedCogroupedKStream[KIn, VOut] = - new TimeWindowedCogroupedKStream(inner.windowedBy(windows)) - - /** - * Create a new [[TimeWindowedCogroupedKStream]] instance that can be used to perform sliding windowed aggregations. - * - * @param windows the specification of the aggregation `SlidingWindows` - * @return an instance of [[TimeWindowedCogroupedKStream]] - * @see `org.apache.kafka.streams.kstream.CogroupedKStream#windowedBy` - */ - def windowedBy(windows: SlidingWindows): TimeWindowedCogroupedKStream[KIn, VOut] = - new TimeWindowedCogroupedKStream(inner.windowedBy(windows)) - - /** - * Create a new [[SessionWindowedKStream]] instance that can be used to perform session windowed aggregations. - * - * @param windows the specification of the aggregation `SessionWindows` - * @return an instance of [[SessionWindowedKStream]] - * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` - */ - def windowedBy(windows: SessionWindows): SessionWindowedCogroupedKStream[KIn, VOut] = - new SessionWindowedCogroupedKStream(inner.windowedBy(windows)) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala deleted file mode 100644 index 89f461a8fe..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Consumed.scala +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{Consumed => ConsumedJ} -import org.apache.kafka.streams.{AutoOffsetReset, Topology} -import org.apache.kafka.streams.processor.TimestampExtractor - -object Consumed { - - /** - * Create an instance of [[Consumed]] with the supplied arguments. `null` values are acceptable. 
- * - * @tparam K key type - * @tparam V value type - * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from - * config will be used - * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config - * will be used - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @return a new instance of [[Consumed]] - */ - @deprecated("Use `with` method that accepts `AutoOffsetReset` instead", "4.0.0") - def `with`[K, V]( - timestampExtractor: TimestampExtractor, - resetPolicy: Topology.AutoOffsetReset - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = - ConsumedJ.`with`(keySerde, valueSerde, timestampExtractor, resetPolicy) - - /** - * Create an instance of [[Consumed]] with the supplied arguments. `null` values are acceptable. - * - * @tparam K key type - * @tparam V value type - * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from - * config will be used - * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config - * will be used - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @return a new instance of [[Consumed]] - */ - def `with`[K, V]( - timestampExtractor: TimestampExtractor, - resetPolicy: AutoOffsetReset - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = - ConsumedJ.`with`(keySerde, valueSerde, timestampExtractor, resetPolicy) - - /** - * Create an instance of [[Consumed]] with key and value Serdes. - * - * @tparam K key type - * @tparam V value type - * @return a new instance of [[Consumed]] - */ - def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = - ConsumedJ.`with`(keySerde, valueSerde) - - /** - * Create an instance of [[Consumed]] with a `org.apache.kafka.streams.processor.TimestampExtractor`. - * - * @param timestampExtractor the timestamp extractor to used. If `null` the default timestamp extractor from - * config will be used - * @tparam K key type - * @tparam V value type - * @return a new instance of [[Consumed]] - */ - def `with`[K, V]( - timestampExtractor: TimestampExtractor - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = - ConsumedJ.`with`(timestampExtractor).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Create an instance of [[Consumed]] with a `org.apache.kafka.streams.Topology.AutoOffsetReset`. - * - * @tparam K key type - * @tparam V value type - * @param resetPolicy the offset reset policy to be used. If `null` the default reset policy from config will be used - * @return a new instance of [[Consumed]] - */ - @deprecated("Use `with` method that accepts `AutoOffsetReset` instead", "4.0.0") - def `with`[K, V]( - resetPolicy: Topology.AutoOffsetReset - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = - ConsumedJ.`with`(resetPolicy).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Create an instance of [[Consumed]] with a `org.apache.kafka.streams.AutoOffsetReset`. - * - * @tparam K key type - * @tparam V value type - * @param resetPolicy the offset reset policy to be used. 
If `null` the default reset policy from config will be used - * @return a new instance of [[Consumed]] - */ - def `with`[K, V]( - resetPolicy: AutoOffsetReset - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ConsumedJ[K, V] = - ConsumedJ.`with`(resetPolicy).withKeySerde(keySerde).withValueSerde(valueSerde) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala deleted file mode 100644 index de1aa4e983..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Grouped.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{Grouped => GroupedJ} - -object Grouped { - - /** - * Construct a `Grouped` instance with the provided key and value Serdes. - * If the Serde params are `null` the default serdes defined in the configs will be used. - * - * @tparam K the key type - * @tparam V the value type - * @param keySerde keySerde that will be used to materialize a stream - * @param valueSerde valueSerde that will be used to materialize a stream - * @return a new instance of [[Grouped]] configured with the provided serdes - */ - def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): GroupedJ[K, V] = - GroupedJ.`with`(keySerde, valueSerde) - - /** - * Construct a `Grouped` instance with the provided key and value Serdes. - * If the Serde params are `null` the default serdes defined in the configs will be used. - * - * @tparam K the key type - * @tparam V the value type - * @param name the name used as part of a potential repartition topic - * @param keySerde keySerde that will be used to materialize a stream - * @param valueSerde valueSerde that will be used to materialize a stream - * @return a new instance of [[Grouped]] configured with the provided serdes - */ - def `with`[K, V](name: String)(implicit keySerde: Serde[K], valueSerde: Serde[V]): GroupedJ[K, V] = - GroupedJ.`with`(name, keySerde, valueSerde) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala deleted file mode 100644 index 6233ad15f4..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Joined.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{Joined => JoinedJ} - -object Joined { - - /** - * Create an instance of `org.apache.kafka.streams.kstream.Joined` with key, value, and otherValue Serde - * instances. - * `null` values are accepted and will be replaced by the default serdes as defined in config. - * - * @tparam K key type - * @tparam V value type - * @tparam VO other value type - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used - * @return new `org.apache.kafka.streams.kstream.Joined` instance with the provided serdes - */ - def `with`[K, V, VO](implicit - keySerde: Serde[K], - valueSerde: Serde[V], - otherValueSerde: Serde[VO] - ): JoinedJ[K, V, VO] = - JoinedJ.`with`(keySerde, valueSerde, otherValueSerde) - - /** - * Create an instance of `org.apache.kafka.streams.kstream.Joined` with key, value, and otherValue Serde - * instances. - * `null` values are accepted and will be replaced by the default serdes as defined in config. - * - * @tparam K key type - * @tparam V value type - * @tparam VO other value type - * @param name name of possible repartition topic - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used - * @return new `org.apache.kafka.streams.kstream.Joined` instance with the provided serdes - */ - // disable spotless scala, which wants to make a mess of the argument lists - // format: off - def `with`[K, V, VO](name: String) - (implicit keySerde: Serde[K], - valueSerde: Serde[V], - otherValueSerde: Serde[VO]): JoinedJ[K, V, VO] = - JoinedJ.`with`(keySerde, valueSerde, otherValueSerde, name) - // format:on -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala deleted file mode 100644 index 01e7c1c520..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedStream.scala +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.internals.KTableImpl -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.kstream.{ - KGroupedStream => KGroupedStreamJ, - KTable => KTableJ, - SessionWindows, - SlidingWindows, - Window, - Windows -} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ - AggregatorFromFunction, - InitializerFromFunction, - ReducerFromFunction, - ValueMapperFromFunction -} - -/** - * Wraps the Java class KGroupedStream and delegates method calls to the underlying Java object. - * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for KGroupedStream - * @see `org.apache.kafka.streams.kstream.KGroupedStream` - */ -class KGroupedStream[K, V](val inner: KGroupedStreamJ[K, V]) { - - /** - * Count the number of records in this stream by the grouped key. - * The result is written into a local `KeyValueStore` (which is basically an ever-updating materialized view) - * provided by the given `materialized`. - * - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that - * represent the latest (rolling) count (i.e., number of records) for each key - * @see `org.apache.kafka.streams.kstream.KGroupedStream#count` - */ - def count()(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { - val javaCountTable: KTableJ[K, java.lang.Long] = - inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]]) - val tableImpl = javaCountTable.asInstanceOf[KTableImpl[K, ByteArrayKeyValueStore, java.lang.Long]] - new KTable( - javaCountTable.mapValues[Long]( - ((l: java.lang.Long) => Long2long(l)).asValueMapper, - Materialized.`with`[K, Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) - ) - ) - } - - /** - * Count the number of records in this stream by the grouped key. - * The result is written into a local `KeyValueStore` (which is basically an ever-updating materialized view) - * provided by the given `materialized`. - * - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that - * represent the latest (rolling) count (i.e., number of records) for each key - * @see `org.apache.kafka.streams.kstream.KGroupedStream#count` - */ - def count(named: Named)(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { - val javaCountTable: KTableJ[K, java.lang.Long] = - inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]]) - val tableImpl = javaCountTable.asInstanceOf[KTableImpl[K, ByteArrayKeyValueStore, java.lang.Long]] - new KTable( - javaCountTable.mapValues[Long]( - ((l: java.lang.Long) => Long2long(l)).asValueMapper, - Materialized.`with`[K, Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) - ) - ) - } - - /** - * Combine the values of records in this stream by the grouped key. - * - * @param reducer a function `(V, V) => V` that computes a new aggregate result. - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedStream#reduce` - */ - def reduce(reducer: (V, V) => V)(implicit materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = - new KTable(inner.reduce(reducer.asReducer, materialized)) - - /** - * Combine the values of records in this stream by the grouped key. - * - * @param reducer a function `(V, V) => V` that computes a new aggregate result. - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedStream#reduce` - */ - def reduce(reducer: (V, V) => V, named: Named)(implicit - materialized: Materialized[K, V, ByteArrayKeyValueStore] - ): KTable[K, V] = - new KTable(inner.reduce(reducer.asReducer, materialized)) - - /** - * Aggregate the values of records in this stream by the grouped key. - * - * @param initializer an `Initializer` that computes an initial intermediate aggregation result - * @param aggregator an `Aggregator` that computes a new aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedStream#aggregate` - */ - def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArrayKeyValueStore] - ): KTable[K, VR] = - new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, materialized)) - - /** - * Aggregate the values of records in this stream by the grouped key. - * - * @param initializer an `Initializer` that computes an initial intermediate aggregation result - * @param aggregator an `Aggregator` that computes a new aggregate result - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedStream#aggregate` - */ - def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArrayKeyValueStore] - ): KTable[K, VR] = - new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, named, materialized)) - - /** - * Create a new [[TimeWindowedKStream]] instance that can be used to perform windowed aggregations. - * - * @param windows the specification of the aggregation `Windows` - * @return an instance of [[TimeWindowedKStream]] - * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` - */ - def windowedBy[W <: Window](windows: Windows[W]): TimeWindowedKStream[K, V] = - new TimeWindowedKStream(inner.windowedBy(windows)) - - /** - * Create a new [[TimeWindowedKStream]] instance that can be used to perform sliding windowed aggregations. - * - * @param windows the specification of the aggregation `SlidingWindows` - * @return an instance of [[TimeWindowedKStream]] - * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` - */ - def windowedBy(windows: SlidingWindows): TimeWindowedKStream[K, V] = - new TimeWindowedKStream(inner.windowedBy(windows)) - - /** - * Create a new [[SessionWindowedKStream]] instance that can be used to perform session windowed aggregations. - * - * @param windows the specification of the aggregation `SessionWindows` - * @return an instance of [[SessionWindowedKStream]] - * @see `org.apache.kafka.streams.kstream.KGroupedStream#windowedBy` - */ - def windowedBy(windows: SessionWindows): SessionWindowedKStream[K, V] = - new SessionWindowedKStream(inner.windowedBy(windows)) - - /** - * Create a new [[CogroupedKStream]] from this grouped KStream to allow cogrouping other [[KGroupedStream]] to it. - * - * @param aggregator an `Aggregator` that computes a new aggregate result - * @return an instance of [[CogroupedKStream]] - * @see `org.apache.kafka.streams.kstream.KGroupedStream#cogroup` - */ - def cogroup[VR](aggregator: (K, V, VR) => VR): CogroupedKStream[K, VR] = - new CogroupedKStream(inner.cogroup(aggregator.asAggregator)) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala deleted file mode 100644 index 3d9e052a2f..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KGroupedTable.scala +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.{KGroupedTable => KGroupedTableJ} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ - AggregatorFromFunction, - InitializerFromFunction, - ReducerFromFunction -} - -/** - * Wraps the Java class KGroupedTable and delegates method calls to the underlying Java object. - * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for KGroupedTable - * @see `org.apache.kafka.streams.kstream.KGroupedTable` - */ -class KGroupedTable[K, V](inner: KGroupedTableJ[K, V]) { - - /** - * Count number of records of the original [[KTable]] that got [[KTable#groupBy]] to - * the same key into a new instance of [[KTable]]. - * - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that - * represent the latest (rolling) count (i.e., number of records) for each key - * @see `org.apache.kafka.streams.kstream.KGroupedTable#count` - */ - def count()(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { - val c: KTable[K, java.lang.Long] = - new KTable(inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]])) - c.mapValues[Long](Long2long _) - } - - /** - * Count number of records of the original [[KTable]] that got [[KTable#groupBy]] to - * the same key into a new instance of [[KTable]]. - * - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that - * represent the latest (rolling) count (i.e., number of records) for each key - * @see `org.apache.kafka.streams.kstream.KGroupedTable#count` - */ - def count(named: Named)(implicit materialized: Materialized[K, Long, ByteArrayKeyValueStore]): KTable[K, Long] = { - val c: KTable[K, java.lang.Long] = - new KTable(inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayKeyValueStore]])) - c.mapValues[Long](Long2long _) - } - - /** - * Combine the value of records of the original [[KTable]] that got [[KTable#groupBy]] - * to the same key into a new instance of [[KTable]]. - * - * @param adder a function that adds a new value to the aggregate result - * @param subtractor a function that removed an old value from the aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedTable#reduce` - */ - def reduce(adder: (V, V) => V, subtractor: (V, V) => V)(implicit - materialized: Materialized[K, V, ByteArrayKeyValueStore] - ): KTable[K, V] = - new KTable(inner.reduce(adder.asReducer, subtractor.asReducer, materialized)) - - /** - * Combine the value of records of the original [[KTable]] that got [[KTable#groupBy]] - * to the same key into a new instance of [[KTable]]. 
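// A minimal sketch of KGroupedTable.reduce with an adder and a subtractor. The topic
// name "clicks-per-user", the "region:userId" key format, and the Long click counts
// are illustrative assumptions; the Grouped and Materialized instances are derived
// implicitly from the serdes brought into scope below.
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()
val clicksPerUser = builder.table[String, Long]("clicks-per-user")
val clicksPerRegion = clicksPerUser
  // re-key each entry by its region prefix; values keep their type
  .groupBy((key, clicks) => (key.takeWhile(_ != ':'), clicks))
  // the adder folds a value in under the new key, the subtractor retracts the
  // value previously attributed to the old key when an upstream row changes
  .reduce(_ + _, _ - _)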
- * - * @param adder a function that adds a new value to the aggregate result - * @param subtractor a function that removed an old value from the aggregate result - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedTable#reduce` - */ - def reduce(adder: (V, V) => V, subtractor: (V, V) => V, named: Named)(implicit - materialized: Materialized[K, V, ByteArrayKeyValueStore] - ): KTable[K, V] = - new KTable(inner.reduce(adder.asReducer, subtractor.asReducer, named, materialized)) - - /** - * Aggregate the value of records of the original [[KTable]] that got [[KTable#groupBy]] - * to the same key into a new instance of [[KTable]] using default serializers and deserializers. - * - * @param initializer a function that provides an initial aggregate result value - * @param adder a function that adds a new record to the aggregate result - * @param subtractor an aggregator function that removed an old record from the aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedTable#aggregate` - */ - def aggregate[VR](initializer: => VR)(adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArrayKeyValueStore] - ): KTable[K, VR] = - new KTable( - inner.aggregate((() => initializer).asInitializer, adder.asAggregator, subtractor.asAggregator, materialized) - ) - - /** - * Aggregate the value of records of the original [[KTable]] that got [[KTable#groupBy]] - * to the same key into a new instance of [[KTable]] using default serializers and deserializers. - * - * @param initializer a function that provides an initial aggregate result value - * @param named a [[Named]] config used to name the processor in the topology - * @param adder a function that adds a new record to the aggregate result - * @param subtractor an aggregator function that removed an old record from the aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.KGroupedTable#aggregate` - */ - def aggregate[VR](initializer: => VR, named: Named)(adder: (K, V, VR) => VR, subtractor: (K, V, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArrayKeyValueStore] - ): KTable[K, VR] = - new KTable( - inner.aggregate( - (() => initializer).asInitializer, - adder.asAggregator, - subtractor.asAggregator, - named, - materialized - ) - ) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala deleted file mode 100644 index 76918a6f74..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KStream.scala +++ /dev/null @@ -1,877 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.{GlobalKTable, JoinWindows, KStream => KStreamJ, Printed} -import org.apache.kafka.streams.processor.TopicNameExtractor -import org.apache.kafka.streams.processor.api.{FixedKeyProcessorSupplier, ProcessorSupplier} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ - FlatValueMapperFromFunction, - FlatValueMapperWithKeyFromFunction, - ForeachActionFromFunction, - KeyValueMapperFromFunction, - MapperFromFunction, - PredicateFromFunction, - ValueMapperFromFunction, - ValueMapperWithKeyFromFunction -} - -import scala.jdk.CollectionConverters._ - -/** - * Wraps the Java class `org.apache.kafka.streams.kstream.KStream` and delegates method calls to the - * underlying Java object. - * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for KStream - * @see `org.apache.kafka.streams.kstream.KStream` - */ -//noinspection ScalaDeprecation -class KStream[K, V](val inner: KStreamJ[K, V]) { - - /** - * Create a new [[KStream]] that consists all records of this stream which satisfies the given predicate. - * - * @param predicate a filter that is applied to each record - * @return a [[KStream]] that contains only those records that satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KStream#filter` - */ - def filter(predicate: (K, V) => Boolean): KStream[K, V] = - new KStream(inner.filter(predicate.asPredicate)) - - /** - * Create a new [[KStream]] that consists all records of this stream which satisfies the given predicate. 
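// A small sketch of filter and filterNot on a KStream; the "page-views" topic and the
// String key/value types are assumptions, and the serdes come from the usual implicit imports.
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder()
val views = builder.stream[String, String]("page-views")
// keep only records with a non-empty value ...
val nonEmpty = views.filter((_, value) => value.nonEmpty)
// ... which is equivalent to dropping the empty ones with filterNot
val alsoNonEmpty = views.filterNot((_, value) => value.isEmpty)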
- * - * @param predicate a filter that is applied to each record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains only those records that satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KStream#filter` - */ - def filter(predicate: (K, V) => Boolean, named: Named): KStream[K, V] = - new KStream(inner.filter(predicate.asPredicate, named)) - - /** - * Create a new [[KStream]] that consists all records of this stream which do not satisfy the given - * predicate. - * - * @param predicate a filter that is applied to each record - * @return a [[KStream]] that contains only those records that do not satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KStream#filterNot` - */ - def filterNot(predicate: (K, V) => Boolean): KStream[K, V] = - new KStream(inner.filterNot(predicate.asPredicate)) - - /** - * Create a new [[KStream]] that consists all records of this stream which do not satisfy the given - * predicate. - * - * @param predicate a filter that is applied to each record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains only those records that do not satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KStream#filterNot` - */ - def filterNot(predicate: (K, V) => Boolean, named: Named): KStream[K, V] = - new KStream(inner.filterNot(predicate.asPredicate, named)) - - /** - * Set a new key (with possibly new type) for each input record. - *

- * The function `mapper` passed is applied to every record and results in the generation of a new - * key `KR`. The function outputs a new [[KStream]] where each record has this new key. - * - * @param mapper a function `(K, V) => KR` that computes a new key for each record - * @return a [[KStream]] that contains records with new key (possibly of different type) and unmodified value - * @see `org.apache.kafka.streams.kstream.KStream#selectKey` - */ - def selectKey[KR](mapper: (K, V) => KR): KStream[KR, V] = - new KStream(inner.selectKey[KR](mapper.asKeyValueMapper)) - - /** - * Set a new key (with possibly new type) for each input record. - *

- * The function `mapper` passed is applied to every record and results in the generation of a new - * key `KR`. The function outputs a new [[KStream]] where each record has this new key. - * - * @param mapper a function `(K, V) => KR` that computes a new key for each record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains records with new key (possibly of different type) and unmodified value - * @see `org.apache.kafka.streams.kstream.KStream#selectKey` - */ - def selectKey[KR](mapper: (K, V) => KR, named: Named): KStream[KR, V] = - new KStream(inner.selectKey[KR](mapper.asKeyValueMapper, named)) - - /** - * Transform each record of the input stream into a new record in the output stream (both key and value type can be - * altered arbitrarily). - *

- * The provided `mapper`, a function `(K, V) => (KR, VR)` is applied to each input record and computes a new output record. - * - * @param mapper a function `(K, V) => (KR, VR)` that computes a new output record - * @return a [[KStream]] that contains records with new key and value (possibly both of different type) - * @see `org.apache.kafka.streams.kstream.KStream#map` - */ - def map[KR, VR](mapper: (K, V) => (KR, VR)): KStream[KR, VR] = - new KStream(inner.map[KR, VR](mapper.asKeyValueMapper)) - - /** - * Transform each record of the input stream into a new record in the output stream (both key and value type can be - * altered arbitrarily). - *

- * The provided `mapper`, a function `(K, V) => (KR, VR)` is applied to each input record and computes a new output record. - * - * @param mapper a function `(K, V) => (KR, VR)` that computes a new output record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains records with new key and value (possibly both of different type) - * @see `org.apache.kafka.streams.kstream.KStream#map` - */ - def map[KR, VR](mapper: (K, V) => (KR, VR), named: Named): KStream[KR, VR] = - new KStream(inner.map[KR, VR](mapper.asKeyValueMapper, named)) - - /** - * Transform the value of each input record into a new value (with possible new type) of the output record. - *

- * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `V => VR` that computes a new output value - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#mapValues` - */ - def mapValues[VR](mapper: V => VR): KStream[K, VR] = - new KStream(inner.mapValues[VR](mapper.asValueMapper)) - - /** - * Transform the value of each input record into a new value (with possible new type) of the output record. - *

- * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `V => VR` that computes a new output value - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#mapValues` - */ - def mapValues[VR](mapper: V => VR, named: Named): KStream[K, VR] = - new KStream(inner.mapValues[VR](mapper.asValueMapper, named)) - - /** - * Transform the value of each input record into a new value (with possible new type) of the output record. - *

- * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `(K, V) => VR` that computes a new output value - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#mapValues` - */ - def mapValues[VR](mapper: (K, V) => VR): KStream[K, VR] = - new KStream(inner.mapValues[VR](mapper.asValueMapperWithKey)) - - /** - * Transform the value of each input record into a new value (with possible new type) of the output record. - *

- * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `(K, V) => VR` that computes a new output value - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#mapValues` - */ - def mapValues[VR](mapper: (K, V) => VR, named: Named): KStream[K, VR] = - new KStream(inner.mapValues[VR](mapper.asValueMapperWithKey, named)) - - /** - * Transform each record of the input stream into zero or more records in the output stream (both key and value type - * can be altered arbitrarily). - *

- * The provided `mapper`, function `(K, V) => Iterable[(KR, VR)]` is applied to each input record and computes zero or more output records. - * - * @param mapper function `(K, V) => Iterable[(KR, VR)]` that computes the new output records - * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#flatMap` - */ - def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)]): KStream[KR, VR] = { - val kvMapper = mapper.tupled.andThen(_.map(ImplicitConversions.tuple2ToKeyValue).asJava) - new KStream(inner.flatMap[KR, VR](((k: K, v: V) => kvMapper(k, v)).asKeyValueMapper)) - } - - /** - * Transform each record of the input stream into zero or more records in the output stream (both key and value type - * can be altered arbitrarily). - *

- * The provided `mapper`, function `(K, V) => Iterable[(KR, VR)]` is applied to each input record and computes zero or more output records. - * - * @param mapper function `(K, V) => Iterable[(KR, VR)]` that computes the new output records - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#flatMap` - */ - def flatMap[KR, VR](mapper: (K, V) => Iterable[(KR, VR)], named: Named): KStream[KR, VR] = { - val kvMapper = mapper.tupled.andThen(_.map(ImplicitConversions.tuple2ToKeyValue).asJava) - new KStream(inner.flatMap[KR, VR](((k: K, v: V) => kvMapper(k, v)).asKeyValueMapper, named)) - } - - /** - * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values - * with the same key in the new stream. - *

- * Transform the value of each input record into zero or more records with the same (unmodified) key in the output - * stream (value type can be altered arbitrarily). - * The provided `mapper`, a function `V => Iterable[VR]` is applied to each input record and computes zero or more output values. - * - * @param mapper a function `V => Iterable[VR]` that computes the new output values - * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type - * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` - */ - def flatMapValues[VR](mapper: V => Iterable[VR]): KStream[K, VR] = - new KStream(inner.flatMapValues[VR](mapper.asValueMapper)) - - /** - * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values - * with the same key in the new stream. - *

- * Transform the value of each input record into zero or more records with the same (unmodified) key in the output - * stream (value type can be altered arbitrarily). - * The provided `mapper`, a function `V => Iterable[VR]` is applied to each input record and computes zero or more output values. - * - * @param mapper a function `V => Iterable[VR]` that computes the new output values - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type - * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` - */ - def flatMapValues[VR](mapper: V => Iterable[VR], named: Named): KStream[K, VR] = - new KStream(inner.flatMapValues[VR](mapper.asValueMapper, named)) - - /** - * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values - * with the same key in the new stream. - *

- * Transform the value of each input record into zero or more records with the same (unmodified) key in the output - * stream (value type can be altered arbitrarily). - * The provided `mapper`, a function `(K, V) => Iterable[VR]` is applied to each input record and computes zero or more output values. - * - * @param mapper a function `(K, V) => Iterable[VR]` that computes the new output values - * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type - * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` - */ - def flatMapValues[VR](mapper: (K, V) => Iterable[VR]): KStream[K, VR] = - new KStream(inner.flatMapValues[VR](mapper.asValueMapperWithKey)) - - /** - * Create a new [[KStream]] by transforming the value of each record in this stream into zero or more values - * with the same key in the new stream. - *

- * Transform the value of each input record into zero or more records with the same (unmodified) key in the output - * stream (value type can be altered arbitrarily). - * The provided `mapper`, a function `(K, V) => Iterable[VR]` is applied to each input record and computes zero or more output values. - * - * @param mapper a function `(K, V) => Iterable[VR]` that computes the new output values - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains more or less records with unmodified keys and new values of different type - * @see `org.apache.kafka.streams.kstream.KStream#flatMapValues` - */ - def flatMapValues[VR](mapper: (K, V) => Iterable[VR], named: Named): KStream[K, VR] = - new KStream(inner.flatMapValues[VR](mapper.asValueMapperWithKey, named)) - - /** - * Print the records of this KStream using the options provided by `Printed` - * - * @param printed options for printing - * @see `org.apache.kafka.streams.kstream.KStream#print` - */ - def print(printed: Printed[K, V]): Unit = inner.print(printed) - - /** - * Perform an action on each record of `KStream` - * - * @param action an action to perform on each record - * @see `org.apache.kafka.streams.kstream.KStream#foreach` - */ - def foreach(action: (K, V) => Unit): Unit = - inner.foreach(action.asForeachAction) - - /** - * Perform an action on each record of `KStream` - * - * @param action an action to perform on each record - * @param named a [[Named]] config used to name the processor in the topology - * @see `org.apache.kafka.streams.kstream.KStream#foreach` - */ - def foreach(action: (K, V) => Unit, named: Named): Unit = - inner.foreach(action.asForeachAction, named) - - /** - * Split this stream. [[BranchedKStream]] can be used for routing the records to different branches depending - * on evaluation against the supplied predicates. - * Stream branching is a stateless record-by-record operation. - * - * @return [[BranchedKStream]] that provides methods for routing the records to different branches. - * @see `org.apache.kafka.streams.kstream.KStream#split` - */ - def split(): BranchedKStream[K, V] = - new BranchedKStream(inner.split()) - - /** - * Split this stream. [[BranchedKStream]] can be used for routing the records to different branches depending - * on evaluation against the supplied predicates. - * Stream branching is a stateless record-by-record operation. - * - * @param named a [[Named]] config used to name the processor in the topology and also to set the name prefix - * for the resulting branches (see [[BranchedKStream]]) - * @return [[BranchedKStream]] that provides methods for routing the records to different branches. - * @see `org.apache.kafka.streams.kstream.KStream#split` - */ - def split(named: Named): BranchedKStream[K, V] = - new BranchedKStream(inner.split(named)) - - /** - * Materialize this stream to a topic and creates a new [[KStream]] from the topic using the `Repartitioned` instance - * for configuration of the `Serde key serde`, `Serde value serde`, `StreamPartitioner`, number of partitions, and - * topic name part. - *

- * The created topic is considered as an internal topic and is meant to be used only by the current Kafka Streams instance. - * Similar to auto-repartitioning, the topic will be created with infinite retention time and data will be automatically purged by Kafka Streams. - * The topic will be named as "\${applicationId}-<name>-repartition", where "applicationId" is user-specified in - * `StreamsConfig` via parameter `APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG`, - * "<name>" is either provided via `Repartitioned#as(String)` or an internally - * generated name, and "-repartition" is a fixed suffix. - *

- * The user can either supply the `Repartitioned` instance as an implicit in scope or they can also provide implicit - * key and value serdes that will be converted to a `Repartitioned` instance implicitly. - *

- * {{{ - * Example: - * - * // brings implicit serdes in scope - * import Serdes._ - * - * //.. - * val clicksPerRegion: KStream[String, Long] = //.. - * - * // Implicit serdes in scope will generate an implicit Produced instance, which - * // will be passed automatically to the call of through below - * clicksPerRegion.repartition - * - * // Similarly you can create an implicit Repartitioned and it will be passed implicitly - * // to the repartition call - * }}} - * - * @param repartitioned the `Repartitioned` instance used to specify `Serdes`, `StreamPartitioner` which determines - * how records are distributed among partitions of the topic, - * part of the topic name, and number of partitions for a repartition topic. - * @return a [[KStream]] that contains the exact same repartitioned records as this [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#repartition` - */ - def repartition(implicit repartitioned: Repartitioned[K, V]): KStream[K, V] = - new KStream(inner.repartition(repartitioned)) - - /** - * Materialize this stream to a topic using the `Produced` instance for - * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner` - *

- * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit - * key and value serdes that will be converted to a `Produced` instance implicitly. - *

- * {{{ - * Example: - * - * // brings implicit serdes in scope - * import Serdes._ - * - * //.. - * val clicksPerRegion: KTable[String, Long] = //.. - * - * // Implicit serdes in scope will generate an implicit Produced instance, which - * // will be passed automatically to the call of through below - * clicksPerRegion.to(topic) - * - * // Similarly you can create an implicit Produced and it will be passed implicitly - * // to the through call - * }}} - * - * @param topic the topic name - * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` - * @see `org.apache.kafka.streams.kstream.KStream#to` - */ - def to(topic: String)(implicit produced: Produced[K, V]): Unit = - inner.to(topic, produced) - - /** - * Dynamically materialize this stream to topics using the `Produced` instance for - * configuration of the `Serde key serde`, `Serde value serde`, and `StreamPartitioner`. - * The topic names for each record to send to is dynamically determined based on the given mapper. - *

- * The user can either supply the `Produced` instance as an implicit in scope or they can also provide implicit - * key and value serdes that will be converted to a `Produced` instance implicitly. - *

- * {{{ - * Example: - * - * // brings implicit serdes in scope - * import Serdes._ - * - * //.. - * val clicksPerRegion: KTable[String, Long] = //.. - * - * // Implicit serdes in scope will generate an implicit Produced instance, which - * // will be passed automatically to the call of through below - * clicksPerRegion.to(topicChooser) - * - * // Similarly you can create an implicit Produced and it will be passed implicitly - * // to the through call - * }}} - * - * @param extractor the extractor to determine the name of the Kafka topic to write to for reach record - * @param produced the instance of Produced that gives the serdes and `StreamPartitioner` - * @see `org.apache.kafka.streams.kstream.KStream#to` - */ - def to(extractor: TopicNameExtractor[K, V])(implicit produced: Produced[K, V]): Unit = - inner.to(extractor, produced) - - /** - * Convert this stream to a [[KTable]]. - * - * @return a [[KTable]] that contains the same records as this [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#toTable` - */ - def toTable: KTable[K, V] = - new KTable(inner.toTable) - - /** - * Convert this stream to a [[KTable]]. - * - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KTable]] that contains the same records as this [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#toTable` - */ - def toTable(named: Named): KTable[K, V] = - new KTable(inner.toTable(named)) - - /** - * Convert this stream to a [[KTable]]. - * - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains the same records as this [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#toTable` - */ - def toTable(materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = - new KTable(inner.toTable(materialized)) - - /** - * Convert this stream to a [[KTable]]. - * - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains the same records as this [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#toTable` - */ - def toTable(named: Named, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = - new KTable(inner.toTable(named, materialized)) - - /** - * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given - * `processorSupplier`). - * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected - * to the `Processor`. - * It's not required to connect global state stores that are added via `addGlobalStore`; - * read-only access to global state stores is available by default. - * - * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with - * the older (deprecated) overload. 
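// A sketch of process() with the processor API this overload expects; the upper-casing
// logic and the KStream[String, String] input are illustrative assumptions.
import org.apache.kafka.streams.processor.api.{Processor, ProcessorContext, ProcessorSupplier, Record}
import org.apache.kafka.streams.scala.kstream.KStream

val upperCaser = new ProcessorSupplier[String, String, String, String] {
  override def get(): Processor[String, String, String, String] =
    new Processor[String, String, String, String] {
      private var context: ProcessorContext[String, String] = _
      override def init(ctx: ProcessorContext[String, String]): Unit = context = ctx
      override def process(record: Record[String, String]): Unit =
        // forward the record downstream with an upper-cased value
        context.forward(record.withValue(record.value.toUpperCase))
    }
}

def shout(input: KStream[String, String]): KStream[String, String] =
  input.process(upperCaser)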
- * - * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.Processor` - * @param stateStoreNames the names of the state store used by the processor - * @see `org.apache.kafka.streams.kstream.KStream#process` - */ - def process[KR, VR](processorSupplier: ProcessorSupplier[K, V, KR, VR], stateStoreNames: String*): KStream[KR, VR] = - new KStream(inner.process(processorSupplier, stateStoreNames: _*)) - - /** - * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given - * `processorSupplier`). - * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected - * to the `Processor`. - * It's not required to connect global state stores that are added via `addGlobalStore`; - * read-only access to global state stores is available by default. - * - * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with - * the older (deprecated) overload. - * - * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.Processor` - * @param named a [[Named]] config used to name the processor in the topology - * @param stateStoreNames the names of the state store used by the processor - * @see `org.apache.kafka.streams.kstream.KStream#process` - */ - def process[KR, VR]( - processorSupplier: ProcessorSupplier[K, V, KR, VR], - named: Named, - stateStoreNames: String* - ): KStream[KR, VR] = - new KStream(inner.process(processorSupplier, named, stateStoreNames: _*)) - - /** - * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given - * `processorSupplier`). - * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected - * to the `FixedKeyProcessor`. - * It's not required to connect global state stores that are added via `addGlobalStore`; - * read-only access to global state stores is available by default. - * - * Note that this overload takes a FixedKeyProcessorSupplier instead of a Function to avoid post-erasure ambiguity with - * the older (deprecated) overload. - * - * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.FixedKeyProcessor` - * @param stateStoreNames the names of the state store used by the processor - * @see `org.apache.kafka.streams.kstream.KStream#process` - */ - def processValues[VR]( - processorSupplier: FixedKeyProcessorSupplier[K, V, VR], - stateStoreNames: String* - ): KStream[K, VR] = - new KStream(inner.processValues(processorSupplier, stateStoreNames: _*)) - - /** - * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given - * `processorSupplier`). - * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected - * to the `FixedKeyProcessor`. - * It's not required to connect global state stores that are added via `addGlobalStore`; - * read-only access to global state stores is available by default. - * - * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with - * the older (deprecated) overload. 
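// A sketch of processValues() with a FixedKeyProcessor that replaces values while leaving
// keys untouched; mapping each value to its length is purely illustrative.
import org.apache.kafka.streams.processor.api.{FixedKeyProcessor, FixedKeyProcessorContext, FixedKeyProcessorSupplier, FixedKeyRecord}
import org.apache.kafka.streams.scala.kstream.KStream

val valueLength = new FixedKeyProcessorSupplier[String, String, Int] {
  override def get(): FixedKeyProcessor[String, String, Int] =
    new FixedKeyProcessor[String, String, Int] {
      private var context: FixedKeyProcessorContext[String, Int] = _
      override def init(ctx: FixedKeyProcessorContext[String, Int]): Unit = context = ctx
      override def process(record: FixedKeyRecord[String, String]): Unit =
        // the key cannot be changed here, only the value is rewritten
        context.forward(record.withValue(record.value.length))
    }
}

def valueLengths(input: KStream[String, String]): KStream[String, Int] =
  input.processValues(valueLength)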
- * - * @param processorSupplier a supplier for `org.apache.kafka.streams.processor.api.FixedKeyProcessor` - * @param named a [[Named]] config used to name the processor in the topology - * @param stateStoreNames the names of the state store used by the processor - * @see `org.apache.kafka.streams.kstream.KStream#process` - */ - def processValues[VR]( - processorSupplier: FixedKeyProcessorSupplier[K, V, VR], - named: Named, - stateStoreNames: String* - ): KStream[K, VR] = - new KStream(inner.processValues(processorSupplier, named, stateStoreNames: _*)) - - /** - * Group the records by their current key into a [[KGroupedStream]] - *

- * The user can either supply the `Grouped` instance as an implicit in scope or they can also provide an implicit - * serdes that will be converted to a `Grouped` instance implicitly. - *

- * {{{ - * Example: - * - * // brings implicit serdes in scope - * import Serdes._ - * - * val clicksPerRegion: KTable[String, Long] = - * userClicksStream - * .leftJoin(userRegionsTable, (clicks: Long, region: String) => (if (region == null) "UNKNOWN" else region, clicks)) - * .map((_, regionWithClicks) => regionWithClicks) - * - * // the groupByKey gets the Grouped instance through an implicit conversion of the - * // serdes brought into scope through the import Serdes._ above - * .groupByKey - * .reduce(_ + _) - * - * // Similarly you can create an implicit Grouped and it will be passed implicitly - * // to the groupByKey call - * }}} - * - * @param grouped the instance of Grouped that gives the serdes - * @return a [[KGroupedStream]] that contains the grouped records of the original [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#groupByKey` - */ - def groupByKey(implicit grouped: Grouped[K, V]): KGroupedStream[K, V] = - new KGroupedStream(inner.groupByKey(grouped)) - - /** - * Group the records of this [[KStream]] on a new key that is selected using the provided key transformation function - * and the `Grouped` instance. - *

- * The user can either supply the `Grouped` instance as an implicit in scope or they can also provide an implicit - * serdes that will be converted to a `Grouped` instance implicitly. - *

- * {{{ - * Example: - * - * // brings implicit serdes in scope - * import Serdes._ - * - * val textLines = streamBuilder.stream[String, String](inputTopic) - * - * val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) - * - * val wordCounts: KTable[String, Long] = - * textLines.flatMapValues(v => pattern.split(v.toLowerCase)) - * - * // the groupBy gets the Grouped instance through an implicit conversion of the - * // serdes brought into scope through the import Serdes._ above - * .groupBy((k, v) => v) - * - * .count() - * }}} - * - * @param selector a function that computes a new key for grouping - * @return a [[KGroupedStream]] that contains the grouped records of the original [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#groupBy` - */ - def groupBy[KR](selector: (K, V) => KR)(implicit grouped: Grouped[KR, V]): KGroupedStream[KR, V] = - new KGroupedStream(inner.groupBy(selector.asKeyValueMapper, grouped)) - - /** - * Join records of this stream with another [[KStream]]'s records using windowed inner equi join with - * serializers and deserializers supplied by the implicit `StreamJoined` instance. - * - * @param otherStream the [[KStream]] to be joined with this stream - * @param joiner a function that computes the join result for a pair of matching records - * @param windows the specification of the `JoinWindows` - * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize - * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply - * key serde, value serde and other value serde in implicit scope and they will be - * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can - * also name the repartition topic (if required), the state stores for the join, and the join - * processor node. - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one for each matched record-pair with the same key and within the joining window intervals - * @see `org.apache.kafka.streams.kstream.KStream#join` - */ - def join[VO, VR](otherStream: KStream[K, VO])( - joiner: (V, VO) => VR, - windows: JoinWindows - )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = - new KStream(inner.join[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) - - /** - * Join records of this stream with another [[KStream]]'s records using windowed left equi join with - * serializers and deserializers supplied by the implicit `StreamJoined` instance. - * - * @param otherStream the [[KStream]] to be joined with this stream - * @param joiner a function that computes the join result for a pair of matching records - * @param windows the specification of the `JoinWindows` - * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize - * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply - * key serde, value serde and other value serde in implicit scope and they will be - * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can - * also name the repartition topic (if required), the state stores for the join, and the join - * processor node. 
- * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one for each matched record-pair with the same key and within the joining window intervals - * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` - */ - def leftJoin[VO, VR](otherStream: KStream[K, VO])( - joiner: (V, VO) => VR, - windows: JoinWindows - )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = - new KStream(inner.leftJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) - - /** - * Join records of this stream with another [[KStream]]'s records using windowed outer equi join with - * serializers and deserializers supplied by the implicit `Joined` instance. - * - * @param otherStream the [[KStream]] to be joined with this stream - * @param joiner a function that computes the join result for a pair of matching records - * @param windows the specification of the `JoinWindows` - * @param streamJoin an implicit `StreamJoin` instance that defines the serdes to be used to serialize/deserialize - * inputs and outputs of the joined streams. Instead of `StreamJoin`, the user can also supply - * key serde, value serde and other value serde in implicit scope and they will be - * converted to the instance of `Stream` through implicit conversion. The `StreamJoin` instance can - * also name the repartition topic (if required), the state stores for the join, and the join - * processor node. - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one for each matched record-pair with the same key and within the joining window intervals - * @see `org.apache.kafka.streams.kstream.KStream#outerJoin` - */ - def outerJoin[VO, VR](otherStream: KStream[K, VO])( - joiner: (V, VO) => VR, - windows: JoinWindows - )(implicit streamJoin: StreamJoined[K, V, VO]): KStream[K, VR] = - new KStream(inner.outerJoin[VO, VR](otherStream.inner, joiner.asValueJoiner, windows, streamJoin)) - - /** - * Join records of this stream with another [[KTable]]'s records using inner equi join with - * serializers and deserializers supplied by the implicit `Joined` instance. - * - * @param table the [[KTable]] to be joined with this stream - * @param joiner a function that computes the join result for a pair of matching records - * @param joined an implicit `Joined` instance that defines the serdes to be used to serialize/deserialize - * inputs and outputs of the joined streams. Instead of `Joined`, the user can also supply - * key serde, value serde and other value serde in implicit scope and they will be - * converted to the instance of `Joined` through implicit conversion - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KStream#join` - */ - def join[VT, VR](table: KTable[K, VT])(joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStream[K, VR] = - new KStream(inner.join[VT, VR](table.inner, joiner.asValueJoiner, joined)) - - /** - * Join records of this stream with another [[KTable]]'s records using left equi join with - * serializers and deserializers supplied by the implicit `Joined` instance. 
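// A sketch of a stream-table left join where the Joined instance comes implicitly from the
// serdes in scope; the "UNKNOWN" fallback mirrors the enrichment pattern used in the
// groupByKey example above, and the concrete types are assumptions.
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.serialization.Serdes._
import org.apache.kafka.streams.scala.kstream.{KStream, KTable}

def enrichWithRegion(
  clicks: KStream[String, Long],
  userRegions: KTable[String, String]
): KStream[String, (String, Long)] =
  clicks.leftJoin(userRegions) { (clickCount, region) =>
    // a left join passes null for the table side when no matching key exists
    (if (region == null) "UNKNOWN" else region, clickCount)
  }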
- * - * @param table the [[KTable]] to be joined with this stream - * @param joiner a function that computes the join result for a pair of matching records - * @param joined an implicit `Joined` instance that defines the serdes to be used to serialize/deserialize - * inputs and outputs of the joined streams. Instead of `Joined`, the user can also supply - * key serde, value serde and other value serde in implicit scope and they will be - * converted to the instance of `Joined` through implicit conversion - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` - */ - def leftJoin[VT, VR](table: KTable[K, VT])(joiner: (V, VT) => VR)(implicit joined: Joined[K, V, VT]): KStream[K, VR] = - new KStream(inner.leftJoin[VT, VR](table.inner, joiner.asValueJoiner, joined)) - - /** - * Join records of this stream with `GlobalKTable`'s records using non-windowed inner equi join. - * - * @param globalKTable the `GlobalKTable` to be joined with this stream - * @param keyValueMapper a function used to map from the (key, value) of this stream - * to the key of the `GlobalKTable` - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one output for each input [[KStream]] record - * @see `org.apache.kafka.streams.kstream.KStream#join` - */ - def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV])( - keyValueMapper: (K, V) => GK, - joiner: (V, GV) => RV - ): KStream[K, RV] = - new KStream( - inner.join[GK, GV, RV]( - globalKTable, - ((k: K, v: V) => keyValueMapper(k, v)).asKeyValueMapper, - ((v: V, gv: GV) => joiner(v, gv)).asValueJoiner - ) - ) - - /** - * Join records of this stream with `GlobalKTable`'s records using non-windowed inner equi join. - * - * @param globalKTable the `GlobalKTable` to be joined with this stream - * @param named a [[Named]] config used to name the processor in the topology - * @param keyValueMapper a function used to map from the (key, value) of this stream - * to the key of the `GlobalKTable` - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one output for each input [[KStream]] record - * @see `org.apache.kafka.streams.kstream.KStream#join` - */ - def join[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], named: Named)( - keyValueMapper: (K, V) => GK, - joiner: (V, GV) => RV - ): KStream[K, RV] = - new KStream( - inner.join[GK, GV, RV]( - globalKTable, - ((k: K, v: V) => keyValueMapper(k, v)).asKeyValueMapper, - ((v: V, gv: GV) => joiner(v, gv)).asValueJoiner, - named - ) - ) - - /** - * Join records of this stream with `GlobalKTable`'s records using non-windowed left equi join. 
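// A sketch of joining a stream against a GlobalKTable: the keyValueMapper selects the lookup
// key from each stream record and the joiner combines the two values. The orderId/productId
// naming is an illustrative assumption; no implicit serdes are required for this join.
import org.apache.kafka.streams.kstream.GlobalKTable
import org.apache.kafka.streams.scala.kstream.KStream

def resolveProductNames(
  orders: KStream[String, String],        // orderId -> productId
  products: GlobalKTable[String, String]  // productId -> productName
): KStream[String, String] =
  orders.join(products)(
    (_, productId) => productId,          // look up the GlobalKTable by product id
    (_, productName) => productName       // keep the resolved product name as the new value
  )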
- * - * @param globalKTable the `GlobalKTable` to be joined with this stream - * @param keyValueMapper a function used to map from the (key, value) of this stream - * to the key of the `GlobalKTable` - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one output for each input [[KStream]] record - * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` - */ - def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV])( - keyValueMapper: (K, V) => GK, - joiner: (V, GV) => RV - ): KStream[K, RV] = - new KStream(inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner)) - - /** - * Join records of this stream with `GlobalKTable`'s records using non-windowed left equi join. - * - * @param globalKTable the `GlobalKTable` to be joined with this stream - * @param named a [[Named]] config used to name the processor in the topology - * @param keyValueMapper a function used to map from the (key, value) of this stream - * to the key of the `GlobalKTable` - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KStream]] that contains join-records for each key and values computed by the given `joiner`, - * one output for each input [[KStream]] record - * @see `org.apache.kafka.streams.kstream.KStream#leftJoin` - */ - def leftJoin[GK, GV, RV](globalKTable: GlobalKTable[GK, GV], named: Named)( - keyValueMapper: (K, V) => GK, - joiner: (V, GV) => RV - ): KStream[K, RV] = - new KStream(inner.leftJoin[GK, GV, RV](globalKTable, keyValueMapper.asKeyValueMapper, joiner.asValueJoiner, named)) - - /** - * Merge this stream and the given stream into one larger stream. - *

- * There is no ordering guarantee between records from this `KStream` and records from the provided `KStream` - * in the merged stream. Relative order is preserved within each input stream though (ie, records within - * one input stream are processed in order). - * - * @param stream a stream which is to be merged into this stream - * @return a merged stream containing all records from this and the provided [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#merge` - */ - def merge(stream: KStream[K, V]): KStream[K, V] = - new KStream(inner.merge(stream.inner)) - - /** - * Merge this stream and the given stream into one larger stream. - *

- * There is no ordering guarantee between records from this `KStream` and records from the provided `KStream` - * in the merged stream. Relative order is preserved within each input stream though (ie, records within - * one input stream are processed in order). - * - * @param named a [[Named]] config used to name the processor in the topology - * @param stream a stream which is to be merged into this stream - * @return a merged stream containing all records from this and the provided [[KStream]] - * @see `org.apache.kafka.streams.kstream.KStream#merge` - */ - def merge(stream: KStream[K, V], named: Named): KStream[K, V] = - new KStream(inner.merge(stream.inner, named)) - - /** - * Perform an action on each record of `KStream`. - *

- * Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection) - * and returns an unchanged stream. - * - * @param action an action to perform on each record - * @see `org.apache.kafka.streams.kstream.KStream#peek` - */ - def peek(action: (K, V) => Unit): KStream[K, V] = - new KStream(inner.peek(action.asForeachAction)) - - /** - * Perform an action on each record of `KStream`. - *

- * Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection) - * and returns an unchanged stream. - * - * @param action an action to perform on each record - * @param named a [[Named]] config used to name the processor in the topology - * @see `org.apache.kafka.streams.kstream.KStream#peek` - */ - def peek(action: (K, V) => Unit, named: Named): KStream[K, V] = - new KStream(inner.peek(action.asForeachAction, named)) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala deleted file mode 100644 index 6a7f42285a..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/KTable.scala +++ /dev/null @@ -1,806 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import scala.jdk.FunctionWrappers.AsJavaBiFunction -import org.apache.kafka.common.utils.Bytes -import org.apache.kafka.streams.kstream.{KTable => KTableJ, TableJoined, ValueJoiner, ValueTransformerWithKeySupplier} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ - FunctionFromFunction, - KeyValueMapperFromFunction, - MapperFromFunction, - PredicateFromFunction, - ValueMapperFromFunction, - ValueMapperWithKeyFromFunction -} -import org.apache.kafka.streams.state.KeyValueStore - -/** - * Wraps the Java class `org.apache.kafka.streams.kstream.KTable` and delegates method calls to the underlying Java object. 
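The non-terminal `merge` and `peek` operations combine naturally in a pipeline. A short illustration with made-up topic names:

    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder

    val builder  = new StreamsBuilder()
    val ordersEu = builder.stream[String, String]("orders-eu")
    val ordersUs = builder.stream[String, String]("orders-us")

    // merge interleaves both inputs (ordering is only preserved per input stream);
    // peek observes each record for logging and returns the stream unchanged.
    val allOrders = ordersEu
      .merge(ordersUs)
      .peek((key, value) => println(s"order $key -> $value"))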
- * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for KTable - * @see `org.apache.kafka.streams.kstream.KTable` - */ -class KTable[K, V](val inner: KTableJ[K, V]) { - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given - * predicate - * - * @param predicate a filter that is applied to each record - * @return a [[KTable]] that contains only those records that satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filter` - */ - def filter(predicate: (K, V) => Boolean): KTable[K, V] = - new KTable(inner.filter(predicate.asPredicate)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given - * predicate - * - * @param predicate a filter that is applied to each record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KTable]] that contains only those records that satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filter` - */ - def filter(predicate: (K, V) => Boolean, named: Named): KTable[K, V] = - new KTable(inner.filter(predicate.asPredicate, named)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given - * predicate - * - * @param predicate a filter that is applied to each record - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains only those records that satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filter` - */ - def filter(predicate: (K, V) => Boolean, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = - new KTable(inner.filter(predicate.asPredicate, materialized)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which satisfies the given - * predicate - * - * @param predicate a filter that is applied to each record - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. 
- * @return a [[KTable]] that contains only those records that satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filter` - */ - def filter( - predicate: (K, V) => Boolean, - named: Named, - materialized: Materialized[K, V, ByteArrayKeyValueStore] - ): KTable[K, V] = - new KTable(inner.filter(predicate.asPredicate, named, materialized)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given - * predicate - * - * @param predicate a filter that is applied to each record - * @return a [[KTable]] that contains only those records that do not satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filterNot` - */ - def filterNot(predicate: (K, V) => Boolean): KTable[K, V] = - new KTable(inner.filterNot(predicate.asPredicate)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given - * predicate - * - * @param predicate a filter that is applied to each record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KTable]] that contains only those records that do not satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filterNot` - */ - def filterNot(predicate: (K, V) => Boolean, named: Named): KTable[K, V] = - new KTable(inner.filterNot(predicate.asPredicate, named)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given - * predicate - * - * @param predicate a filter that is applied to each record - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains only those records that do not satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filterNot` - */ - def filterNot(predicate: (K, V) => Boolean, materialized: Materialized[K, V, ByteArrayKeyValueStore]): KTable[K, V] = - new KTable(inner.filterNot(predicate.asPredicate, materialized)) - - /** - * Create a new [[KTable]] that consists all records of this [[KTable]] which do not satisfy the given - * predicate - * - * @param predicate a filter that is applied to each record - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains only those records that do not satisfy the given predicate - * @see `org.apache.kafka.streams.kstream.KTable#filterNot` - */ - def filterNot( - predicate: (K, V) => Boolean, - named: Named, - materialized: Materialized[K, V, ByteArrayKeyValueStore] - ): KTable[K, V] = - new KTable(inner.filterNot(predicate.asPredicate, named, materialized)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *
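Typical use of `filter`/`filterNot` on a table, assuming an illustrative `balances` topic and the implicit serdes in scope:

    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder

    val builder  = new StreamsBuilder()
    val balances = builder.table[String, Long]("balances")

    // Keep only records whose value satisfies the predicate...
    val positive    = balances.filter((_, balance) => balance > 0)
    // ...and the complement via filterNot.
    val nonPositive = balances.filterNot((_, balance) => balance > 0)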

- * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `V => VR` that computes a new output value - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR](mapper: V => VR): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapper)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `V => VR` that computes a new output value - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR](mapper: V => VR, named: Named): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapper, named)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `V => VR` that computes a new output value - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR](mapper: V => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapper, materialized)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `V => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `V => VR` that computes a new output value - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR]( - mapper: V => VR, - named: Named, - materialized: Materialized[K, VR, ByteArrayKeyValueStore] - ): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapper, named, materialized)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `(K, V) => VR` that computes a new output value - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR](mapper: (K, V) => VR): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `(K, V) => VR` that computes a new output value - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR](mapper: (K, V) => VR, named: Named): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, named)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `(K, V) => VR` that computes a new output value - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR](mapper: (K, V) => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, materialized)) - - /** - * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value - * (with possible new type) in the new [[KTable]]. - *

- * The provided `mapper`, a function `(K, V) => VR` is applied to each input record value and computes a new value for it - * - * @param mapper , a function `(K, V) => VR` that computes a new output value - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KTable#mapValues` - */ - def mapValues[VR]( - mapper: (K, V) => VR, - named: Named, - materialized: Materialized[K, VR, ByteArrayKeyValueStore] - ): KTable[K, VR] = - new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, named, materialized)) - - /** - * Convert this changelog stream to a [[KStream]]. - * - * @return a [[KStream]] that contains the same records as this [[KTable]] - * @see `org.apache.kafka.streams.kstream.KTable#toStream` - */ - def toStream: KStream[K, V] = - new KStream(inner.toStream) - - /** - * Convert this changelog stream to a [[KStream]]. - * - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains the same records as this [[KTable]] - * @see `org.apache.kafka.streams.kstream.KTable#toStream` - */ - def toStream(named: Named): KStream[K, V] = - new KStream(inner.toStream(named)) - - /** - * Convert this changelog stream to a [[KStream]] using the given key/value mapper to select the new key - * - * @param mapper a function that computes a new key for each record - * @return a [[KStream]] that contains the same records as this [[KTable]] - * @see `org.apache.kafka.streams.kstream.KTable#toStream` - */ - def toStream[KR](mapper: (K, V) => KR): KStream[KR, V] = - new KStream(inner.toStream[KR](mapper.asKeyValueMapper)) - - /** - * Convert this changelog stream to a [[KStream]] using the given key/value mapper to select the new key - * - * @param mapper a function that computes a new key for each record - * @param named a [[Named]] config used to name the processor in the topology - * @return a [[KStream]] that contains the same records as this [[KTable]] - * @see `org.apache.kafka.streams.kstream.KTable#toStream` - */ - def toStream[KR](mapper: (K, V) => KR, named: Named): KStream[KR, V] = - new KStream(inner.toStream[KR](mapper.asKeyValueMapper, named)) - - /** - * Suppress some updates from this changelog stream, determined by the supplied `org.apache.kafka.streams.kstream.Suppressed` configuration. - * - * This controls what updates downstream table and stream operations will receive. - * - * @param suppressed Configuration object determining what, if any, updates to suppress. - * @return A new KTable with the desired suppression characteristics. - * @see `org.apache.kafka.streams.kstream.KTable#suppress` - */ - def suppress(suppressed: org.apache.kafka.streams.kstream.Suppressed[_ >: K]): KTable[K, V] = - new KTable(inner.suppress(suppressed)) - - /** - * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value, (with possibly new type). - * Transform the value of each input record into a new value (with possible new type) of the output record. - * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input - * record value and computes a new value for it. 
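The `mapValues` and `toStream` variants above are usually invoked as below; the topic name and the re-keying scheme are illustrative only:

    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder

    val builder = new StreamsBuilder()
    val users   = builder.table[String, String]("users")

    // Value-only mapper.
    val upper  = users.mapValues((name: String) => name.toUpperCase)
    // Mapper with read-only access to the key.
    val tagged = users.mapValues((id: String, name: String) => s"$id:$name")
    // Turn the changelog back into a stream, re-keying on the way out.
    val asStream = tagged.toStream((id: String, _: String) => s"user-$id")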
- * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing access to additional state-stores, - * and to the `ProcessorContext`. - * If the downstream topology uses aggregation functions, (e.g. `KGroupedTable#reduce`, `KGroupedTable#aggregate`, etc), - * care must be taken when dealing with state, (either held in state-stores or transformer instances), to ensure correct - * aggregate results. - * In contrast, if the resulting KTable is materialized, (cf. `#transformValues(ValueTransformerWithKeySupplier, Materialized, String...)`), - * such concerns are handled for you. - * In order to assign a state, the state must be created and registered - * beforehand via stores added via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer` - * - * @param valueTransformerWithKeySupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`. - * At least one transformer instance will be created per streaming task. - * Transformer implementations doe not need to be thread-safe. - * @param stateStoreNames the names of the state stores used by the processor - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#transformValues` - */ - def transformValues[VR]( - valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], - stateStoreNames: String* - ): KTable[K, VR] = - new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, stateStoreNames: _*)) - - /** - * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value, (with possibly new type). - * Transform the value of each input record into a new value (with possible new type) of the output record. - * A `ValueTransformerWithKey` (provided by the given `ValueTransformerWithKeySupplier`) is applied to each input - * record value and computes a new value for it. - * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing access to additional state-stores, - * and to the `ProcessorContext`. - * If the downstream topology uses aggregation functions, (e.g. `KGroupedTable#reduce`, `KGroupedTable#aggregate`, etc), - * care must be taken when dealing with state, (either held in state-stores or transformer instances), to ensure correct - * aggregate results. - * In contrast, if the resulting KTable is materialized, (cf. `#transformValues(ValueTransformerWithKeySupplier, Materialized, String...)`), - * such concerns are handled for you. - * In order to assign a state, the state must be created and registered - * beforehand via stores added via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer` - * - * @param valueTransformerWithKeySupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey`. - * At least one transformer instance will be created per streaming task. - * Transformer implementations doe not need to be thread-safe. 
- * @param named a [[Named]] config used to name the processor in the topology - * @param stateStoreNames the names of the state stores used by the processor - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#transformValues` - */ - def transformValues[VR]( - valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], - named: Named, - stateStoreNames: String* - ): KTable[K, VR] = - new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, named, stateStoreNames: _*)) - - /** - * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value, (with possibly new type). - * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input - * record value and computes a new value for it. - * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing stateful, rather than stateless, - * record-by-record operation, access to additional state-stores, and access to the `ProcessorContext`. - * In order to assign a state, the state must be created and registered - * beforehand via stores added via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer` - * The resulting `KTable` is materialized into another state store (additional to the provided state store names) - * as specified by the user via `Materialized` parameter, and is queryable through its given name. - * - * @param valueTransformerWithKeySupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` - * At least one transformer instance will be created per streaming task. - * Transformer implementations doe not need to be thread-safe. - * @param materialized an instance of `Materialized` used to describe how the state store of the - * resulting table should be materialized. - * @param stateStoreNames the names of the state stores used by the processor - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#transformValues` - */ - def transformValues[VR]( - valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]], - stateStoreNames: String* - ): KTable[K, VR] = - new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, materialized, stateStoreNames: _*)) - - /** - * Create a new `KTable` by transforming the value of each record in this `KTable` into a new value, (with possibly new type). - * A `ValueTransformer` (provided by the given `ValueTransformerSupplier`) is applied to each input - * record value and computes a new value for it. - * This is similar to `#mapValues(ValueMapperWithKey)`, but more flexible, allowing stateful, rather than stateless, - * record-by-record operation, access to additional state-stores, and access to the `ProcessorContext`. - * In order to assign a state, the state must be created and registered - * beforehand via stores added via `addStateStore` or `addGlobalStore` before they can be connected to the `Transformer` - * The resulting `KTable` is materialized into another state store (additional to the provided state store names) - * as specified by the user via `Materialized` parameter, and is queryable through its given name. 
- * - * @param valueTransformerWithKeySupplier a instance of `ValueTransformerWithKeySupplier` that generates a `ValueTransformerWithKey` - * At least one transformer instance will be created per streaming task. - * Transformer implementations doe not need to be thread-safe. - * @param materialized an instance of `Materialized` used to describe how the state store of the - * resulting table should be materialized. - * @param named a [[Named]] config used to name the processor in the topology - * @param stateStoreNames the names of the state stores used by the processor - * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type) - * @see `org.apache.kafka.streams.kstream.KStream#transformValues` - */ - def transformValues[VR]( - valueTransformerWithKeySupplier: ValueTransformerWithKeySupplier[K, V, VR], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]], - named: Named, - stateStoreNames: String* - ): KTable[K, VR] = - new KTable(inner.transformValues[VR](valueTransformerWithKeySupplier, materialized, named, stateStoreNames: _*)) - - /** - * Re-groups the records of this [[KTable]] using the provided key/value mapper - * and `Serde`s as specified by `Grouped`. - * - * @param selector a function that computes a new grouping key and value to be aggregated - * @param grouped the `Grouped` instance used to specify `Serdes` - * @return a [[KGroupedTable]] that contains the re-grouped records of the original [[KTable]] - * @see `org.apache.kafka.streams.kstream.KTable#groupBy` - */ - def groupBy[KR, VR](selector: (K, V) => (KR, VR))(implicit grouped: Grouped[KR, VR]): KGroupedTable[KR, VR] = - new KGroupedTable(inner.groupBy(selector.asKeyValueMapper, grouped)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#join` - */ - def join[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = - new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param named a [[Named]] config used to name the processor in the topology - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#join` - */ - def join[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = - new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, named)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. 
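`groupBy` re-keys a table before aggregation, while the single-argument `join` performs a primary-key join. A sketch with assumed `users` and `quotas` topics:

    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder

    val builder = new StreamsBuilder()
    val users   = builder.table[String, String]("users")   // userId -> region
    val quotas  = builder.table[String, Long]("quotas")    // userId -> quota

    // Re-group by the region carried in the value and count users per region;
    // the implicit Grouped comes from the serdes in scope.
    val usersPerRegion = users
      .groupBy((userId, region) => (region, userId))
      .count()

    // Primary-key inner equi join of the two tables on userId.
    val usersWithQuota = users.join(quotas)((region, quota) => s"$region:$quota")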
- * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#join` - */ - def join[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( - joiner: (V, VO) => VR - ): KTable[K, VR] = - new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#join` - */ - def join[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( - joiner: (V, VO) => VR - ): KTable[K, VR] = - new KTable(inner.join[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def leftJoin[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = - new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param named a [[Named]] config used to name the processor in the topology - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def leftJoin[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = - new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, named)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. 
- * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def leftJoin[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( - joiner: (V, VO) => VR - ): KTable[K, VR] = - new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param named a [[Named]] config used to name the processor in the topology - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def leftJoin[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( - joiner: (V, VO) => VR - ): KTable[K, VR] = - new KTable(inner.leftJoin[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def outerJoin[VO, VR](other: KTable[K, VO])(joiner: (V, VO) => VR): KTable[K, VR] = - new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param named a [[Named]] config used to name the processor in the topology - * @param joiner a function that computes the join result for a pair of matching records - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def outerJoin[VO, VR](other: KTable[K, VO], named: Named)(joiner: (V, VO) => VR): KTable[K, VR] = - new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, named)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. 
- * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def outerJoin[VO, VR](other: KTable[K, VO], materialized: Materialized[K, VR, ByteArrayKeyValueStore])( - joiner: (V, VO) => VR - ): KTable[K, VR] = - new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed outer equi join. - * - * @param other the other [[KTable]] to be joined with this [[KTable]] - * @param named a [[Named]] config used to name the processor in the topology - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - * @see `org.apache.kafka.streams.kstream.KTable#leftJoin` - */ - def outerJoin[VO, VR](other: KTable[K, VO], named: Named, materialized: Materialized[K, VR, ByteArrayKeyValueStore])( - joiner: (V, VO) => VR - ): KTable[K, VR] = - new KTable(inner.outerJoin[VO, VR](other.inner, joiner.asValueJoiner, named, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's value - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def join[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: Function[V, KO], - joiner: ValueJoiner[V, VO, VR], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's key and value - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. 
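An outer join emits a result whenever either input has a value for the key, so either side passed to the joiner may be null. For instance, with hypothetical contact topics:

    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder

    val builder = new StreamsBuilder()
    val emails  = builder.table[String, String]("user-emails")
    val phones  = builder.table[String, String]("user-phones")

    // The joiner runs whenever either side has a value for the key; the missing side is null.
    val contacts = emails.outerJoin(phones) { (email, phone) =>
      s"${Option(email).getOrElse("-")}/${Option(phone).getOrElse("-")}"
    }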
- * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def join[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: (K, V) => KO, - joiner: ValueJoiner[V, VO, VR], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.join(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's value - * @param joiner a function that computes the join result for a pair of matching records - * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure - * partitioners and names of internal topics and stores - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def join[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: Function[V, KO], - joiner: ValueJoiner[V, VO, VR], - tableJoined: TableJoined[K, KO], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.join(other.inner, keyExtractor.asJavaFunction, joiner, tableJoined, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed inner join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's key and value - * @param joiner a function that computes the join result for a pair of matching records - * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure - * partitioners and names of internal topics and stores - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def join[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: (K, V) => KO, - joiner: ValueJoiner[V, VO, VR], - tableJoined: TableJoined[K, KO], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.join(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, tableJoined, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. 
- * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's value - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def leftJoin[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: Function[V, KO], - joiner: ValueJoiner[V, VO, VR], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's key and value - * @param joiner a function that computes the join result for a pair of matching records - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def leftJoin[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: (K, V) => KO, - joiner: ValueJoiner[V, VO, VR], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.leftJoin(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's value - * @param joiner a function that computes the join result for a pair of matching records - * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure - * partitioners and names of internal topics and stores - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def leftJoin[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: Function[V, KO], - joiner: ValueJoiner[V, VO, VR], - tableJoined: TableJoined[K, KO], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.leftJoin(other.inner, keyExtractor.asJavaFunction, joiner, tableJoined, materialized)) - - /** - * Join records of this [[KTable]] with another [[KTable]]'s records using non-windowed left join. 
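The foreign-key join overloads take the extractor, the Java `ValueJoiner`, and an explicit `Materialized`. A sketch in which the order value is assumed to be the customer id itself (topics and types are placeholders):

    import org.apache.kafka.common.utils.Bytes
    import org.apache.kafka.streams.kstream.ValueJoiner
    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder
    import org.apache.kafka.streams.scala.kstream.Materialized
    import org.apache.kafka.streams.state.KeyValueStore

    val builder   = new StreamsBuilder()
    val orders    = builder.table[String, String]("orders")      // orderId -> customerId
    val customers = builder.table[String, String]("customers")   // customerId -> customer name

    // The joiner sees this table's value first and the other table's value second.
    val joiner: ValueJoiner[String, String, String] =
      (customerId, customerName) => s"$customerName ($customerId)"

    val ordersWithCustomer = orders.join(
      customers,
      (customerId: String) => customerId,   // foreign-key extractor: this value -> other table's key
      joiner,
      Materialized.`with`[String, String, KeyValueStore[Bytes, Array[Byte]]]
    )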
Records from this - * table are joined according to the result of keyExtractor on the other KTable. - * - * @param other the other [[KTable]] to be joined with this [[KTable]], keyed on the value obtained from keyExtractor - * @param keyExtractor a function that extracts the foreign key from this table's key and value - * @param joiner a function that computes the join result for a pair of matching records - * @param tableJoined a `org.apache.kafka.streams.kstream.TableJoined` used to configure - * partitioners and names of internal topics and stores - * @param materialized a `Materialized` that describes how the `StateStore` for the resulting [[KTable]] - * should be materialized. - * @return a [[KTable]] that contains join-records for each key and values computed by the given joiner, - * one for each matched record-pair with the same key - */ - def leftJoin[VR, KO, VO]( - other: KTable[KO, VO], - keyExtractor: (K, V) => KO, - joiner: ValueJoiner[V, VO, VR], - tableJoined: TableJoined[K, KO], - materialized: Materialized[K, VR, KeyValueStore[Bytes, Array[Byte]]] - ): KTable[K, VR] = - new KTable(inner.leftJoin(other.inner, AsJavaBiFunction[K, V, KO](keyExtractor), joiner, tableJoined, materialized)) - - /** - * Get the name of the local state store used that can be used to query this [[KTable]]. - * - * @return the underlying state store name, or `null` if this [[KTable]] cannot be queried. - */ - def queryableStoreName: String = - inner.queryableStoreName -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala deleted file mode 100644 index 55c09896ac..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Materialized.scala +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{Materialized => MaterializedJ} -import org.apache.kafka.streams.processor.StateStore -import org.apache.kafka.streams.scala.{ByteArrayKeyValueStore, ByteArraySessionStore, ByteArrayWindowStore} -import org.apache.kafka.streams.state.{KeyValueBytesStoreSupplier, SessionBytesStoreSupplier, WindowBytesStoreSupplier} - -object Materialized { - - /** - * Materialize a `org.apache.kafka.streams.processor.StateStore` with the provided key and value Serdes. - * An internal name will be used for the store. - * - * @tparam K key type - * @tparam V value type - * @tparam S store type - * @param keySerde the key Serde to use. - * @param valueSerde the value Serde to use. 
- * @return a new [[Materialized]] instance with the given key and value serdes - */ - def `with`[K, V, S <: StateStore](implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, S] = - MaterializedJ.`with`(keySerde, valueSerde) - - /** - * Materialize a `org.apache.kafka.streams.processor.StateStore` with the given name. - * - * @tparam K key type of the store - * @tparam V value type of the store - * @tparam S type of the `org.apache.kafka.streams.processor.StateStore` - * @param storeName the name of the underlying [[org.apache.kafka.streams.scala.kstream.KTable]] state store; - * valid characters are ASCII alphanumerics, '.', '_' and '-'. - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @return a new [[Materialized]] instance with the given storeName - */ - def as[K, V, S <: StateStore]( - storeName: String - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, S] = - MaterializedJ.as(storeName).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Materialize a `org.apache.kafka.streams.state.WindowStore` using the provided - * `org.apache.kafka.streams.state.WindowBytesStoreSupplier`. - * - * Important: Custom subclasses are allowed here, but they should respect the retention contract: - * Window stores are required to retain windows at least as long as (window size + window grace period). - * Stores constructed via `org.apache.kafka.streams.state.Stores` already satisfy this contract. - * - * @tparam K key type of the store - * @tparam V value type of the store - * @param supplier the `org.apache.kafka.streams.state.WindowBytesStoreSupplier` used to materialize the store - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @return a new [[Materialized]] instance with the given supplier - */ - def as[K, V]( - supplier: WindowBytesStoreSupplier - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArrayWindowStore] = - MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Materialize a `org.apache.kafka.streams.state.SessionStore` using the provided - * `org.apache.kafka.streams.state.SessionBytesStoreSupplier`. - * - * Important: Custom subclasses are allowed here, but they should respect the retention contract: - * Session stores are required to retain windows at least as long as (session inactivity gap + session grace period). - * Stores constructed via `org.apache.kafka.streams.state.Stores` already satisfy this contract. - * - * @tparam K key type of the store - * @tparam V value type of the store - * @param supplier the `org.apache.kafka.streams.state.SessionBytesStoreSupplier` used to materialize the store - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @return a new [[Materialized]] instance with the given supplier - */ - def as[K, V]( - supplier: SessionBytesStoreSupplier - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArraySessionStore] = - MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Materialize a `org.apache.kafka.streams.state.KeyValueStore` using the provided - * `org.apache.kafka.streams.state.KeyValueBytesStoreSupplier`. - * - * @tparam K key type of the store - * @tparam V value type of the store - * @param supplier the `org.apache.kafka.streams.state.KeyValueBytesStoreSupplier` used to - * materialize the store - * @param keySerde the key serde to use. 
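In practice the `Materialized` helpers are called with the serdes left implicit; the store name below is an arbitrary example:

    import org.apache.kafka.streams.scala.ByteArrayKeyValueStore
    import org.apache.kafka.streams.scala.kstream.Materialized
    import org.apache.kafka.streams.scala.serialization.Serdes._

    // Anonymous store: only the serdes (resolved implicitly) are pinned down.
    val anonymous  = Materialized.`with`[String, Long, ByteArrayKeyValueStore]
    // Named store: also queryable under the given name via interactive queries.
    val countStore = Materialized.as[String, Long, ByteArrayKeyValueStore]("word-count-store")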
- * @param valueSerde the value serde to use. - * @return a new [[Materialized]] instance with the given supplier - */ - def as[K, V]( - supplier: KeyValueBytesStoreSupplier - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): MaterializedJ[K, V, ByteArrayKeyValueStore] = - MaterializedJ.as(supplier).withKeySerde(keySerde).withValueSerde(valueSerde) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala deleted file mode 100644 index 1f4498b8af..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Produced.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{Produced => ProducedJ} -import org.apache.kafka.streams.processor.StreamPartitioner - -object Produced { - - /** - * Create a Produced instance with provided keySerde and valueSerde. - * - * @tparam K key type - * @tparam V value type - * @param keySerde Serde to use for serializing the key - * @param valueSerde Serde to use for serializing the value - * @return A new [[Produced]] instance configured with keySerde and valueSerde - * @see KStream#through(String, Produced) - * @see KStream#to(String, Produced) - */ - def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): ProducedJ[K, V] = - ProducedJ.`with`(keySerde, valueSerde) - - /** - * Create a Produced instance with provided keySerde, valueSerde, and partitioner. 
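A `Produced` instance is normally derived implicitly, but the explicit form mirrors the factory documented above (topic names are placeholders):

    import org.apache.kafka.streams.scala.ImplicitConversions._
    import org.apache.kafka.streams.scala.kstream.Produced
    import org.apache.kafka.streams.scala.serialization.Serdes._
    import org.apache.kafka.streams.scala.StreamsBuilder

    val builder = new StreamsBuilder()
    val counts  = builder.stream[String, Long]("word-counts")

    // Usually the implicitly derived Produced is enough...
    counts.to("word-counts-out")
    // ...but it can also be built and passed explicitly.
    counts.to("word-counts-out")(Produced.`with`[String, Long])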
- * - * @tparam K key type - * @tparam V value type - * @param partitioner the function used to determine how records are distributed among partitions of the topic, - * if not specified and `keySerde` provides a - * `org.apache.kafka.streams.kstream.internals.WindowedSerializer` for the key - * `org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner` will be - * used—otherwise `org.apache.kafka.clients.producer.internals.DefaultPartitioner` - * will be used - * @param keySerde Serde to use for serializing the key - * @param valueSerde Serde to use for serializing the value - * @return A new [[Produced]] instance configured with keySerde, valueSerde, and partitioner - * @see KStream#through(String, Produced) - * @see KStream#to(String, Produced) - */ - def `with`[K, V]( - partitioner: StreamPartitioner[K, V] - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): ProducedJ[K, V] = - ProducedJ.`with`(keySerde, valueSerde, partitioner) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala deleted file mode 100644 index f968c859c5..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/Repartitioned.scala +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{Repartitioned => RepartitionedJ} -import org.apache.kafka.streams.processor.StreamPartitioner - -object Repartitioned { - - /** - * Create a Repartitioned instance with provided keySerde and valueSerde. - * - * @tparam K key type - * @tparam V value type - * @param keySerde Serde to use for serializing the key - * @param valueSerde Serde to use for serializing the value - * @return A new [[Repartitioned]] instance configured with keySerde and valueSerde - * @see KStream#repartition(Repartitioned) - */ - def `with`[K, V](implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = - RepartitionedJ.`with`(keySerde, valueSerde) - - /** - * Create a Repartitioned instance with provided keySerde, valueSerde, and name used as part of the repartition topic. - * - * @tparam K key type - * @tparam V value type - * @param name the name used as a processor named and part of the repartition topic name. 
- * @param keySerde Serde to use for serializing the key - * @param valueSerde Serde to use for serializing the value - * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and processor and repartition topic name - * @see KStream#repartition(Repartitioned) - */ - def `with`[K, V](name: String)(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = - RepartitionedJ.`as`(name).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Create a Repartitioned instance with provided keySerde, valueSerde, and partitioner. - * - * @tparam K key type - * @tparam V value type - * @param partitioner the function used to determine how records are distributed among partitions of the topic, - * if not specified and `keySerde` provides a - * `org.apache.kafka.streams.kstream.internals.WindowedSerializer` for the key - * `org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner` will be - * used—otherwise `org.apache.kafka.clients.producer.internals.DefaultPartitioner` - * will be used - * @param keySerde Serde to use for serializing the key - * @param valueSerde Serde to use for serializing the value - * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and partitioner - * @see KStream#repartition(Repartitioned) - */ - def `with`[K, V]( - partitioner: StreamPartitioner[K, V] - )(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = - RepartitionedJ.`streamPartitioner`(partitioner).withKeySerde(keySerde).withValueSerde(valueSerde) - - /** - * Create a Repartitioned instance with provided keySerde, valueSerde, and number of partitions for repartition topic. - * - * @tparam K key type - * @tparam V value type - * @param numberOfPartitions number of partitions used when creating repartition topic - * @param keySerde Serde to use for serializing the key - * @param valueSerde Serde to use for serializing the value - * @return A new [[Repartitioned]] instance configured with keySerde, valueSerde, and number of partitions - * @see KStream#repartition(Repartitioned) - */ - def `with`[K, V](numberOfPartitions: Int)(implicit keySerde: Serde[K], valueSerde: Serde[V]): RepartitionedJ[K, V] = - RepartitionedJ.`numberOfPartitions`(numberOfPartitions).withKeySerde(keySerde).withValueSerde(valueSerde) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala deleted file mode 100644 index 1b20179d5d..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedCogroupedKStream.scala +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
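Similarly, a small sketch of the `Repartitioned` factories above: repartitioning through an explicitly named topic while the implicit serdes flow in. The topic name, partition count, and helper name are assumptions for illustration.

```scala
import org.apache.kafka.streams.scala.kstream.{KStream, Repartitioned}
import org.apache.kafka.streams.scala.serialization.Serdes._

// Repartition through a named repartition topic with six partitions; the
// implicit String/Long serdes are picked up by the factory method.
def repartitionByUser(clicks: KStream[String, Long]): KStream[String, Long] =
  clicks.repartition(Repartitioned.`with`[String, Long]("clicks-by-user").withNumberOfPartitions(6))
```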
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.{SessionWindowedCogroupedKStream => SessionWindowedCogroupedKStreamJ, Windowed} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{InitializerFromFunction, MergerFromFunction} - -/** - * Wraps the Java class SessionWindowedCogroupedKStream and delegates method calls to the underlying Java object. - * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for SessionWindowedCogroupedKStream - * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream` - */ -class SessionWindowedCogroupedKStream[K, V](val inner: SessionWindowedCogroupedKStreamJ[K, V]) { - - /** - * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. - * - * @param initializer the initializer function - * @param merger a function that combines two aggregation results. - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent - * the latest (rolling) aggregate for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream#aggregate` - */ - def aggregate(initializer: => V, merger: (K, V, V) => V)(implicit - materialized: Materialized[K, V, ByteArraySessionStore] - ): KTable[Windowed[K], V] = - new KTable(inner.aggregate((() => initializer).asInitializer, merger.asMerger, materialized)) - - /** - * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. - * - * @param initializer the initializer function - * @param merger a function that combines two aggregation results. - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent - * the latest (rolling) aggregate for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream#aggregate` - */ - def aggregate(initializer: => V, merger: (K, V, V) => V, named: Named)(implicit - materialized: Materialized[K, V, ByteArraySessionStore] - ): KTable[Windowed[K], V] = - new KTable(inner.aggregate((() => initializer).asInitializer, merger.asMerger, named, materialized)) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala deleted file mode 100644 index 3d6e157ecd..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/SessionWindowedKStream.scala +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
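For context, a hedged sketch of how the session-windowed cogroup wrapper above is typically driven from the Scala DSL; the topic names, the 30-minute inactivity gap, and the summing logic are invented for this example.

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.SessionWindows
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder

// Cogroup two click streams and sum their counts per user session.
val webClicks    = builder.stream[String, Long]("web-clicks")
val mobileClicks = builder.stream[String, Long]("mobile-clicks")

val sessionTotals = webClicks.groupByKey
  .cogroup((_: String, clicks: Long, total: Long) => total + clicks)
  .cogroup(mobileClicks.groupByKey, (_: String, clicks: Long, total: Long) => total + clicks)
  .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(Duration.ofMinutes(30)))
  .aggregate(0L, (_: String, left: Long, right: Long) => left + right)
```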
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.internals.KTableImpl -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.kstream.{KTable => KTableJ, SessionWindowedKStream => SessionWindowedKStreamJ, Windowed} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ - AggregatorFromFunction, - InitializerFromFunction, - MergerFromFunction, - ReducerFromFunction, - ValueMapperFromFunction -} - -/** - * Wraps the Java class SessionWindowedKStream and delegates method calls to the underlying Java object. - * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for SessionWindowedKStream - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream` - */ -class SessionWindowedKStream[K, V](val inner: SessionWindowedKStreamJ[K, V]) { - - /** - * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. - * - * @param initializer the initializer function - * @param aggregator the aggregator function - * @param merger the merger function - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent - * the latest (rolling) aggregate for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#aggregate` - */ - def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR, merger: (K, VR, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArraySessionStore] - ): KTable[Windowed[K], VR] = - new KTable( - inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, merger.asMerger, materialized) - ) - - /** - * Aggregate the values of records in this stream by the grouped key and defined `SessionWindows`. - * - * @param initializer the initializer function - * @param aggregator the aggregator function - * @param merger the merger function - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent - * the latest (rolling) aggregate for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#aggregate` - */ - def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR, merger: (K, VR, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArraySessionStore] - ): KTable[Windowed[K], VR] = - new KTable( - inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, merger.asMerger, named, materialized) - ) - - /** - * Count the number of records in this stream by the grouped key into `SessionWindows`. - * - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a windowed [[KTable]] that contains "update" records with unmodified keys and `Long` values - * that represent the latest (rolling) count (i.e., number of records) for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#count` - */ - def count()(implicit materialized: Materialized[K, Long, ByteArraySessionStore]): KTable[Windowed[K], Long] = { - val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = - inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArraySessionStore]]) - val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArraySessionStore, java.lang.Long]] - new KTable( - javaCountTable.mapValues[Long]( - ((l: java.lang.Long) => Long2long(l)).asValueMapper, - Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) - ) - ) - } - - /** - * Count the number of records in this stream by the grouped key into `SessionWindows`. - * - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a windowed [[KTable]] that contains "update" records with unmodified keys and `Long` values - * that represent the latest (rolling) count (i.e., number of records) for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#count` - */ - def count( - named: Named - )(implicit materialized: Materialized[K, Long, ByteArraySessionStore]): KTable[Windowed[K], Long] = { - val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = - inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArraySessionStore]]) - val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArraySessionStore, java.lang.Long]] - new KTable( - javaCountTable.mapValues[Long]( - ((l: java.lang.Long) => Long2long(l)).asValueMapper, - Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) - ) - ) - } - - /** - * Combine values of this stream by the grouped key into `SessionWindows`. - * - * @param reducer a reducer function that computes a new aggregate result. - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent - * the latest (rolling) aggregate for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#reduce` - */ - def reduce(reducer: (V, V) => V)(implicit - materialized: Materialized[K, V, ByteArraySessionStore] - ): KTable[Windowed[K], V] = - new KTable(inner.reduce(reducer.asReducer, materialized)) - - /** - * Combine values of this stream by the grouped key into `SessionWindows`. - * - * @param reducer a reducer function that computes a new aggregate result. - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a windowed [[KTable]] that contains "update" records with unmodified keys, and values that represent - * the latest (rolling) aggregate for each key within a window - * @see `org.apache.kafka.streams.kstream.SessionWindowedKStream#reduce` - */ - def reduce(reducer: (V, V) => V, named: Named)(implicit - materialized: Materialized[K, V, ByteArraySessionStore] - ): KTable[Windowed[K], V] = - new KTable(inner.reduce(reducer.asReducer, named, materialized)) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala deleted file mode 100644 index c8c08ba0fd..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/StreamJoined.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.kstream.{StreamJoined => StreamJoinedJ} -import org.apache.kafka.streams.state.WindowBytesStoreSupplier - -object StreamJoined { - - /** - * Create an instance of [[StreamJoined]] with key, value, and otherValue - * `org.apache.kafka.common.serialization.Serde` instances. - * `null` values are accepted and will be replaced by the default serdes as defined in config. - * - * @tparam K key type - * @tparam V value type - * @tparam VO other value type - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used - * @return new [[StreamJoined]] instance with the provided serdes - */ - def `with`[K, V, VO](implicit - keySerde: Serde[K], - valueSerde: Serde[V], - otherValueSerde: Serde[VO] - ): StreamJoinedJ[K, V, VO] = - StreamJoinedJ.`with`(keySerde, valueSerde, otherValueSerde) - - /** - * Create an instance of [[StreamJoined]] with store suppliers for the calling stream - * and the other stream. Also adds the key, value, and otherValue - * `org.apache.kafka.common.serialization.Serde` instances. - * `null` values are accepted and will be replaced by the default serdes as defined in config. - * - * @tparam K key type - * @tparam V value type - * @tparam VO other value type - * @param supplier store supplier to use - * @param otherSupplier other store supplier to use - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @param otherValueSerde the otherValue serde to use. 
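A minimal session-window count using the `SessionWindowedKStream` wrapper above; the topic name and the 5-minute inactivity gap are placeholders.

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.SessionWindows
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder

// Count events per key and session, closing a session after 5 minutes of
// inactivity; the implicit Materialized and serdes come from the imports above.
val sessionCounts = builder
  .stream[String, String]("user-events")
  .groupByKey
  .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(Duration.ofMinutes(5)))
  .count()
```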
If `null` the default value serde from config will be used - * @return new [[StreamJoined]] instance with the provided store suppliers and serdes - */ - def `with`[K, V, VO]( - supplier: WindowBytesStoreSupplier, - otherSupplier: WindowBytesStoreSupplier - )(implicit keySerde: Serde[K], valueSerde: Serde[V], otherValueSerde: Serde[VO]): StreamJoinedJ[K, V, VO] = - StreamJoinedJ - .`with`(supplier, otherSupplier) - .withKeySerde(keySerde) - .withValueSerde(valueSerde) - .withOtherValueSerde(otherValueSerde) - - /** - * Create an instance of [[StreamJoined]] with the name used for naming - * the state stores involved in the join. Also adds the key, value, and otherValue - * `org.apache.kafka.common.serialization.Serde` instances. - * `null` values are accepted and will be replaced by the default serdes as defined in config. - * - * @tparam K key type - * @tparam V value type - * @tparam VO other value type - * @param storeName the name to use as a base name for the state stores of the join - * @param keySerde the key serde to use. - * @param valueSerde the value serde to use. - * @param otherValueSerde the otherValue serde to use. If `null` the default value serde from config will be used - * @return new [[StreamJoined]] instance with the provided store suppliers and serdes - */ - def as[K, V, VO]( - storeName: String - )(implicit keySerde: Serde[K], valueSerde: Serde[V], otherValueSerde: Serde[VO]): StreamJoinedJ[K, V, VO] = - StreamJoinedJ.as(storeName).withKeySerde(keySerde).withValueSerde(valueSerde).withOtherValueSerde(otherValueSerde) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala deleted file mode 100644 index ad24228ecc..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedCogroupedKStream.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.{TimeWindowedCogroupedKStream => TimeWindowedCogroupedKStreamJ, Windowed} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.InitializerFromFunction - -/** - * Wraps the Java class TimeWindowedCogroupedKStream and delegates method calls to the underlying Java object. 
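An illustrative use of the `StreamJoined.as` factory above in a windowed stream-stream join; the parameter streams, the store base name, and the 10-second window are assumptions.

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.JoinWindows
import org.apache.kafka.streams.scala.kstream.{KStream, StreamJoined}
import org.apache.kafka.streams.scala.serialization.Serdes._

// Windowed stream-stream join whose state stores are named via StreamJoined.as;
// the implicit serdes for key, value, and other value come from Serdes._.
def joinClicksToPages(clicks: KStream[String, Long], pageViews: KStream[String, String]): KStream[String, String] =
  clicks.join(pageViews)(
    (clickCount: Long, page: String) => s"$page:$clickCount",
    JoinWindows.ofTimeDifferenceWithNoGrace(Duration.ofSeconds(10))
  )(StreamJoined.as[String, Long, String]("clicks-page-views-join"))
```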
- * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for TimeWindowedCogroupedKStream - * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream` - */ -class TimeWindowedCogroupedKStream[K, V](val inner: TimeWindowedCogroupedKStreamJ[K, V]) { - - /** - * Aggregate the values of records in these streams by the grouped key and defined window. - * - * @param initializer an initializer function that computes an initial intermediate aggregation result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest - * (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream#aggregate` - */ - def aggregate(initializer: => V)(implicit - materialized: Materialized[K, V, ByteArrayWindowStore] - ): KTable[Windowed[K], V] = - new KTable(inner.aggregate((() => initializer).asInitializer, materialized)) - - /** - * Aggregate the values of records in these streams by the grouped key and defined window. - * - * @param initializer an initializer function that computes an initial intermediate aggregation result - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the latest - * (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream#aggregate` - */ - def aggregate(initializer: => V, named: Named)(implicit - materialized: Materialized[K, V, ByteArrayWindowStore] - ): KTable[Windowed[K], V] = - new KTable(inner.aggregate((() => initializer).asInitializer, named, materialized)) - -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala deleted file mode 100644 index 4fcf227e03..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/TimeWindowedKStream.scala +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala -package kstream - -import org.apache.kafka.streams.kstream.internals.KTableImpl -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.kstream.{KTable => KTableJ, TimeWindowedKStream => TimeWindowedKStreamJ, Windowed} -import org.apache.kafka.streams.scala.FunctionsCompatConversions.{ - AggregatorFromFunction, - InitializerFromFunction, - ReducerFromFunction, - ValueMapperFromFunction -} - -/** - * Wraps the Java class TimeWindowedKStream and delegates method calls to the underlying Java object. - * - * @tparam K Type of keys - * @tparam V Type of values - * @param inner The underlying Java abstraction for TimeWindowedKStream - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream` - */ -class TimeWindowedKStream[K, V](val inner: TimeWindowedKStreamJ[K, V]) { - - /** - * Aggregate the values of records in this stream by the grouped key. - * - * @param initializer an initializer function that computes an initial intermediate aggregation result - * @param aggregator an aggregator function that computes a new aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#aggregate` - */ - def aggregate[VR](initializer: => VR)(aggregator: (K, V, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArrayWindowStore] - ): KTable[Windowed[K], VR] = - new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, materialized)) - - /** - * Aggregate the values of records in this stream by the grouped key. - * - * @param initializer an initializer function that computes an initial intermediate aggregation result - * @param named a [[Named]] config used to name the processor in the topology - * @param aggregator an aggregator function that computes a new aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#aggregate` - */ - def aggregate[VR](initializer: => VR, named: Named)(aggregator: (K, V, VR) => VR)(implicit - materialized: Materialized[K, VR, ByteArrayWindowStore] - ): KTable[Windowed[K], VR] = - new KTable(inner.aggregate((() => initializer).asInitializer, aggregator.asAggregator, named, materialized)) - - /** - * Count the number of records in this stream by the grouped key and the defined windows. - * - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that - * represent the latest (rolling) count (i.e., number of records) for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#count` - */ - def count()(implicit materialized: Materialized[K, Long, ByteArrayWindowStore]): KTable[Windowed[K], Long] = { - val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = - inner.count(materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayWindowStore]]) - val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArrayWindowStore, java.lang.Long]] - new KTable( - javaCountTable.mapValues[Long]( - ((l: java.lang.Long) => Long2long(l)).asValueMapper, - Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) - ) - ) - } - - /** - * Count the number of records in this stream by the grouped key and the defined windows. - * - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys and `Long` values that - * represent the latest (rolling) count (i.e., number of records) for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#count` - */ - def count( - named: Named - )(implicit materialized: Materialized[K, Long, ByteArrayWindowStore]): KTable[Windowed[K], Long] = { - val javaCountTable: KTableJ[Windowed[K], java.lang.Long] = - inner.count(named, materialized.asInstanceOf[Materialized[K, java.lang.Long, ByteArrayWindowStore]]) - val tableImpl = javaCountTable.asInstanceOf[KTableImpl[Windowed[K], ByteArrayWindowStore, java.lang.Long]] - new KTable( - javaCountTable.mapValues[Long]( - ((l: java.lang.Long) => Long2long(l)).asValueMapper, - Materialized.`with`[Windowed[K], Long, ByteArrayKeyValueStore](tableImpl.keySerde(), Serdes.longSerde) - ) - ) - } - - /** - * Combine the values of records in this stream by the grouped key. - * - * @param reducer a function that computes a new aggregate result - * @param materialized an instance of `Materialized` used to materialize a state store. - * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#reduce` - */ - def reduce(reducer: (V, V) => V)(implicit - materialized: Materialized[K, V, ByteArrayWindowStore] - ): KTable[Windowed[K], V] = - new KTable(inner.reduce(reducer.asReducer, materialized)) - - /** - * Combine the values of records in this stream by the grouped key. - * - * @param reducer a function that computes a new aggregate result - * @param named a [[Named]] config used to name the processor in the topology - * @param materialized an instance of `Materialized` used to materialize a state store. 
- * @return a [[KTable]] that contains "update" records with unmodified keys, and values that represent the - * latest (rolling) aggregate for each key - * @see `org.apache.kafka.streams.kstream.TimeWindowedKStream#reduce` - */ - def reduce(reducer: (V, V) => V, named: Named)(implicit - materialized: Materialized[K, V, ByteArrayWindowStore] - ): KTable[Windowed[K], V] = - new KTable(inner.reduce(reducer.asReducer, materialized)) -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala deleted file mode 100644 index 7365c68ad1..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/kstream/package.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala - -import org.apache.kafka.streams.processor.StateStore - -package object kstream { - type Materialized[K, V, S <: StateStore] = org.apache.kafka.streams.kstream.Materialized[K, V, S] - type Grouped[K, V] = org.apache.kafka.streams.kstream.Grouped[K, V] - type Consumed[K, V] = org.apache.kafka.streams.kstream.Consumed[K, V] - type Produced[K, V] = org.apache.kafka.streams.kstream.Produced[K, V] - type Repartitioned[K, V] = org.apache.kafka.streams.kstream.Repartitioned[K, V] - type Joined[K, V, VO] = org.apache.kafka.streams.kstream.Joined[K, V, VO] - type StreamJoined[K, V, VO] = org.apache.kafka.streams.kstream.StreamJoined[K, V, VO] - type Named = org.apache.kafka.streams.kstream.Named - type Branched[K, V] = org.apache.kafka.streams.kstream.Branched[K, V] -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala deleted file mode 100644 index 6a3906dd58..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/package.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
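A brief sketch of a tumbling-window reduce with the time-windowed wrapper above, relying on the `kstream` package aliases; the topic name and window size are illustrative.

```scala
import java.time.Duration
import org.apache.kafka.streams.kstream.TimeWindows
import org.apache.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.streams.scala.StreamsBuilder
import org.apache.kafka.streams.scala.serialization.Serdes._

val builder = new StreamsBuilder

// Sum amounts per key over tumbling one-minute windows; the implicit
// Materialized for the window store is derived from the Long serde in scope.
val windowedTotals = builder
  .stream[String, Long]("amounts")
  .groupByKey
  .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofMinutes(1)))
  .reduce(_ + _)
```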
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams - -import org.apache.kafka.streams.state.{KeyValueStore, SessionStore, WindowStore} -import org.apache.kafka.common.utils.Bytes - -package object scala { - type ByteArrayKeyValueStore = KeyValueStore[Bytes, Array[Byte]] - type ByteArraySessionStore = SessionStore[Bytes, Array[Byte]] - type ByteArrayWindowStore = WindowStore[Bytes, Array[Byte]] -} diff --git a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala b/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala deleted file mode 100644 index cf643d518a..0000000000 --- a/streams/streams-scala/bin/main/org/apache/kafka/streams/scala/serialization/Serdes.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.serialization - -import java.nio.ByteBuffer -import java.util -import java.util.UUID - -import org.apache.kafka.common.serialization.{Deserializer, Serde, Serdes => JSerdes, Serializer} -import org.apache.kafka.streams.kstream.WindowedSerdes - -object Serdes extends LowPrioritySerdes { - implicit def stringSerde: Serde[String] = JSerdes.String() - implicit def longSerde: Serde[Long] = JSerdes.Long().asInstanceOf[Serde[Long]] - implicit def javaLongSerde: Serde[java.lang.Long] = JSerdes.Long() - implicit def byteArraySerde: Serde[Array[Byte]] = JSerdes.ByteArray() - implicit def bytesSerde: Serde[org.apache.kafka.common.utils.Bytes] = JSerdes.Bytes() - implicit def byteBufferSerde: Serde[ByteBuffer] = JSerdes.ByteBuffer() - implicit def shortSerde: Serde[Short] = JSerdes.Short().asInstanceOf[Serde[Short]] - implicit def javaShortSerde: Serde[java.lang.Short] = JSerdes.Short() - implicit def floatSerde: Serde[Float] = JSerdes.Float().asInstanceOf[Serde[Float]] - implicit def javaFloatSerde: Serde[java.lang.Float] = JSerdes.Float() - implicit def doubleSerde: Serde[Double] = JSerdes.Double().asInstanceOf[Serde[Double]] - implicit def javaDoubleSerde: Serde[java.lang.Double] = JSerdes.Double() - implicit def intSerde: Serde[Int] = JSerdes.Integer().asInstanceOf[Serde[Int]] - implicit def javaIntegerSerde: Serde[java.lang.Integer] = JSerdes.Integer() - implicit def uuidSerde: Serde[UUID] = JSerdes.UUID() - - implicit def sessionWindowedSerde[T](implicit tSerde: Serde[T]): WindowedSerdes.SessionWindowedSerde[T] = - new WindowedSerdes.SessionWindowedSerde[T](tSerde) - - def fromFn[T >: Null](serializer: T => Array[Byte], deserializer: Array[Byte] => Option[T]): Serde[T] = - JSerdes.serdeFrom( - new Serializer[T] { - override def serialize(topic: String, data: T): Array[Byte] = serializer(data) - override def 
configure(configs: util.Map[String, _], isKey: Boolean): Unit = () - override def close(): Unit = () - }, - new Deserializer[T] { - override def deserialize(topic: String, data: Array[Byte]): T = deserializer(data).orNull - override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () - override def close(): Unit = () - } - ) - - def fromFn[T >: Null]( - serializer: (String, T) => Array[Byte], - deserializer: (String, Array[Byte]) => Option[T] - ): Serde[T] = - JSerdes.serdeFrom( - new Serializer[T] { - override def serialize(topic: String, data: T): Array[Byte] = serializer(topic, data) - override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () - override def close(): Unit = () - }, - new Deserializer[T] { - override def deserialize(topic: String, data: Array[Byte]): T = deserializer(topic, data).orNull - override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () - override def close(): Unit = () - } - ) -} - -trait LowPrioritySerdes { - - implicit val nullSerde: Serde[Null] = - Serdes.fromFn[Null]( - { _: Null => - null - }, - { _: Array[Byte] => - None - } - ) -} diff --git a/streams/streams-scala/bin/test/log4j2.yaml b/streams/streams-scala/bin/test/log4j2.yaml deleted file mode 100644 index 8c33fae786..0000000000 --- a/streams/streams-scala/bin/test/log4j2.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -Configuration: - Properties: - Property: - - name: "logPattern" - value: "%-4r [%t] %-5p %c %x - %m%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala deleted file mode 100644 index 2b1f25afdf..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/TopologyTest.scala +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
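To show `Serdes.fromFn` above in use, a hedged sketch of building a serde for a made-up `Event` case class; both the type and its pipe-separated encoding are invented for illustration.

```scala
import java.nio.charset.StandardCharsets
import org.apache.kafka.common.serialization.Serde
import org.apache.kafka.streams.scala.serialization.Serdes

// A custom serde built from plain functions; a null byte array deserializes
// to None, which fromFn turns into null via orNull.
case class Event(id: String, count: Long)

implicit val eventSerde: Serde[Event] = Serdes.fromFn[Event](
  (event: Event) => s"${event.id}|${event.count}".getBytes(StandardCharsets.UTF_8),
  (bytes: Array[Byte]) =>
    Option(bytes).map { b =>
      val parts = new String(b, StandardCharsets.UTF_8).split('|')
      Event(parts(0), parts(1).toLong)
    }
)
```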
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala - -import java.time.Duration -import java.util -import java.util.{Locale, Properties} -import java.util.regex.Pattern -import org.apache.kafka.common.serialization.{Serdes => SerdesJ} -import org.apache.kafka.streams.kstream.{ - Aggregator, - Initializer, - JoinWindows, - KGroupedStream => KGroupedStreamJ, - KStream => KStreamJ, - KTable => KTableJ, - KeyValueMapper, - Materialized => MaterializedJ, - Reducer, - StreamJoined => StreamJoinedJ, - ValueJoiner, - ValueMapper -} -import org.apache.kafka.streams.processor.api -import org.apache.kafka.streams.processor.api.{Processor, ProcessorSupplier} -import org.apache.kafka.streams.scala.ImplicitConversions._ -import org.apache.kafka.streams.scala.serialization.{Serdes => NewSerdes} -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.apache.kafka.streams.scala.kstream._ -import org.apache.kafka.streams.{KeyValue, StreamsBuilder => StreamsBuilderJ, StreamsConfig, TopologyDescription} -import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api._ - -import scala.jdk.CollectionConverters._ - -/** - * Test suite that verifies that the topology built by the Java and Scala APIs match. - */ -//noinspection ScalaDeprecation -@Timeout(600) -class TopologyTest { - private val inputTopic = "input-topic" - private val userClicksTopic = "user-clicks-topic" - private val userRegionsTopic = "user-regions-topic" - - private val pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS) - - @Test - def shouldBuildIdenticalTopologyInJavaNScalaSimple(): Unit = { - - // build the Scala topology - def getTopologyScala: TopologyDescription = { - - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamBuilder = new StreamsBuilder - val textLines = streamBuilder.stream[String, String](inputTopic) - - val _: KStream[String, String] = textLines.flatMapValues(v => pattern.split(v.toLowerCase)) - - streamBuilder.build().describe() - } - - // build the Java topology - def getTopologyJava: TopologyDescription = { - val streamBuilder = new StreamsBuilderJ - val textLines = streamBuilder.stream[String, String](inputTopic) - val _: KStreamJ[String, String] = textLines.flatMapValues(s => pattern.split(s.toLowerCase).toBuffer.asJava) - streamBuilder.build().describe() - } - - // should match - assertEquals(getTopologyScala, getTopologyJava) - } - - @Test - def shouldBuildIdenticalTopologyInJavaNScalaAggregate(): Unit = { - - // build the Scala topology - def getTopologyScala: TopologyDescription = { - - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamBuilder = new StreamsBuilder - val textLines = streamBuilder.stream[String, String](inputTopic) - - textLines - .flatMapValues(v => pattern.split(v.toLowerCase)) - .groupBy((_, v) => v) - .count() - - streamBuilder.build().describe() - } - - // build the Java topology - def getTopologyJava: TopologyDescription = { - - val streamBuilder = new StreamsBuilderJ - val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) - - val splits: KStreamJ[String, String] = - textLines.flatMapValues(s => pattern.split(s.toLowerCase).toBuffer.asJava) - - val grouped: KGroupedStreamJ[String, String] = splits.groupBy((_, v) => v) - - grouped.count() - - streamBuilder.build().describe() - } - - // should match - assertEquals(getTopologyScala, getTopologyJava) - } - - @Test def 
shouldBuildIdenticalTopologyInJavaNScalaCogroupSimple(): Unit = { - - // build the Scala topology - def getTopologyScala: TopologyDescription = { - - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamBuilder = new StreamsBuilder - val textLines = streamBuilder.stream[String, String](inputTopic) - textLines - .mapValues(v => v.length) - .groupByKey - .cogroup((_, v1, v2: Long) => v1 + v2) - .aggregate(0L) - - streamBuilder.build().describe() - } - - // build the Java topology - def getTopologyJava: TopologyDescription = { - - val streamBuilder = new StreamsBuilderJ - val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) - - val splits: KStreamJ[String, Int] = textLines.mapValues( - new ValueMapper[String, Int] { - def apply(s: String): Int = s.length - } - ) - - splits.groupByKey - .cogroup((k: String, v: Int, a: Long) => a + v) - .aggregate(() => 0L) - - streamBuilder.build().describe() - } - - // should match - assertEquals(getTopologyScala, getTopologyJava) - } - - @Test def shouldBuildIdenticalTopologyInJavaNScalaCogroup(): Unit = { - - // build the Scala topology - def getTopologyScala: TopologyDescription = { - - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamBuilder = new StreamsBuilder - val textLines1 = streamBuilder.stream[String, String](inputTopic) - val textLines2 = streamBuilder.stream[String, String]("inputTopic2") - - textLines1 - .mapValues(v => v.length) - .groupByKey - .cogroup((_, v1, v2: Long) => v1 + v2) - .cogroup(textLines2.groupByKey, (_, v: String, a) => v.length + a) - .aggregate(0L) - - streamBuilder.build().describe() - } - - // build the Java topology - def getTopologyJava: TopologyDescription = { - - val streamBuilder = new StreamsBuilderJ - val textLines1: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) - val textLines2: KStreamJ[String, String] = streamBuilder.stream[String, String]("inputTopic2") - - val splits: KStreamJ[String, Int] = textLines1.mapValues( - new ValueMapper[String, Int] { - def apply(s: String): Int = s.length - } - ) - - splits.groupByKey - .cogroup((k: String, v: Int, a: Long) => a + v) - .cogroup(textLines2.groupByKey(), (k: String, v: String, a: Long) => v.length + a) - .aggregate(() => 0L) - - streamBuilder.build().describe() - } - - // should match - assertEquals(getTopologyScala, getTopologyJava) - } - - @Test def shouldBuildIdenticalTopologyInJavaNScalaJoin(): Unit = { - - // build the Scala topology - def getTopologyScala: TopologyDescription = { - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val builder = new StreamsBuilder() - - val userClicksStream: KStream[String, Long] = builder.stream(userClicksTopic) - - val userRegionsTable: KTable[String, String] = builder.table(userRegionsTopic) - - // clicks per region - userClicksStream - .leftJoin(userRegionsTable)((clicks, region) => (if (region == null) "UNKNOWN" else region, clicks)) - .map((_, regionWithClicks) => regionWithClicks) - .groupByKey - .reduce(_ + _) - - builder.build().describe() - } - - // build the Java topology - def getTopologyJava: TopologyDescription = { - - import java.lang.{Long => JLong} - - val builder: StreamsBuilderJ = new StreamsBuilderJ() - - val userClicksStream: KStreamJ[String, JLong] = - builder.stream[String, JLong](userClicksTopic, Consumed.`with`[String, JLong]) - - val userRegionsTable: KTableJ[String, String] = - builder.table[String, String](userRegionsTopic, Consumed.`with`[String, String]) - - // Join the 
stream against the table. - val valueJoinerJ: ValueJoiner[JLong, String, (String, JLong)] = - (clicks: JLong, region: String) => (if (region == null) "UNKNOWN" else region, clicks) - val userClicksJoinRegion: KStreamJ[String, (String, JLong)] = userClicksStream.leftJoin( - userRegionsTable, - valueJoinerJ, - Joined.`with`[String, JLong, String] - ) - - // Change the stream from -> to -> - val clicksByRegion: KStreamJ[String, JLong] = userClicksJoinRegion.map { (_, regionWithClicks) => - new KeyValue(regionWithClicks._1, regionWithClicks._2) - } - - // Compute the total per region by summing the individual click counts per region. - clicksByRegion - .groupByKey(Grouped.`with`[String, JLong]) - .reduce((v1, v2) => v1 + v2) - - builder.build().describe() - } - - // should match - assertEquals(getTopologyScala, getTopologyJava) - } - - @Test - def shouldBuildIdenticalTopologyInJavaNScalaProcess(): Unit = { - val processorSupplier = new ProcessorSupplier[String, String, String, String] { - override def get(): Processor[String, String, String, String] = - new api.Processor[String, String, String, String] { - override def process(record: api.Record[String, String]): Unit = {} - } - } - - // build the Scala topology - def getTopologyScala: TopologyDescription = { - - import org.apache.kafka.streams.scala.serialization.Serdes._ - - val streamBuilder = new StreamsBuilder - val textLines = streamBuilder.stream[String, String](inputTopic) - - val _: KTable[String, Long] = textLines - .process(processorSupplier) - .groupBy((_, v) => v) - .count() - - streamBuilder.build().describe() - } - - // build the Java topology - def getTopologyJava: TopologyDescription = { - - val streamBuilder = new StreamsBuilderJ - val textLines: KStreamJ[String, String] = streamBuilder.stream[String, String](inputTopic) - - val lowered: KStreamJ[String, String] = textLines.process(processorSupplier) - - val grouped: KGroupedStreamJ[String, String] = lowered.groupBy((_, v) => v) - - // word counts - grouped.count() - - streamBuilder.build().describe() - } - - // should match - assertEquals(getTopologyScala, getTopologyJava) - } - - @Test - def shouldBuildIdenticalTopologyInJavaNScalaProperties(): Unit = { - - val props = new Properties() - props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE) - - val propsNoOptimization = new Properties() - propsNoOptimization.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.NO_OPTIMIZATION) - - val AGGREGATION_TOPIC = "aggregationTopic" - val REDUCE_TOPIC = "reduceTopic" - val JOINED_TOPIC = "joinedTopic" - - // build the Scala topology - def getTopologyScala: StreamsBuilder = { - - val aggregator = (_: String, v: String, agg: Int) => agg + v.length - val reducer = (v1: String, v2: String) => v1 + ":" + v2 - val processorValueCollector: util.List[String] = new util.ArrayList[String] - - val builder: StreamsBuilder = new StreamsBuilder - - val sourceStream: KStream[String, String] = - builder.stream(inputTopic)(Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - - val mappedStream: KStream[String, String] = - sourceStream.map((k: String, v: String) => (k.toUpperCase(Locale.getDefault), v)) - mappedStream - .filter((k: String, _: String) => k == "B") - .mapValues((v: String) => v.toUpperCase(Locale.getDefault)) - .process(new SimpleProcessorSupplier(processorValueCollector)) - - val stream2 = mappedStream.groupByKey - .aggregate(0)(aggregator)(Materialized.`with`(NewSerdes.stringSerde, NewSerdes.intSerde)) - .toStream - 
stream2.to(AGGREGATION_TOPIC)(Produced.`with`(NewSerdes.stringSerde, NewSerdes.intSerde)) - - // adding operators for case where the repartition node is further downstream - val stream3 = mappedStream - .filter((_: String, _: String) => true) - .peek((k: String, v: String) => System.out.println(k + ":" + v)) - .groupByKey - .reduce(reducer)(Materialized.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - .toStream - stream3.to(REDUCE_TOPIC)(Produced.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - - mappedStream - .filter((k: String, _: String) => k == "A") - .join(stream2)( - (v1: String, v2: Int) => v1 + ":" + v2.toString, - JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)) - )( - StreamJoined.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, NewSerdes.intSerde) - ) - .to(JOINED_TOPIC) - - mappedStream - .filter((k: String, _: String) => k == "A") - .join(stream3)( - (v1: String, v2: String) => v1 + ":" + v2.toString, - JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)) - )( - StreamJoined.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, NewSerdes.stringSerde) - ) - .to(JOINED_TOPIC) - - builder - } - - // build the Java topology - def getTopologyJava: StreamsBuilderJ = { - - val keyValueMapper: KeyValueMapper[String, String, KeyValue[String, String]] = - (key, value) => KeyValue.pair(key.toUpperCase(Locale.getDefault), value) - val initializer: Initializer[Integer] = () => 0 - val aggregator: Aggregator[String, String, Integer] = (_, value, aggregate) => aggregate + value.length - val reducer: Reducer[String] = (v1, v2) => v1 + ":" + v2 - val valueMapper: ValueMapper[String, String] = v => v.toUpperCase(Locale.getDefault) - val processorValueCollector = new util.ArrayList[String] - val processorSupplier = new SimpleProcessorSupplier(processorValueCollector) - val valueJoiner2: ValueJoiner[String, Integer, String] = (value1, value2) => value1 + ":" + value2.toString - val valueJoiner3: ValueJoiner[String, String, String] = (value1, value2) => value1 + ":" + value2 - - val builder = new StreamsBuilderJ - - val sourceStream = builder.stream(inputTopic, Consumed.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - - val mappedStream: KStreamJ[String, String] = - sourceStream.map(keyValueMapper) - mappedStream - .filter((key, _) => key == "B") - .mapValues[String](valueMapper) - .process(processorSupplier) - - val stream2: KStreamJ[String, Integer] = mappedStream.groupByKey - .aggregate(initializer, aggregator, MaterializedJ.`with`(NewSerdes.stringSerde, SerdesJ.Integer)) - .toStream - stream2.to(AGGREGATION_TOPIC, Produced.`with`(NewSerdes.stringSerde, SerdesJ.Integer)) - - // adding operators for case where the repartition node is further downstream - val stream3 = mappedStream - .filter((_, _) => true) - .peek((k, v) => System.out.println(k + ":" + v)) - .groupByKey - .reduce(reducer, MaterializedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - .toStream - stream3.to(REDUCE_TOPIC, Produced.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde)) - - mappedStream - .filter((key, _) => key == "A") - .join[Integer, String]( - stream2, - valueJoiner2, - JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), Duration.ofHours(24)), - StreamJoinedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, SerdesJ.Integer) - ) - .to(JOINED_TOPIC) - - mappedStream - .filter((key, _) => key == "A") - .join( - stream3, - valueJoiner3, - JoinWindows.ofTimeDifferenceAndGrace(Duration.ofMillis(5000), 
Duration.ofHours(24)), - StreamJoinedJ.`with`(NewSerdes.stringSerde, NewSerdes.stringSerde, SerdesJ.String) - ) - .to(JOINED_TOPIC) - - builder - } - - assertNotEquals( - getTopologyScala.build(props).describe.toString, - getTopologyScala.build(propsNoOptimization).describe.toString - ) - assertEquals( - getTopologyScala.build(propsNoOptimization).describe.toString, - getTopologyJava.build(propsNoOptimization).describe.toString - ) - assertEquals(getTopologyScala.build(props).describe.toString, getTopologyJava.build(props).describe.toString) - } - - private class SimpleProcessorSupplier private[TopologyTest] (val valueList: util.List[String]) - extends ProcessorSupplier[String, String, Void, Void] { - - override def get(): Processor[String, String, Void, Void] = - (record: api.Record[String, String]) => valueList.add(record.value()) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala deleted file mode 100644 index 4656a4d12f..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ConsumedTest.scala +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.clients.consumer.internals.AutoOffsetResetStrategy -import org.apache.kafka.streams.AutoOffsetReset -import org.apache.kafka.streams.kstream.internals.ConsumedInternal -import org.apache.kafka.streams.processor.FailOnInvalidTimestamp -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -class ConsumedTest { - - @Test - def testCreateConsumed(): Unit = { - val consumed: Consumed[String, Long] = Consumed.`with`[String, Long] - - val internalConsumed = new ConsumedInternal(consumed) - assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) - } - - @Test - def testCreateConsumedWithTimestampExtractorAndResetPolicy(): Unit = { - val timestampExtractor = new FailOnInvalidTimestamp() - val resetPolicy = AutoOffsetReset.latest() - val consumed: Consumed[String, Long] = - Consumed.`with`(timestampExtractor, resetPolicy) - - val internalConsumed = new ConsumedInternal(consumed) - assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) - assertEquals(timestampExtractor, internalConsumed.timestampExtractor) - assertEquals(AutoOffsetResetStrategy.StrategyType.LATEST, internalConsumed.offsetResetPolicy.offsetResetStrategy()) - } - - @Test - def testCreateConsumedWithTimestampExtractor(): Unit = { - val timestampExtractor = new FailOnInvalidTimestamp() - val consumed: Consumed[String, Long] = Consumed.`with`[String, Long](timestampExtractor) - - val internalConsumed = new ConsumedInternal(consumed) - assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) - assertEquals(timestampExtractor, internalConsumed.timestampExtractor) - } - - @Test - def testCreateConsumedWithResetPolicy(): Unit = { - val resetPolicy = AutoOffsetReset.latest() - val consumed: Consumed[String, Long] = Consumed.`with`[String, Long](resetPolicy) - - val internalConsumed = new ConsumedInternal(consumed) - assertEquals(Serdes.stringSerde.getClass, internalConsumed.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalConsumed.valueSerde.getClass) - assertEquals(AutoOffsetResetStrategy.StrategyType.LATEST, internalConsumed.offsetResetPolicy.offsetResetStrategy()) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala deleted file mode 100644 index 02f333ec6b..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/GroupedTest.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.internals.GroupedInternal -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -class GroupedTest { - - @Test - def testCreateGrouped(): Unit = { - val grouped: Grouped[String, Long] = Grouped.`with`[String, Long] - - val internalGrouped = new GroupedInternal[String, Long](grouped) - assertEquals(Serdes.stringSerde.getClass, internalGrouped.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalGrouped.valueSerde.getClass) - } - - @Test - def testCreateGroupedWithRepartitionTopicName(): Unit = { - val repartitionTopicName = "repartition-topic" - val grouped: Grouped[String, Long] = Grouped.`with`(repartitionTopicName) - - val internalGrouped = new GroupedInternal[String, Long](grouped) - assertEquals(Serdes.stringSerde.getClass, internalGrouped.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalGrouped.valueSerde.getClass) - assertEquals(repartitionTopicName, internalGrouped.name()) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala deleted file mode 100644 index 4e6fa563f5..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/JoinedTest.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -class JoinedTest { - - @Test - def testCreateJoined(): Unit = { - val joined: Joined[String, Long, Int] = Joined.`with`[String, Long, Int] - - assertEquals(joined.keySerde.getClass, Serdes.stringSerde.getClass) - assertEquals(joined.valueSerde.getClass, Serdes.longSerde.getClass) - assertEquals(joined.otherValueSerde.getClass, Serdes.intSerde.getClass) - } - - @Test - def testCreateJoinedWithSerdesAndRepartitionTopicName(): Unit = { - val repartitionTopicName = "repartition-topic" - val joined: Joined[String, Long, Int] = Joined.`with`(repartitionTopicName) - - assertEquals(joined.keySerde.getClass, Serdes.stringSerde.getClass) - assertEquals(joined.valueSerde.getClass, Serdes.longSerde.getClass) - assertEquals(joined.otherValueSerde.getClass, Serdes.intSerde.getClass) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala deleted file mode 100644 index bbcc1b503f..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamSplitTest.scala +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.Named -import org.apache.kafka.streams.scala.ImplicitConversions._ -import org.apache.kafka.streams.scala.StreamsBuilder -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.apache.kafka.streams.scala.utils.TestDriver -import org.junit.jupiter.api.Assertions._ -import org.junit.jupiter.api.Test - -import scala.jdk.CollectionConverters._ - -class KStreamSplitTest extends TestDriver { - - @Test - def testRouteMessagesAccordingToPredicates(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = Array("default", "even", "three"); - - val m = builder - .stream[Integer, Integer](sourceTopic) - .split(Named.as("_")) - .branch((_, v) => v % 2 == 0) - .branch((_, v) => v % 3 == 0) - .defaultBranch() - - m("_0").to(sinkTopic(0)) - m("_1").to(sinkTopic(1)) - m("_2").to(sinkTopic(2)) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[Integer, Integer](sourceTopic) - val testOutput = sinkTopic.map(name => testDriver.createOutput[Integer, Integer](name)) - - testInput.pipeValueList( - List(1, 2, 3, 4, 5) - .map(Integer.valueOf) - .asJava - ) - assertEquals(List(1, 5), testOutput(0).readValuesToList().asScala) - assertEquals(List(2, 4), testOutput(1).readValuesToList().asScala) - assertEquals(List(3), testOutput(2).readValuesToList().asScala) - - testDriver.close() - } - - @Test - def testRouteMessagesToConsumers(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - - val m = builder - .stream[Integer, Integer](sourceTopic) - .split(Named.as("_")) - .branch((_, v) => v % 2 == 0, Branched.withConsumer(ks => ks.to("even"), "consumedEvens")) - .branch((_, v) => v % 3 == 0, Branched.withFunction(ks => ks.mapValues(x => x * x), "mapped")) - .noDefaultBranch() - - m("_mapped").to("mapped") - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[Integer, Integer](sourceTopic) - testInput.pipeValueList( - List(1, 2, 3, 4, 5, 9) - .map(Integer.valueOf) - .asJava - ) - - val even = testDriver.createOutput[Integer, Integer]("even") - val mapped = testDriver.createOutput[Integer, Integer]("mapped") - - assertEquals(List(2, 4), even.readValuesToList().asScala) - assertEquals(List(9, 81), mapped.readValuesToList().asScala) - - testDriver.close() - } - - @Test - def testRouteMessagesToAnonymousConsumers(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - - val m = builder - .stream[Integer, Integer](sourceTopic) - .split(Named.as("_")) - .branch((_, v) => v % 2 == 0, Branched.withConsumer(ks => ks.to("even"))) - .branch((_, v) => v % 3 == 0, Branched.withFunction(ks => ks.mapValues(x => x * x))) - .noDefaultBranch() - - m("_2").to("mapped") - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[Integer, Integer](sourceTopic) - testInput.pipeValueList( - List(1, 2, 3, 4, 5, 9) - .map(Integer.valueOf) - .asJava - ) - - val even = testDriver.createOutput[Integer, Integer]("even") - val mapped = testDriver.createOutput[Integer, Integer]("mapped") - - assertEquals(List(2, 4), even.readValuesToList().asScala) - assertEquals(List(9, 81), mapped.readValuesToList().asScala) - - testDriver.close() - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala deleted file mode 100644 index 
6a0b6c1b0e..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KStreamTest.scala +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import java.time.Duration.ofSeconds -import java.time.{Duration, Instant} -import org.apache.kafka.streams.kstream.{JoinWindows, Named} -import org.apache.kafka.streams.processor.api -import org.apache.kafka.streams.processor.api.{FixedKeyRecord, Processor, ProcessorSupplier} -import org.apache.kafka.streams.scala.ImplicitConversions._ -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.apache.kafka.streams.scala.StreamsBuilder -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.utils.TestDriver -import org.apache.kafka.streams.state.{KeyValueStore, StoreBuilder, Stores} -import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} -import org.junit.jupiter.api.Test - -import java.util -import java.util.Collections -import scala.jdk.CollectionConverters._ - -class KStreamTest extends TestDriver { - - @Test - def testFilterRecordsSatisfyingPredicate(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - builder.stream[String, String](sourceTopic).filter((_, value) => value != "value2").to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value1") - assertEquals("value1", testOutput.readValue) - - testInput.pipeInput("2", "value2") - assertTrue(testOutput.isEmpty) - - testInput.pipeInput("3", "value3") - assertEquals("value3", testOutput.readValue) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testFilterRecordsNotSatisfyingPredicate(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - builder.stream[String, String](sourceTopic).filterNot((_, value) => value == "value2").to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value1") - assertEquals("value1", testOutput.readValue) - - testInput.pipeInput("2", "value2") - assertTrue(testOutput.isEmpty) - - testInput.pipeInput("3", "value3") - assertEquals("value3", testOutput.readValue) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testForeachActionsOnRecords(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - - var acc = "" - 
builder.stream[String, String](sourceTopic).foreach((_, value) => acc += value) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - - testInput.pipeInput("1", "value1") - assertEquals("value1", acc) - - testInput.pipeInput("2", "value2") - assertEquals("value1value2", acc) - - testDriver.close() - } - - @Test - def testPeekActionsOnRecords(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - var acc = "" - builder.stream[String, String](sourceTopic).peek((_, v) => acc += v).to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value1") - assertEquals("value1", acc) - assertEquals("value1", testOutput.readValue) - - testInput.pipeInput("2", "value2") - assertEquals("value1value2", acc) - assertEquals("value2", testOutput.readValue) - - testDriver.close() - } - - @Test - def testSelectNewKey(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - builder.stream[String, String](sourceTopic).selectKey((_, value) => value).to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value1") - assertEquals("value1", testOutput.readKeyValue.key) - - testInput.pipeInput("1", "value2") - assertEquals("value2", testOutput.readKeyValue.key) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testRepartitionKStream(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val repartitionName = "repartition" - val sinkTopic = "sink" - - builder.stream[String, String](sourceTopic).repartition(Repartitioned.`with`(repartitionName)).to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value1") - val kv1 = testOutput.readKeyValue - assertEquals("1", kv1.key) - assertEquals("value1", kv1.value) - - testInput.pipeInput("2", "value2") - val kv2 = testOutput.readKeyValue - assertEquals("2", kv2.key) - assertEquals("value2", kv2.value) - - assertTrue(testOutput.isEmpty) - - // appId == "test" - testDriver.producedTopicNames() contains "test-" + repartitionName + "-repartition" - - testDriver.close() - } - - // noinspection ScalaDeprecation - @Test - def testJoinCorrectlyRecords(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - - val stream1 = builder.stream[String, String](sourceTopic1) - val stream2 = builder.stream[String, String](sourceTopic2) - stream1 - .join(stream2)((a, b) => s"$a-$b", JoinWindows.ofTimeDifferenceAndGrace(ofSeconds(1), Duration.ofHours(24))) - .to(sinkTopic) - - val now = Instant.now() - - val testDriver = createTestDriver(builder, now) - val testInput1 = testDriver.createInput[String, String](sourceTopic1) - val testInput2 = testDriver.createInput[String, String](sourceTopic2) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput1.pipeInput("1", "topic1value1", now) - testInput2.pipeInput("1", "topic2value1", now) - - 
assertEquals("topic1value1-topic2value1", testOutput.readValue) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testProcessCorrectlyRecords(): Unit = { - val processorSupplier: ProcessorSupplier[String, String, String, String] = - new api.ProcessorSupplier[String, String, String, String] { - private val storeName = "store-name" - - override def stores: util.Set[StoreBuilder[_]] = { - val keyValueStoreBuilder = Stores.keyValueStoreBuilder( - Stores.persistentKeyValueStore(storeName), - Serdes.stringSerde, - Serdes.stringSerde - ) - Collections.singleton(keyValueStoreBuilder) - } - - override def get(): Processor[String, String, String, String] = - new api.Processor[String, String, String, String] { - private var context: api.ProcessorContext[String, String] = _ - private var store: KeyValueStore[String, String] = _ - - override def init(context: api.ProcessorContext[String, String]): Unit = { - this.context = context - store = context.getStateStore(storeName) - } - - override def process(record: api.Record[String, String]): Unit = { - val key = record.key() - val value = record.value() - val processedKey = s"$key-processed" - val processedValue = s"$value-processed" - store.put(processedKey, processedValue) - context.forward(new api.Record(processedKey, processedValue, record.timestamp())) - } - } - } - - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val stream = builder.stream[String, String](sourceTopic) - stream - .process(processorSupplier) - .to(sinkTopic) - - val now = Instant.now() - val testDriver = createTestDriver(builder, now) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value", now) - - val result = testOutput.readKeyValue() - assertEquals("value-processed", result.value) - assertEquals("1-processed", result.key) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testProcessValuesCorrectlyRecords(): Unit = { - val processorSupplier: api.FixedKeyProcessorSupplier[String, String, String] = - () => - new api.FixedKeyProcessor[String, String, String] { - private var context: api.FixedKeyProcessorContext[String, String] = _ - - override def init(context: api.FixedKeyProcessorContext[String, String]): Unit = - this.context = context - - override def process(record: FixedKeyRecord[String, String]): Unit = { - val processedValue = s"${record.value()}-processed" - context.forward(record.withValue(processedValue)) - } - } - - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val stream = builder.stream[String, String](sourceTopic) - stream - .processValues(processorSupplier) - .to(sinkTopic) - - val now = Instant.now() - val testDriver = createTestDriver(builder, now) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput.pipeInput("1", "value", now) - - val result = testOutput.readKeyValue() - assertEquals("value-processed", result.value) - assertEquals("1", result.key) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testJoinTwoKStreamToTables(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - - val table1 = builder.stream[String, String](sourceTopic1).toTable - val table2 = builder.stream[String, 
String](sourceTopic2).toTable - table1.join(table2)((a, b) => a + b).toStream.to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput1 = testDriver.createInput[String, String](sourceTopic1) - val testInput2 = testDriver.createInput[String, String](sourceTopic2) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput1.pipeInput("1", "topic1value1") - testInput2.pipeInput("1", "topic2value1") - - assertEquals("topic1value1topic2value1", testOutput.readValue) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testSettingNameOnFilter(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - builder - .stream[String, String](sourceTopic) - .filter((_, value) => value != "value2", Named.as("my-name")) - .to(sinkTopic) - - import scala.jdk.CollectionConverters._ - - val filterNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) - assertEquals("my-name", filterNode.name()) - } - - @Test - def testSettingNameOnOutputTable(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sinkTopic = "sink" - - builder - .stream[String, String](sourceTopic1) - .toTable(Named.as("my-name")) - .toStream - .to(sinkTopic) - - import scala.jdk.CollectionConverters._ - - val tableNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) - assertEquals("my-name", tableNode.name()) - } - - @Test - def testSettingNameOnJoin(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source" - val sourceGTable = "table" - val sinkTopic = "sink" - - val stream = builder.stream[String, String](sourceTopic1) - val table = builder.globalTable[String, String](sourceGTable) - stream - .join(table, Named.as("my-name"))((a, b) => s"$a-$b", (a, b) => a + b) - .to(sinkTopic) - - import scala.jdk.CollectionConverters._ - - val joinNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) - assertEquals("my-name", joinNode.name()) - } - - @Test - def testSettingNameOnProcess(): Unit = { - class TestProcessor extends api.Processor[String, String, String, String] { - override def process(record: api.Record[String, String]): Unit = {} - } - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val stream = builder.stream[String, String](sourceTopic) - stream - .process(() => new TestProcessor, Named.as("my-name")) - .to(sinkTopic) - - val transformNode = builder.build().describe().subtopologies().asScala.head.nodes().asScala.toList(1) - assertEquals("my-name", transformNode.name()) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala deleted file mode 100644 index e473c6579a..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/KTableTest.scala +++ /dev/null @@ -1,617 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.Suppressed.BufferConfig -import org.apache.kafka.streams.kstream.{ - Named, - SessionWindows, - SlidingWindows, - Suppressed => JSuppressed, - TimeWindows, - Windowed -} -import org.apache.kafka.streams.scala.ImplicitConversions._ -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.apache.kafka.streams.scala.utils.TestDriver -import org.apache.kafka.streams.scala.{ByteArrayKeyValueStore, StreamsBuilder} -import org.junit.jupiter.api.Assertions.{assertEquals, assertNull, assertTrue} -import org.junit.jupiter.api.Test -import java.time.Duration -import java.time.Duration.ofMillis - -import scala.jdk.CollectionConverters._ - -//noinspection ScalaDeprecation -class KTableTest extends TestDriver { - - @Test - def testFilterRecordsSatisfyingPredicate(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() - table.filter((key, value) => key.equals("a") && value == 1).toStream.to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - testInput.pipeInput("a", "passes filter : add new row to table") - val record = testOutput.readKeyValue - assertEquals("a", record.key) - assertEquals(1, record.value) - } - { - testInput.pipeInput("a", "fails filter : remove existing row from table") - val record = testOutput.readKeyValue - assertEquals("a", record.key) - assertNull(record.value) - } - { - testInput.pipeInput("b", "fails filter : no output") - assertTrue(testOutput.isEmpty) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testFilterRecordsNotSatisfyingPredicate(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() - table.filterNot((_, value) => value > 1).toStream.to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - testInput.pipeInput("1", "value1") - val record = testOutput.readKeyValue - assertEquals("1", record.key) - assertEquals(1, record.value) - } - { - testInput.pipeInput("1", "value2") - val record = testOutput.readKeyValue - assertEquals("1", record.key) - assertNull(record.value) - } - { - testInput.pipeInput("2", "value1") - val record = testOutput.readKeyValue - assertEquals("2", record.key) - assertEquals(1, record.value) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testJoinCorrectlyRecords(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - - val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() 
- val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() - table1.join(table2)((a, b) => a + b).toStream.to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput1 = testDriver.createInput[String, String](sourceTopic1) - val testInput2 = testDriver.createInput[String, String](sourceTopic2) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - testInput1.pipeInput("1", "topic1value1") - testInput2.pipeInput("1", "topic2value1") - assertEquals(2, testOutput.readValue) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testJoinCorrectlyRecordsAndStateStore(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - val stateStore = "store" - val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) - - val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() - val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() - table1.join(table2, materialized)((a, b) => a + b).toStream.to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput1 = testDriver.createInput[String, String](sourceTopic1) - val testInput2 = testDriver.createInput[String, String](sourceTopic2) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - testInput1.pipeInput("1", "topic1value1") - testInput2.pipeInput("1", "topic2value1") - assertEquals(2, testOutput.readValue) - assertEquals(2, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) - - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testCorrectlySuppressResultsUsingSuppressedUntilTimeLimit(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - val window = TimeWindows.ofSizeAndGrace(Duration.ofSeconds(1L), Duration.ofHours(24)) - val suppression = JSuppressed.untilTimeLimit[Windowed[String]](Duration.ofSeconds(2L), BufferConfig.unbounded()) - - val table: KTable[Windowed[String], Long] = builder - .stream[String, String](sourceTopic) - .groupByKey - .windowedBy(window) - .count() - .suppress(suppression) - - table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - // publish key=1 @ time 0 => count==1 - testInput.pipeInput("1", "value1", 0L) - assertTrue(testOutput.isEmpty) - } - { - // publish key=1 @ time 1 => count==2 - testInput.pipeInput("1", "value2", 1L) - assertTrue(testOutput.isEmpty) - } - { - // move event time past the first window, but before the suppression window - testInput.pipeInput("2", "value1", 1001L) - assertTrue(testOutput.isEmpty) - } - { - // move event time riiiight before suppression window ends - testInput.pipeInput("2", "value2", 1999L) - assertTrue(testOutput.isEmpty) - } - { - // publish a late event before suppression window terminates => count==3 - testInput.pipeInput("1", "value3", 999L) - assertTrue(testOutput.isEmpty) - } - { - // move event time right past the suppression window of the first window. 
- testInput.pipeInput("2", "value3", 2001L) - val record = testOutput.readKeyValue - assertEquals("0:1000:1", record.key) - assertEquals(3L, record.value) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testCorrectlyGroupByKeyWindowedBySlidingWindow(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - val window = SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(1000L), ofMillis(1000L)) - val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) - - val table: KTable[Windowed[String], Long] = builder - .stream[String, String](sourceTopic) - .groupByKey - .windowedBy(window) - .count() - .suppress(suppression) - - table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - // publish key=1 @ time 0 => count==1 - testInput.pipeInput("1", "value1", 0L) - assertTrue(testOutput.isEmpty) - } - { - // move event time right past the grace period of the first window. - testInput.pipeInput("2", "value3", 5001L) - val record = testOutput.readKeyValue - assertEquals("0:1000:1", record.key) - assertEquals(1L, record.value) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testCorrectlySuppressResultsUsingSuppressedUntilWindowClosesByWindowed(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - val window = TimeWindows.ofSizeAndGrace(Duration.ofSeconds(1L), Duration.ofSeconds(1L)) - val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) - - val table: KTable[Windowed[String], Long] = builder - .stream[String, String](sourceTopic) - .groupByKey - .windowedBy(window) - .count() - .suppress(suppression) - - table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - // publish key=1 @ time 0 => count==1 - testInput.pipeInput("1", "value1", 0L) - assertTrue(testOutput.isEmpty) - } - { - // publish key=1 @ time 1 => count==2 - testInput.pipeInput("1", "value2", 1L) - assertTrue(testOutput.isEmpty) - } - { - // move event time past the window, but before the grace period - testInput.pipeInput("2", "value1", 1001L) - assertTrue(testOutput.isEmpty) - } - { - // move event time riiiight before grace period ends - testInput.pipeInput("2", "value2", 1999L) - assertTrue(testOutput.isEmpty) - } - { - // publish a late event before grace period terminates => count==3 - testInput.pipeInput("1", "value3", 999L) - assertTrue(testOutput.isEmpty) - } - { - // move event time right past the grace period of the first window. 
- testInput.pipeInput("2", "value3", 2001L) - val record = testOutput.readKeyValue - assertEquals("0:1000:1", record.key) - assertEquals(3L, record.value) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testCorrectlySuppressResultsUsingSuppressedUntilWindowClosesBySession(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - // Very similar to SuppressScenarioTest.shouldSupportFinalResultsForSessionWindows - val window = SessionWindows.ofInactivityGapAndGrace(Duration.ofMillis(5L), Duration.ofMillis(10L)) - val suppression = JSuppressed.untilWindowCloses(BufferConfig.unbounded()) - - val table: KTable[Windowed[String], Long] = builder - .stream[String, String](sourceTopic) - .groupByKey - .windowedBy(window) - .count() - .suppress(suppression) - - table.toStream((k, _) => s"${k.window().start()}:${k.window().end()}:${k.key()}").to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - // first window - testInput.pipeInput("k1", "v1", 0L) - assertTrue(testOutput.isEmpty) - } - { - // first window - testInput.pipeInput("k1", "v1", 1L) - assertTrue(testOutput.isEmpty) - } - { - // new window, but grace period hasn't ended for first window - testInput.pipeInput("k1", "v1", 8L) - assertTrue(testOutput.isEmpty) - } - { - // out-of-order event for first window, included since grade period hasn't passed - testInput.pipeInput("k1", "v1", 2L) - assertTrue(testOutput.isEmpty) - } - { - // add to second window - testInput.pipeInput("k1", "v1", 13L) - assertTrue(testOutput.isEmpty) - } - { - // add out-of-order to second window - testInput.pipeInput("k1", "v1", 10L) - assertTrue(testOutput.isEmpty) - } - { - // push stream time forward to flush other events through - testInput.pipeInput("k1", "v1", 30L) - // late event should get dropped from the stream - testInput.pipeInput("k1", "v1", 3L) - // should now have to results - val r1 = testOutput.readRecord - assertEquals("0:2:k1", r1.key) - assertEquals(3L, r1.value) - assertEquals(2L, r1.timestamp) - val r2 = testOutput.readRecord - assertEquals("8:13:k1", r2.key) - assertEquals(3L, r2.value) - assertEquals(13L, r2.timestamp) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testCorrectlySuppressResultsUsingSuppressedUntilTimeLimtByNonWindowed(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - val suppression = JSuppressed.untilTimeLimit[String](Duration.ofSeconds(2L), BufferConfig.unbounded()) - - val table: KTable[String, Long] = builder - .stream[String, String](sourceTopic) - .groupByKey - .count() - .suppress(suppression) - - table.toStream.to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - val testOutput = testDriver.createOutput[String, Long](sinkTopic) - - { - // publish key=1 @ time 0 => count==1 - testInput.pipeInput("1", "value1", 0L) - assertTrue(testOutput.isEmpty) - } - { - // publish key=1 @ time 1 => count==2 - testInput.pipeInput("1", "value2", 1L) - assertTrue(testOutput.isEmpty) - } - { - // move event time past the window, but before the grace period - testInput.pipeInput("2", "value1", 1001L) - assertTrue(testOutput.isEmpty) - } - { - // move event time right before grace period ends - testInput.pipeInput("2", "value2", 1999L) - 
assertTrue(testOutput.isEmpty) - } - { - // publish a late event before grace period terminates => count==3 - testInput.pipeInput("1", "value3", 999L) - assertTrue(testOutput.isEmpty) - } - { - // move event time right past the grace period of the first window. - testInput.pipeInput("2", "value3", 2001L) - val record = testOutput.readKeyValue - assertEquals("1", record.key) - assertEquals(3L, record.value) - } - assertTrue(testOutput.isEmpty) - - testDriver.close() - } - - @Test - def testSettingNameOnFilterProcessor(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count() - table - .filter((key, value) => key.equals("a") && value == 1, Named.as("my-name")) - .toStream - .to(sinkTopic) - - import scala.jdk.CollectionConverters._ - - val filterNode = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(3) - assertEquals("my-name", filterNode.name()) - } - - @Test - def testSettingNameOnCountProcessor(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val sinkTopic = "sink" - - val table = builder.stream[String, String](sourceTopic).groupBy((key, _) => key).count(Named.as("my-name")) - table.toStream.to(sinkTopic) - - import scala.jdk.CollectionConverters._ - - val countNode = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(1) - assertEquals("my-name", countNode.name()) - } - - @Test - def testSettingNameOnJoinProcessor(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - - val table1 = builder.stream[String, String](sourceTopic1).groupBy((key, _) => key).count() - val table2 = builder.stream[String, String](sourceTopic2).groupBy((key, _) => key).count() - table1 - .join(table2, Named.as("my-name"))((a, b) => a + b) - .toStream - .to(sinkTopic) - - val joinNodeLeft = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(6) - val joinNodeRight = builder.build().describe().subtopologies().asScala.toList(1).nodes().asScala.toList(7) - assertTrue(joinNodeLeft.name().contains("my-name")) - assertTrue(joinNodeRight.name().contains("my-name")) - } - - @Test - def testMapValuesWithValueMapperWithMaterialized(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val stateStore = "store" - val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) - - val table = builder.stream[String, String](sourceTopic).toTable - table.mapValues(value => value.length.toLong, materialized) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - - testInput.pipeInput("1", "topic1value1") - assertEquals(12, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) - - testDriver.close() - } - - @Test - def testMapValuesWithValueMapperWithKeyAndWithMaterialized(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic = "source" - val stateStore = "store" - val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore) - - val table = builder.stream[String, String](sourceTopic).toTable - table.mapValues((key, value) => key.length + value.length.toLong, materialized) - - val testDriver = createTestDriver(builder) - val testInput = testDriver.createInput[String, String](sourceTopic) - - testInput.pipeInput("1", "topic1value1") - 
assertEquals(13, testDriver.getKeyValueStore[String, Long](stateStore).get("1")) - - testDriver.close() - } - - @Test - def testJoinWithBiFunctionKeyExtractor(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - - val table1 = builder.stream[String, String](sourceTopic1).toTable - val table2 = builder.stream[String, String](sourceTopic2).toTable - - table1 - .join[String, String, String]( - table2, - (key: String, value: String) => s"$key-$value", - joiner = (v1: String, v2: String) => s"$v1+$v2", - materialized = Materialized.`with`[String, String, ByteArrayKeyValueStore] - ) - .toStream - .to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput1 = testDriver.createInput[String, String](sourceTopic1) - val testInput2 = testDriver.createInput[String, String](sourceTopic2) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - testInput1.pipeInput("k1", "v1") - testInput2.pipeInput("k1-v1", "v2") - - val record = testOutput.readKeyValue - assertEquals("k1", record.key) - assertEquals("v1+v2", record.value) - - testDriver.close() - } - - @Test - def testLeftJoinWithBiFunctionKeyExtractor(): Unit = { - val builder = new StreamsBuilder() - val sourceTopic1 = "source1" - val sourceTopic2 = "source2" - val sinkTopic = "sink" - - val table1 = builder.stream[String, String](sourceTopic1).toTable - val table2 = builder.stream[String, String](sourceTopic2).toTable - - table1 - .leftJoin[String, String, String]( - table2, - (key: String, value: String) => s"$key-$value", - joiner = (v1: String, v2: String) => s"${v1}+${Option(v2).getOrElse("null")}", - materialized = Materialized.`with`[String, String, ByteArrayKeyValueStore] - ) - .toStream - .to(sinkTopic) - - val testDriver = createTestDriver(builder) - val testInput1 = testDriver.createInput[String, String](sourceTopic1) - val testInput2 = testDriver.createInput[String, String](sourceTopic2) - val testOutput = testDriver.createOutput[String, String](sinkTopic) - - // First insert into the foreign key table (table2) - testInput2.pipeInput("k1-v1", "v2") - - // Then insert into the primary table (table1) - testInput1.pipeInput("k1", "v1") - - val record1 = testOutput.readKeyValue - assertEquals("k1", record1.key) - assertEquals("v1+v2", record1.value) - - // Test with non-matching foreign key (should still output due to left join) - testInput1.pipeInput("k2", "v3") - - val record2 = testOutput.readKeyValue - assertEquals("k2", record2.key) - assertEquals("v3+null", record2.value) - - testDriver.close() - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala deleted file mode 100644 index 9e0c466c26..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/MaterializedTest.scala +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.internals.MaterializedInternal -import org.apache.kafka.streams.scala._ -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.apache.kafka.streams.state.Stores -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -import java.time.Duration - -class MaterializedTest { - - @Test - def testCreateMaterializedWithSerdes(): Unit = { - val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = - Materialized.`with`[String, Long, ByteArrayKeyValueStore] - - val internalMaterialized = new MaterializedInternal(materialized) - assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) - } - - @Test - def testCreateMaterializedWithSerdesAndStoreName(): Unit = { - val storeName = "store" - val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = - Materialized.as[String, Long, ByteArrayKeyValueStore](storeName) - - val internalMaterialized = new MaterializedInternal(materialized) - assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) - assertEquals(storeName, internalMaterialized.storeName) - } - - @Test - def testCreateMaterializedWithSerdesAndWindowStoreSupplier(): Unit = { - val storeSupplier = Stores.persistentWindowStore("store", Duration.ofMillis(1), Duration.ofMillis(1), true) - val materialized: Materialized[String, Long, ByteArrayWindowStore] = - Materialized.as[String, Long](storeSupplier) - - val internalMaterialized = new MaterializedInternal(materialized) - assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) - assertEquals(storeSupplier, internalMaterialized.storeSupplier) - } - - @Test - def testCreateMaterializedWithSerdesAndKeyValueStoreSupplier(): Unit = { - val storeSupplier = Stores.persistentKeyValueStore("store") - val materialized: Materialized[String, Long, ByteArrayKeyValueStore] = - Materialized.as[String, Long](storeSupplier) - - val internalMaterialized = new MaterializedInternal(materialized) - assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalMaterialized.valueSerde.getClass) - assertEquals(storeSupplier, internalMaterialized.storeSupplier) - } - - @Test - def testCreateMaterializedWithSerdesAndSessionStoreSupplier(): Unit = { - val storeSupplier = Stores.persistentSessionStore("store", Duration.ofMillis(1)) - val materialized: Materialized[String, Long, ByteArraySessionStore] = - Materialized.as[String, Long](storeSupplier) - - val internalMaterialized = new MaterializedInternal(materialized) - assertEquals(Serdes.stringSerde.getClass, internalMaterialized.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, 
internalMaterialized.valueSerde.getClass) - assertEquals(storeSupplier, internalMaterialized.storeSupplier) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala deleted file mode 100644 index 6fca794bfb..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/ProducedTest.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.internals.ProducedInternal -import org.apache.kafka.streams.processor.StreamPartitioner -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -import java.util -import java.util.Optional - -class ProducedTest { - - @Test - def testCreateProducedWithSerdes(): Unit = { - val produced: Produced[String, Long] = Produced.`with`[String, Long] - - val internalProduced = new ProducedInternal(produced) - assertEquals(Serdes.stringSerde.getClass, internalProduced.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalProduced.valueSerde.getClass) - } - - @Test - def testCreateProducedWithSerdesAndStreamPartitioner(): Unit = { - val partitioner = new StreamPartitioner[String, Long] { - override def partitions( - topic: String, - key: String, - value: Long, - numPartitions: Int - ): Optional[util.Set[Integer]] = { - val partitions = new util.HashSet[Integer]() - partitions.add(Int.box(0)) - Optional.of(partitions) - } - } - val produced: Produced[String, Long] = Produced.`with`(partitioner) - - val internalProduced = new ProducedInternal(produced) - assertEquals(Serdes.stringSerde.getClass, internalProduced.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalProduced.valueSerde.getClass) - assertEquals(partitioner, internalProduced.streamPartitioner) - } -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala deleted file mode 100644 index ee3515ac61..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/RepartitionedTest.scala +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.internals.RepartitionedInternal -import org.apache.kafka.streams.processor.StreamPartitioner -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -import java.util -import java.util.Optional - -class RepartitionedTest { - - @Test - def testCreateRepartitionedWithSerdes(): Unit = { - val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long] - - val internalRepartitioned = new RepartitionedInternal(repartitioned) - assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) - } - - @Test - def testCreateRepartitionedWithSerdesAndNumPartitions(): Unit = { - val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long](5) - - val internalRepartitioned = new RepartitionedInternal(repartitioned) - assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) - assertEquals(5, internalRepartitioned.numberOfPartitions) - - } - - @Test - def testCreateRepartitionedWithSerdesAndTopicName(): Unit = { - val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long]("repartitionTopic") - - val internalRepartitioned = new RepartitionedInternal(repartitioned) - assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) - assertEquals("repartitionTopic", internalRepartitioned.name) - } - - @Test - def testCreateRepartitionedWithSerdesAndTopicNameAndNumPartitionsAndStreamPartitioner(): Unit = { - val partitioner = new StreamPartitioner[String, Long] { - override def partitions( - topic: String, - key: String, - value: Long, - numPartitions: Int - ): Optional[util.Set[Integer]] = { - val partitions = new util.HashSet[Integer]() - partitions.add(Int.box(0)) - Optional.of(partitions) - } - } - val repartitioned: Repartitioned[String, Long] = Repartitioned.`with`[String, Long](partitioner) - - val internalRepartitioned = new RepartitionedInternal(repartitioned) - assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) - assertEquals(partitioner, internalRepartitioned.streamPartitioner) - } - - @Test - def testCreateRepartitionedWithTopicNameAndNumPartitionsAndStreamPartitioner(): Unit = { - val partitioner = new StreamPartitioner[String, Long] { - override def partitions( - topic: String, - key: String, - value: Long, - numPartitions: Int - ): Optional[util.Set[Integer]] = { - val 
partitions = new util.HashSet[Integer]() - partitions.add(Int.box(0)) - Optional.of(partitions) - } - } - val repartitioned: Repartitioned[String, Long] = - Repartitioned - .`with`[String, Long](5) - .withName("repartitionTopic") - .withStreamPartitioner(partitioner) - - val internalRepartitioned = new RepartitionedInternal(repartitioned) - assertEquals(Serdes.stringSerde.getClass, internalRepartitioned.keySerde.getClass) - assertEquals(Serdes.longSerde.getClass, internalRepartitioned.valueSerde.getClass) - assertEquals(5, internalRepartitioned.numberOfPartitions) - assertEquals("repartitionTopic", internalRepartitioned.name) - assertEquals(partitioner, internalRepartitioned.streamPartitioner) - } - -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala deleted file mode 100644 index 7a5a48ccb9..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/kstream/StreamJoinedTest.scala +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.kafka.streams.scala.kstream - -import org.apache.kafka.streams.kstream.internals.{InternalStreamsBuilder, StreamJoinedInternal} -import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder -import org.apache.kafka.streams.scala.serialization.Serdes -import org.apache.kafka.streams.scala.serialization.Serdes._ -import org.apache.kafka.streams.state.Stores -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.extension.ExtendWith -import org.junit.jupiter.api.{BeforeEach, Test} -import org.mockito.Mockito.{mock, when} -import org.mockito.junit.jupiter.{MockitoExtension, MockitoSettings} -import org.mockito.quality.Strictness - -import java.time.Duration - -@ExtendWith(Array(classOf[MockitoExtension])) -@MockitoSettings(strictness = Strictness.STRICT_STUBS) -class StreamJoinedTest { - - val builder: InternalStreamsBuilder = mock(classOf[InternalStreamsBuilder]) - val topoBuilder: InternalTopologyBuilder = mock(classOf[InternalTopologyBuilder]) - - @BeforeEach - def before(): Unit = { - when(builder.internalTopologyBuilder()).thenReturn(topoBuilder) - when(topoBuilder.topologyConfigs()).thenReturn(null) - } - - @Test - def testCreateStreamJoinedWithSerdes(): Unit = { - val streamJoined: StreamJoined[String, String, Long] = StreamJoined.`with`[String, String, Long] - - val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined, builder) - assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) - assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) - assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) - } - - @Test - def testCreateStreamJoinedWithSerdesAndStoreSuppliers(): Unit = { - val storeSupplier = Stores.inMemoryWindowStore("myStore", Duration.ofMillis(500), Duration.ofMillis(250), false) - - val otherStoreSupplier = - Stores.inMemoryWindowStore("otherStore", Duration.ofMillis(500), Duration.ofMillis(250), false) - - val streamJoined: StreamJoined[String, String, Long] = - StreamJoined.`with`[String, String, Long](storeSupplier, otherStoreSupplier) - - val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined, builder) - assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) - assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) - assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) - assertEquals(otherStoreSupplier, streamJoinedInternal.otherStoreSupplier()) - assertEquals(storeSupplier, streamJoinedInternal.thisStoreSupplier()) - } - - @Test - def testCreateStreamJoinedWithSerdesAndStateStoreName(): Unit = { - val streamJoined: StreamJoined[String, String, Long] = StreamJoined.as[String, String, Long]("myStoreName") - - val streamJoinedInternal = new StreamJoinedInternal[String, String, Long](streamJoined, builder) - assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.keySerde().getClass) - assertEquals(Serdes.stringSerde.getClass, streamJoinedInternal.valueSerde().getClass) - assertEquals(Serdes.longSerde.getClass, streamJoinedInternal.otherValueSerde().getClass) - assertEquals("myStoreName", streamJoinedInternal.storeName()) - } - -} diff --git a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala b/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala deleted file mode 100644 index 
23a24178fb..0000000000 --- a/streams/streams-scala/bin/test/org/apache/kafka/streams/scala/utils/TestDriver.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kafka.streams.scala.utils - -import java.time.Instant -import java.util.Properties - -import org.apache.kafka.common.serialization.Serde -import org.apache.kafka.streams.scala.StreamsBuilder -import org.apache.kafka.streams.{StreamsConfig, TestInputTopic, TestOutputTopic, TopologyTestDriver} -import org.apache.kafka.test.TestUtils - -trait TestDriver { - def createTestDriver(builder: StreamsBuilder, initialWallClockTime: Instant = Instant.now()): TopologyTestDriver = { - val config = new Properties() - config.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath) - new TopologyTestDriver(builder.build(), config, initialWallClockTime) - } - - implicit class TopologyTestDriverOps(inner: TopologyTestDriver) { - def createInput[K, V](topic: String)(implicit serdeKey: Serde[K], serdeValue: Serde[V]): TestInputTopic[K, V] = - inner.createInputTopic(topic, serdeKey.serializer, serdeValue.serializer) - - def createOutput[K, V](topic: String)(implicit serdeKey: Serde[K], serdeValue: Serde[V]): TestOutputTopic[K, V] = - inner.createOutputTopic(topic, serdeKey.deserializer, serdeValue.deserializer) - } -} diff --git a/streams/test-utils/bin/test/log4j2.yaml b/streams/test-utils/bin/test/log4j2.yaml deleted file mode 100644 index be546a18b5..0000000000 --- a/streams/test-utils/bin/test/log4j2.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -Configuration: - Properties: - Property: - - name: "logPattern" - value: "[%d] %p %m (%c:%L)%n" - - Appenders: - Console: - name: STDOUT - PatternLayout: - pattern: "${logPattern}" - - Loggers: - Root: - level: INFO - AppenderRef: - - ref: STDOUT - Logger: - - name: org.apache.kafka - level: INFO From ddf73b1d42e221f087d95fe0d552aae5cff6fd2e Mon Sep 17 00:00:00 2001 From: Giuseppe Lillo Date: Fri, 9 Jan 2026 16:48:36 +0100 Subject: [PATCH 5/7] Remove committed bin/main ServiceLoader files from connect/transforms --- ...he.kafka.connect.transforms.Transformation | 41 ------------------- ...ka.connect.transforms.predicates.Predicate | 18 -------- 2 files changed, 59 deletions(-) delete mode 100644 connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation delete mode 100644 connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate diff --git a/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation b/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation deleted file mode 100644 index cf9646be37..0000000000 --- a/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.Transformation +++ /dev/null @@ -1,41 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License.
- -org.apache.kafka.connect.transforms.Cast$Key -org.apache.kafka.connect.transforms.Cast$Value -org.apache.kafka.connect.transforms.DropHeaders -org.apache.kafka.connect.transforms.ExtractField$Key -org.apache.kafka.connect.transforms.ExtractField$Value -org.apache.kafka.connect.transforms.Filter -org.apache.kafka.connect.transforms.Flatten$Key -org.apache.kafka.connect.transforms.Flatten$Value -org.apache.kafka.connect.transforms.HeaderFrom$Key -org.apache.kafka.connect.transforms.HeaderFrom$Value -org.apache.kafka.connect.transforms.HoistField$Key -org.apache.kafka.connect.transforms.HoistField$Value -org.apache.kafka.connect.transforms.InsertField$Key -org.apache.kafka.connect.transforms.InsertField$Value -org.apache.kafka.connect.transforms.InsertHeader -org.apache.kafka.connect.transforms.MaskField$Key -org.apache.kafka.connect.transforms.MaskField$Value -org.apache.kafka.connect.transforms.RegexRouter -org.apache.kafka.connect.transforms.ReplaceField$Key -org.apache.kafka.connect.transforms.ReplaceField$Value -org.apache.kafka.connect.transforms.SetSchemaMetadata$Key -org.apache.kafka.connect.transforms.SetSchemaMetadata$Value -org.apache.kafka.connect.transforms.TimestampConverter$Key -org.apache.kafka.connect.transforms.TimestampConverter$Value -org.apache.kafka.connect.transforms.TimestampRouter -org.apache.kafka.connect.transforms.ValueToKey diff --git a/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate b/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate deleted file mode 100644 index b451672377..0000000000 --- a/connect/transforms/bin/main/META-INF/services/org.apache.kafka.connect.transforms.predicates.Predicate +++ /dev/null @@ -1,18 +0,0 @@ - # Licensed to the Apache Software Foundation (ASF) under one or more - # contributor license agreements. See the NOTICE file distributed with - # this work for additional information regarding copyright ownership. - # The ASF licenses this file to You under the Apache License, Version 2.0 - # (the "License"); you may not use this file except in compliance with - # the License. You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- -org.apache.kafka.connect.transforms.predicates.HasHeaderKey -org.apache.kafka.connect.transforms.predicates.RecordIsTombstone -org.apache.kafka.connect.transforms.predicates.TopicNameMatches \ No newline at end of file From 8af769e13ae509e699f898aa0f73d32401b48a5c Mon Sep 17 00:00:00 2001 From: Giuseppe Lillo Date: Fri, 9 Jan 2026 16:56:00 +0100 Subject: [PATCH 6/7] Comment out debug println in ReplicaManager fetch path --- core/src/main/scala/kafka/server/ReplicaManager.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index 580e7f1a40..7bead205bb 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -1895,7 +1895,7 @@ class ReplicaManager(val config: KafkaConfig, // check if this fetch request can be satisfied right away val logReadResults = readFromLog(classicParams, classicFetchInfos, quota, readFromPurgatory = false) - println("logReadResult: " + logReadResults) +// println("logReadResult: " + logReadResults) var bytesReadable: Long = 0 var errorReadingData = false From 27b8dc32456f82603ee075046051bf743dc50617 Mon Sep 17 00:00:00 2001 From: Giuseppe Lillo Date: Mon, 12 Jan 2026 14:13:28 +0100 Subject: [PATCH 7/7] Comment out fetch debug println and extract produceRecords helper in migration test --- .../scala/kafka/server/ReplicaManager.scala | 2 +- .../ClassicToDisklessMigrationTest.java | 50 ++++++++----------- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala index 7bead205bb..3ab9784dc8 100644 --- a/core/src/main/scala/kafka/server/ReplicaManager.scala +++ b/core/src/main/scala/kafka/server/ReplicaManager.scala @@ -1778,7 +1778,7 @@ class ReplicaManager(val config: KafkaConfig, fetchInfos: Seq[(TopicIdPartition, PartitionData)], quota: ReplicaQuota, responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit): Unit = { - println(fetchInfos) +// println(fetchInfos) if (fetchInfos.isEmpty) { responseCallback(Seq.empty) return } diff --git a/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java b/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java index c760ee4bcc..962bf39cf1 100644 --- a/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java +++ b/core/src/test/java/kafka/server/ClassicToDisklessMigrationTest.java @@ -167,29 +167,13 @@ public void migrateClassicToDisklessTopic() throws Exception { } // Step 2: Produce messages to classic topic - AtomicInteger recordsProducedBeforeMigration = new AtomicInteger(); final long now = System.currentTimeMillis(); - try (Producer producer = new KafkaProducer<>(clientConfigs)) { - for (int i = 0; i < numRecordsBeforeMigration; i++) { - byte[] value = new byte[10000]; - final ProducerRecord record = new ProducerRecord<>(topicName, 0, now, null, value); - producer.send(record, (metadata, exception) -> { - if (exception != null) { - log.error("Failed to send record", exception); - } else { - log.info("Committed value at offset {} at {}", metadata.offset(), now); - recordsProducedBeforeMigration.incrementAndGet(); - } - }); - } - producer.flush(); - } - assertEquals(numRecordsBeforeMigration, recordsProducedBeforeMigration.get()); + int nextOffset = produceRecords(clientConfigs, topicName, now, 0, numRecordsBeforeMigration); - // Step 2: Consume messages from classic topic to verify it works + // Step 3: Consume messages from classic topic to verify it works
consumeWithSubscription(TimestampType.CREATE_TIME, clientConfigs, topicName, now, numRecordsBeforeMigration); - // Step 3: Migrate topic to diskless mode by changing config + // Step 4: Migrate topic to diskless mode by changing config try (Admin admin = AdminClient.create(clientConfigs)) { ConfigResource topicResource = new ConfigResource(ConfigResource.Type.TOPIC, topicName); AlterConfigOp alterConfigOp = new AlterConfigOp( @@ -200,27 +184,33 @@ public void migrateClassicToDisklessTopic() throws Exception { .all().get(10, TimeUnit.SECONDS); } - // Step 4: Produce more messages after migration - AtomicInteger recordsProducedAfterMigration = new AtomicInteger(); + // Step 5: Produce more messages after migration + produceRecords(clientConfigs, topicName, now, nextOffset, numRecordsAfterMigration); + + // Step 6: Consume from the beginning to verify all messages are available + consumeWithSubscription(TimestampType.CREATE_TIME, clientConfigs, topicName, now, totalRecords); + } + + private static int produceRecords(Map clientConfigs, String topicName, long timestamp, int startOffset, int numRecords) { + AtomicInteger recordsProduced = new AtomicInteger(); try (Producer producer = new KafkaProducer<>(clientConfigs)) { - for (int i = 0; i < numRecordsAfterMigration; i++) { - byte[] value = new byte[10000]; - final ProducerRecord record = new ProducerRecord<>(topicName, 0, now, null, value); + for (int i = 0; i < numRecords; i++) { + int offset = startOffset + i; + byte[] value = String.valueOf(offset).getBytes(); + final ProducerRecord record = new ProducerRecord<>(topicName, 0, timestamp, null, value); producer.send(record, (metadata, exception) -> { if (exception != null) { log.error("Failed to send record", exception); } else { - log.info("Committed value at offset {} at {}", metadata.offset(), now); - recordsProducedAfterMigration.incrementAndGet(); + log.info("Committed value at offset {} at {}", metadata.offset(), timestamp); + recordsProduced.incrementAndGet(); } }); } producer.flush(); } - assertEquals(numRecordsAfterMigration, recordsProducedAfterMigration.get()); - - // Step 4: Consume from the beginning to verify all messages are available - consumeWithSubscription(TimestampType.CREATE_TIME, clientConfigs, topicName, now, totalRecords); + assertEquals(numRecords, recordsProduced.get()); + return startOffset + numRecords; } private static void consumeWithSubscription(TimestampType timestampType, Map clientConfigs, String topicName, long now, int numRecords) {